diff options
Diffstat (limited to 'src/arrow/csharp')
284 files changed, 26742 insertions, 0 deletions
diff --git a/src/arrow/csharp/.editorconfig b/src/arrow/csharp/.editorconfig new file mode 100644 index 000000000..01506a0e2 --- /dev/null +++ b/src/arrow/csharp/.editorconfig @@ -0,0 +1,169 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +root = true + +# Default settings: +# A newline ending every file +# Use 4 spaces as indentation +[*] +insert_final_newline = true +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true + +# C# files +[*.cs] +# New line preferences +csharp_new_line_before_open_brace = all +csharp_new_line_before_else = true +csharp_new_line_before_catch = true +csharp_new_line_before_finally = true +csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_anonymous_types = true +csharp_new_line_between_query_expression_clauses = true + +# Indentation preferences +csharp_indent_block_contents = true +csharp_indent_braces = false +csharp_indent_case_contents = true +csharp_indent_case_contents_when_block = true +csharp_indent_switch_labels = true +csharp_indent_labels = one_less_than_current + +# Modifier preferences +csharp_preferred_modifier_order = 
public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion + +# avoid this. unless absolutely necessary +dotnet_style_qualification_for_field = false:suggestion +dotnet_style_qualification_for_property = false:suggestion +dotnet_style_qualification_for_method = false:suggestion +dotnet_style_qualification_for_event = false:suggestion + +# Types: use keywords instead of BCL types, and permit var only when the type is clear +csharp_style_var_for_built_in_types = false:suggestion +csharp_style_var_when_type_is_apparent = false:none +csharp_style_var_elsewhere = false:suggestion +dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion +dotnet_style_predefined_type_for_member_access = true:suggestion + +# name all constant fields using PascalCase +dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields +dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style +dotnet_naming_symbols.constant_fields.applicable_kinds = field +dotnet_naming_symbols.constant_fields.required_modifiers = const +dotnet_naming_style.pascal_case_style.capitalization = pascal_case + +# static fields should have s_ prefix +dotnet_naming_rule.static_fields_should_have_prefix.severity = suggestion +dotnet_naming_rule.static_fields_should_have_prefix.symbols = static_fields +dotnet_naming_rule.static_fields_should_have_prefix.style = static_prefix_style +dotnet_naming_symbols.static_fields.applicable_kinds = field +dotnet_naming_symbols.static_fields.required_modifiers = static +dotnet_naming_symbols.static_fields.applicable_accessibilities = private, internal, private_protected +dotnet_naming_style.static_prefix_style.required_prefix = s_ +dotnet_naming_style.static_prefix_style.capitalization = camel_case + +# internal and private fields should be _camelCase 
+dotnet_naming_rule.camel_case_for_private_internal_fields.severity = suggestion +dotnet_naming_rule.camel_case_for_private_internal_fields.symbols = private_internal_fields +dotnet_naming_rule.camel_case_for_private_internal_fields.style = camel_case_underscore_style +dotnet_naming_symbols.private_internal_fields.applicable_kinds = field +dotnet_naming_symbols.private_internal_fields.applicable_accessibilities = private, internal +dotnet_naming_style.camel_case_underscore_style.required_prefix = _ +dotnet_naming_style.camel_case_underscore_style.capitalization = camel_case + +# Code style defaults +csharp_using_directive_placement = outside_namespace:suggestion +dotnet_sort_system_directives_first = true +csharp_prefer_braces = true:refactoring +csharp_preserve_single_line_blocks = true:none +csharp_preserve_single_line_statements = false:none +csharp_prefer_static_local_function = true:suggestion +csharp_prefer_simple_using_statement = false:none +csharp_style_prefer_switch_expression = true:suggestion + +# Code quality +dotnet_style_readonly_field = true:suggestion +dotnet_code_quality_unused_parameters = non_public:suggestion + +# Expression-level preferences +dotnet_style_object_initializer = true:suggestion +dotnet_style_collection_initializer = true:suggestion +dotnet_style_explicit_tuple_names = true:suggestion +dotnet_style_coalesce_expression = true:suggestion +dotnet_style_null_propagation = true:suggestion +dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion +dotnet_style_prefer_inferred_tuple_names = true:suggestion +dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion +dotnet_style_prefer_auto_properties = true:suggestion +dotnet_style_prefer_conditional_expression_over_assignment = true:refactoring +dotnet_style_prefer_conditional_expression_over_return = true:refactoring +csharp_prefer_simple_default_expression = true:suggestion + +# Expression-bodied members 
+csharp_style_expression_bodied_methods = true:refactoring +csharp_style_expression_bodied_constructors = true:refactoring +csharp_style_expression_bodied_operators = true:refactoring +csharp_style_expression_bodied_properties = true:refactoring +csharp_style_expression_bodied_indexers = true:refactoring +csharp_style_expression_bodied_accessors = true:refactoring +csharp_style_expression_bodied_lambdas = true:refactoring +csharp_style_expression_bodied_local_functions = true:refactoring + +# Pattern matching +csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion +csharp_style_pattern_matching_over_as_with_null_check = true:suggestion +csharp_style_inlined_variable_declaration = true:suggestion + +# Null checking preferences +csharp_style_throw_expression = true:suggestion +csharp_style_conditional_delegate_call = true:suggestion + +# Other features +csharp_style_prefer_index_operator = false:none +csharp_style_prefer_range_operator = false:none +csharp_style_pattern_local_over_anonymous_function = false:none + +# Space preferences +csharp_space_after_cast = false +csharp_space_after_colon_in_inheritance_clause = true +csharp_space_after_comma = true +csharp_space_after_dot = false +csharp_space_after_keywords_in_control_flow_statements = true +csharp_space_after_semicolon_in_for_statement = true +csharp_space_around_binary_operators = before_and_after +csharp_space_around_declaration_statements = do_not_ignore +csharp_space_before_colon_in_inheritance_clause = true +csharp_space_before_comma = false +csharp_space_before_dot = false +csharp_space_before_open_square_brackets = false +csharp_space_before_semicolon_in_for_statement = false +csharp_space_between_empty_square_brackets = false +csharp_space_between_method_call_empty_parameter_list_parentheses = false +csharp_space_between_method_call_name_and_opening_parenthesis = false +csharp_space_between_method_call_parameter_list_parentheses = false 
+csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +csharp_space_between_method_declaration_name_and_open_parenthesis = false +csharp_space_between_method_declaration_parameter_list_parentheses = false +csharp_space_between_parentheses = false +csharp_space_between_square_brackets = false + +# Xml project files +[*.{csproj,props,targets}] +indent_size = 2 +charset = utf-8 diff --git a/src/arrow/csharp/.gitattributes b/src/arrow/csharp/.gitattributes new file mode 100644 index 000000000..d2ff52b12 --- /dev/null +++ b/src/arrow/csharp/.gitattributes @@ -0,0 +1,36 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is needed for earlier builds of msysgit that do not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. 
To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary
\ No newline at end of file diff --git a/src/arrow/csharp/.gitignore b/src/arrow/csharp/.gitignore new file mode 100644 index 000000000..a9fbd5882 --- /dev/null +++ b/src/arrow/csharp/.gitignore @@ -0,0 +1,267 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +project.fragment.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT 
+DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +#*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush +.cr/ + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Project-specific +artifacts/ + +# add .sln files back because they are ignored by the root .gitignore file +!*.sln diff --git a/src/arrow/csharp/Apache.Arrow.sln b/src/arrow/csharp/Apache.Arrow.sln new file mode 100644 index 000000000..873a7f5f1 --- /dev/null +++ b/src/arrow/csharp/Apache.Arrow.sln @@ -0,0 +1,67 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29926.136 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow", "src\Apache.Arrow\Apache.Arrow.csproj", "{BA6B2B0D-EAAE-4183-8A39-1B9CF571F71F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Tests", "test\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj", "{9CCEC01B-E67A-4726-BE72-7B514F76163F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Benchmarks", "test\Apache.Arrow.Benchmarks\Apache.Arrow.Benchmarks.csproj", "{742DF47D-77C5-4B84-9E0C-69645F1161EA}" +EndProject 
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Flight.Tests", "test\Apache.Arrow.Flight.Tests\Apache.Arrow.Flight.Tests.csproj", "{D6443535-3740-4F6C-8001-F90EDAF4CF0C}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Flight.TestWeb", "test\Apache.Arrow.Flight.TestWeb\Apache.Arrow.Flight.TestWeb.csproj", "{058F9CFA-2A13-43B8-87D9-E69F63F9EFF0}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Flight", "src\Apache.Arrow.Flight\Apache.Arrow.Flight.csproj", "{2490AA1E-DDA4-4069-B065-79A4897B0582}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Flight.AspNetCore", "src\Apache.Arrow.Flight.AspNetCore\Apache.Arrow.Flight.AspNetCore.csproj", "{E4F74938-E8FF-4AC1-A495-FEE95FC1EFDF}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.IntegrationTest", "test\Apache.Arrow.IntegrationTest\Apache.Arrow.IntegrationTest.csproj", "{E8264B7F-B680-4A55-939B-85DB628164BB}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {BA6B2B0D-EAAE-4183-8A39-1B9CF571F71F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BA6B2B0D-EAAE-4183-8A39-1B9CF571F71F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BA6B2B0D-EAAE-4183-8A39-1B9CF571F71F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BA6B2B0D-EAAE-4183-8A39-1B9CF571F71F}.Release|Any CPU.Build.0 = Release|Any CPU + {9CCEC01B-E67A-4726-BE72-7B514F76163F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9CCEC01B-E67A-4726-BE72-7B514F76163F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9CCEC01B-E67A-4726-BE72-7B514F76163F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9CCEC01B-E67A-4726-BE72-7B514F76163F}.Release|Any CPU.Build.0 = Release|Any CPU + {742DF47D-77C5-4B84-9E0C-69645F1161EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{742DF47D-77C5-4B84-9E0C-69645F1161EA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {742DF47D-77C5-4B84-9E0C-69645F1161EA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {742DF47D-77C5-4B84-9E0C-69645F1161EA}.Release|Any CPU.Build.0 = Release|Any CPU + {D6443535-3740-4F6C-8001-F90EDAF4CF0C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D6443535-3740-4F6C-8001-F90EDAF4CF0C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D6443535-3740-4F6C-8001-F90EDAF4CF0C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D6443535-3740-4F6C-8001-F90EDAF4CF0C}.Release|Any CPU.Build.0 = Release|Any CPU + {058F9CFA-2A13-43B8-87D9-E69F63F9EFF0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {058F9CFA-2A13-43B8-87D9-E69F63F9EFF0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {058F9CFA-2A13-43B8-87D9-E69F63F9EFF0}.Release|Any CPU.ActiveCfg = Release|Any CPU + {058F9CFA-2A13-43B8-87D9-E69F63F9EFF0}.Release|Any CPU.Build.0 = Release|Any CPU + {2490AA1E-DDA4-4069-B065-79A4897B0582}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2490AA1E-DDA4-4069-B065-79A4897B0582}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2490AA1E-DDA4-4069-B065-79A4897B0582}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2490AA1E-DDA4-4069-B065-79A4897B0582}.Release|Any CPU.Build.0 = Release|Any CPU + {E4F74938-E8FF-4AC1-A495-FEE95FC1EFDF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E4F74938-E8FF-4AC1-A495-FEE95FC1EFDF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E4F74938-E8FF-4AC1-A495-FEE95FC1EFDF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E4F74938-E8FF-4AC1-A495-FEE95FC1EFDF}.Release|Any CPU.Build.0 = Release|Any CPU + {E8264B7F-B680-4A55-939B-85DB628164BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E8264B7F-B680-4A55-939B-85DB628164BB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E8264B7F-B680-4A55-939B-85DB628164BB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E8264B7F-B680-4A55-939B-85DB628164BB}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection 
+ GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {FD0BB617-6031-4844-B99D-B331E335B572} + EndGlobalSection +EndGlobal diff --git a/src/arrow/csharp/ApacheArrow.snk b/src/arrow/csharp/ApacheArrow.snk Binary files differnew file mode 100644 index 000000000..68df43972 --- /dev/null +++ b/src/arrow/csharp/ApacheArrow.snk diff --git a/src/arrow/csharp/Directory.Build.props b/src/arrow/csharp/Directory.Build.props new file mode 100644 index 000000000..affee1814 --- /dev/null +++ b/src/arrow/csharp/Directory.Build.props @@ -0,0 +1,59 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<Project> + + <!-- Common repo directories --> + <PropertyGroup> + <RepoRoot>$(MSBuildThisFileDirectory)../</RepoRoot> + <CSharpDir>$(MSBuildThisFileDirectory)</CSharpDir> + <BaseOutputPath>$(CSharpDir)/artifacts/$(MSBuildProjectName)</BaseOutputPath> + </PropertyGroup> + + <!-- AssemblyInfo properties --> + <PropertyGroup> + <Product>Apache Arrow library</Product> + <Copyright>Copyright 2016-2019 The Apache Software Foundation</Copyright> + <Company>The Apache Software Foundation</Company> + <Version>6.0.1</Version> + </PropertyGroup> + + <PropertyGroup> + <EmbedUntrackedSources>true</EmbedUntrackedSources> + <LangVersion>8.0</LangVersion> + <SignAssembly>true</SignAssembly> + <AssemblyOriginatorKeyFile>$(CSharpDir)ApacheArrow.snk</AssemblyOriginatorKeyFile> + </PropertyGroup> + + <!-- NuGet properties --> + <PropertyGroup> + <Authors>The Apache Software Foundation</Authors> + <PackageIconUrl>https://www.apache.org/images/feather.png</PackageIconUrl> + <PackageLicenseFile>LICENSE.txt</PackageLicenseFile> + <PackageProjectUrl>https://arrow.apache.org/</PackageProjectUrl> + <PackageTags>apache arrow</PackageTags> + <RepositoryType>git</RepositoryType> + <RepositoryUrl>https://github.com/apache/arrow</RepositoryUrl> + <IncludeSymbols>true</IncludeSymbols> + <SymbolPackageFormat>snupkg</SymbolPackageFormat> + </PropertyGroup> + + <ItemGroup Condition="'$(IsPackable)' == 'true'"> + <Content Include="$(RepoRoot)LICENSE.txt" Pack="true" PackagePath="" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/Directory.Build.targets b/src/arrow/csharp/Directory.Build.targets new file mode 100644 index 000000000..498c752f2 --- /dev/null +++ b/src/arrow/csharp/Directory.Build.targets @@ -0,0 +1,29 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<Project> + + <!-- The following works around https://github.com/dotnet/sourcelink/issues/572 --> + <PropertyGroup> + <TargetFrameworkMonikerAssemblyAttributesPath>$([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)'))</TargetFrameworkMonikerAssemblyAttributesPath> + </PropertyGroup> + <ItemGroup> + <EmbeddedFiles Include="$(GeneratedAssemblyInfoFile)"/> + <EmbeddedFiles Include="$(TargetFrameworkMonikerAssemblyAttributesPath)"/> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/README.md b/src/arrow/csharp/README.md new file mode 100644 index 000000000..2a60cd27c --- /dev/null +++ b/src/arrow/csharp/README.md @@ -0,0 +1,184 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Apache Arrow + +An implementation of Arrow targeting .NET Standard. + +This implementation is under development and may not be suitable for use in production environments. + +# Implementation + +- Arrow 0.11 (specification) +- C# 7.2 +- .NET Standard 1.3 +- Asynchronous I/O +- Uses modern .NET runtime features such as **Span<T>**, **Memory<T>**, **MemoryManager<T>**, and **System.Buffers** primitives for memory allocation, memory storage, and fast serialization. +- Uses **Acyclic Visitor Pattern** for array types and arrays to facilitate serialization, record batch traversal, and format growth. + +# Known Issues + +- Cannot read Arrow files containing dictionary batches, tensors, or tables. +- Cannot easily modify allocation strategy without implementing a custom memory pool. All allocations are currently 64-byte aligned and padded to 8 bytes. +- Default memory allocation strategy uses an over-allocation strategy with pointer fixing, which results in significant memory overhead for small buffers. A buffer that requires a single byte for storage may be backed by an allocation of up to 64 bytes to satisfy alignment requirements. +- There are currently few builder APIs available for specific array types. Arrays must be built manually with an arrow buffer builder abstraction. +- FlatBuffer code generation is not included in the build process. +- Serialization implementation does not perform exhaustive validation checks during deserialization in every scenario. +- Throws exceptions with vague, inconsistent, or non-localized messages in many situations +- Throws exceptions that are not specific to the Arrow implementation in some circumstances where an Arrow-specific exception would be more appropriate (e.g. 
does not throw ArrowException) +- Lack of code documentation +- Lack of usage examples +- Lack of comprehensive unit tests +- Lack of comprehensive benchmarks + +# Usage + + using System.Diagnostics; + using System.IO; + using System.Threading.Tasks; + using Apache.Arrow; + using Apache.Arrow.Ipc; + + public static async Task<RecordBatch> ReadArrowAsync(string filename) + { + using (var stream = File.OpenRead(filename)) + using (var reader = new ArrowFileReader(stream)) + { + var recordBatch = await reader.ReadNextRecordBatchAsync(); + Debug.WriteLine("Read record batch with {0} column(s)", recordBatch.ColumnCount); + return recordBatch; + } + } + + +# Status + +## Memory Management + +- Allocations are 64-byte aligned and padded to 8 bytes. +- Allocations are automatically garbage collected + +## Arrays + +### Primitive Types + +- Int8, Int16, Int32, Int64 +- UInt8, UInt16, UInt32, UInt64 +- Float, Double +- Binary (variable-length) +- String (utf-8) +- Null + +### Parametric Types + +- Timestamp +- Date32 +- Date64 +- Decimal +- Time32 +- Time64 +- Binary (fixed-length) +- List +- Struct + +### Type Metadata + +- Data Types +- Fields +- Schema + +### Serialization + +- File +- Stream + +## Not Implemented + +- Serialization + - Exhaustive validation + - Dictionary Batch + - Cannot serialize or deserialize files or streams containing dictionary batches + - Dictionary Encoding + - Schema Metadata + - Schema Field Metadata +- Types + - Tensor + - Table +- Arrays + - Union + - Dense + - Sparse + - Half-Float + - Dictionary +- Array Operations + - Equality / Comparison + - Casting + - Builders +- Compute + - There is currently no API available for a compute / kernel abstraction. + +# Build + +Install the latest `.NET Core SDK` from https://dotnet.microsoft.com/download. + + dotnet build + +## NuGet Build + +To build a debug-flavor preview NuGet package into the **artifacts** folder, run the following command: 
+ + dotnet pack + +When building the officially released version (see the note below about the current `git` repository), run: + + dotnet pack -c Release + +This builds the final/stable package. + +NOTE: When building the officially released version, ensure that your `git` repository has the `origin` remote set to `https://github.com/apache/arrow.git`, which will ensure Source Link is set correctly. See https://github.com/dotnet/sourcelink/blob/master/docs/README.md for more information. + +There are two output artifacts: +1. `Apache.Arrow.<version>.nupkg` - this contains the executable assemblies +2. `Apache.Arrow.<version>.snupkg` - this contains the debug symbol files + +Both of these artifacts can then be uploaded to https://www.nuget.org/packages/manage/upload. + +## Docker Build + +Build from the Apache Arrow project root. + + docker build -f csharp/build/docker/Dockerfile . + +## Testing + + dotnet test + +All build artifacts are placed in the **artifacts** folder in the project root. + +# Coding Style + +This project follows the coding style specified in [Coding Style](https://github.com/dotnet/runtime/blob/master/docs/coding-guidelines/coding-style.md). + +# Updating FlatBuffers code + +See https://google.github.io/flatbuffers/flatbuffers_guide_use_java_c-sharp.html for how to get the `flatc` executable. + +Run `flatc --csharp` on each `.fbs` file in the [format](../format) folder, and replace the checked-in `.cs` files under [FlatBuf](src/Apache.Arrow/Flatbuf) with the generated files. + +Update the non-generated [FlatBuffers](src/Apache.Arrow/Flatbuf/FlatBuffers) `.cs` files with the files from the [google/flatbuffers repo](https://github.com/google/flatbuffers/tree/master/net/FlatBuffers). 
diff --git a/src/arrow/csharp/examples/Examples.sln b/src/arrow/csharp/examples/Examples.sln new file mode 100644 index 000000000..c0a4199ca --- /dev/null +++ b/src/arrow/csharp/examples/Examples.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27703.2042 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FluentBuilderExample", "FluentBuilderExample\FluentBuilderExample.csproj", "{ECE22119-D91D-44F7-9575-85B98F946289}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow", "..\src\Apache.Arrow\Apache.Arrow.csproj", "{1FE1DE95-FF6E-4895-82E7-909713C53524}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {ECE22119-D91D-44F7-9575-85B98F946289}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ECE22119-D91D-44F7-9575-85B98F946289}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ECE22119-D91D-44F7-9575-85B98F946289}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ECE22119-D91D-44F7-9575-85B98F946289}.Release|Any CPU.Build.0 = Release|Any CPU + {1FE1DE95-FF6E-4895-82E7-909713C53524}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1FE1DE95-FF6E-4895-82E7-909713C53524}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1FE1DE95-FF6E-4895-82E7-909713C53524}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1FE1DE95-FF6E-4895-82E7-909713C53524}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C22A81AD-8B64-4D7C-97AC-49E9F118AE78} + EndGlobalSection +EndGlobal diff --git a/src/arrow/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj 
b/src/arrow/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj new file mode 100644 index 000000000..575a2743e --- /dev/null +++ b/src/arrow/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj @@ -0,0 +1,12 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>netcoreapp2.1</TargetFramework> + </PropertyGroup> + + <ItemGroup> + <ProjectReference Include="..\..\src\Apache.Arrow\Apache.Arrow.csproj" /> + </ItemGroup> + +</Project>
\ No newline at end of file diff --git a/src/arrow/csharp/examples/FluentBuilderExample/Program.cs b/src/arrow/csharp/examples/FluentBuilderExample/Program.cs new file mode 100644 index 000000000..6dbdc3d77 --- /dev/null +++ b/src/arrow/csharp/examples/FluentBuilderExample/Program.cs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using Apache.Arrow;
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
using System;
using System.IO;
using System.Linq;
using System.Threading.Tasks;

namespace FluentBuilderExample
{
    /// <summary>
    /// Console sample: builds a record batch with the fluent builder API, prints the
    /// allocator statistics and writes the batch to <c>test.arrow</c>.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Builds a four-column record batch (ten values per column), reports how much
        /// memory the allocator handed out, and writes the batch to <c>test.arrow</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <returns>A task that completes once the file is written and a key has been read.</returns>
        public static async Task Main(string[] args)
        {
            // Use a specific memory pool from which arrays will be allocated (optional)

            var memoryAllocator = new NativeMemoryAllocator(alignment: 64);

            // Build a record batch using the Fluent API

            var recordBatch = new RecordBatch.Builder(memoryAllocator)
                .Append("Column A", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                .Append("Column B", false, col => col.Float(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => Convert.ToSingle(x * 2)))))
                .Append("Column C", false, col => col.String(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => $"Item {x+1}"))))
                .Append("Column D", false, col => col.Boolean(array => array.AppendRange(Enumerable.Range(0, 10).Select(x => x % 2 == 0))))
                .Build();

            // Print memory allocation statistics

            Console.WriteLine("Allocations: {0}", memoryAllocator.Statistics.Allocations);
            Console.WriteLine("Allocated: {0} byte(s)", memoryAllocator.Statistics.BytesAllocated);

            // Write record batch to a file.
            // File.Create truncates an existing file. File.OpenWrite would keep the old
            // length, so a previous, longer test.arrow would leave stale bytes after the
            // newly written content and produce a corrupt file.

            using (var stream = File.Create("test.arrow"))
            using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
            {
                await writer.WriteRecordBatchAsync(recordBatch);
                await writer.WriteEndAsync();
            }

            Console.WriteLine("Done");
            Console.ReadKey();
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj new file mode 100644 index 000000000..7cfa33c5d --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj @@ -0,0 +1,15 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> +
<TargetFramework>netcoreapp3.1</TargetFramework> + </PropertyGroup> + + <ItemGroup> + <PackageReference Include="Grpc.AspNetCore.Server" Version="2.33.1" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\Apache.Arrow.Flight\Apache.Arrow.Flight.csproj" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIEndpointRouteBuilderExtensions.cs b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIEndpointRouteBuilderExtensions.cs new file mode 100644 index 000000000..5902d7b01 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIEndpointRouteBuilderExtensions.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using Apache.Arrow.Flight.Server.Internal;
using Microsoft.AspNetCore.Routing;

namespace Microsoft.AspNetCore.Builder
{
    /// <summary>
    /// Endpoint-routing extension that exposes the Flight gRPC service.
    /// </summary>
    public static class FlightIEndpointRouteBuilderExtensions
    {
        /// <summary>
        /// Maps <c>FlightServerImplementation</c> as a gRPC service on the given route builder.
        /// </summary>
        /// <param name="endpointRouteBuilder">Route builder the Flight service is mapped on.</param>
        /// <returns>The convention builder returned by <c>MapGrpcService</c>.</returns>
        public static GrpcServiceEndpointConventionBuilder MapFlightEndpoint(this IEndpointRouteBuilder endpointRouteBuilder)
            => endpointRouteBuilder.MapGrpcService<FlightServerImplementation>();
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIGrpcServerBuilderExtensions.cs b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIGrpcServerBuilderExtensions.cs new file mode 100644 index 000000000..692e86f62 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight.AspNetCore/Extensions/FlightIGrpcServerBuilderExtensions.cs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using Apache.Arrow.Flight.Server;
using Grpc.AspNetCore.Server;

namespace Microsoft.Extensions.DependencyInjection
{
    /// <summary>
    /// Dependency-injection extension for registering a Flight server implementation.
    /// </summary>
    public static class FlightIGrpcServerBuilderExtensions
    {
        /// <summary>
        /// Registers <typeparamref name="T"/> as the scoped implementation of
        /// <see cref="FlightServer"/> in the builder's service collection.
        /// </summary>
        /// <typeparam name="T">Concrete <see cref="FlightServer"/> type to register.</typeparam>
        /// <param name="grpcServerBuilder">The gRPC server builder whose services are extended.</param>
        /// <returns>The same <paramref name="grpcServerBuilder"/>, so calls can be chained.</returns>
        public static IGrpcServerBuilder AddFlightServer<T>(this IGrpcServerBuilder grpcServerBuilder)
            where T : FlightServer
        {
            IServiceCollection services = grpcServerBuilder.Services;
            services.AddScoped<FlightServer, T>();

            return grpcServerBuilder;
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/src/arrow/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj new file mode 100644 index 000000000..bd59268ad --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -0,0 +1,21 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFramework>netstandard2.1</TargetFramework> + </PropertyGroup> + + <ItemGroup> + <PackageReference Include="Google.Protobuf" Version="3.14.0" /> + <PackageReference Include="Grpc.Net.Client" Version="2.33.1" /> + <PackageReference Include="Grpc.Tools" Version="2.33.1" PrivateAssets="All" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\Apache.Arrow\Apache.Arrow.csproj" /> + </ItemGroup> + + <ItemGroup> + <Protobuf Include="..\..\..\format\Flight.proto" Access="internal" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs new file mode 100644 index 000000000..8140e0649 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Internal;
using Apache.Arrow.Flight.Protocol;
using Grpc.Core;
using Grpc.Net.Client;

namespace Apache.Arrow.Flight.Client
{
    /// <summary>
    /// Client for the Flight gRPC service. Each method issues one call on the generated
    /// <c>FlightServiceClient</c> and wraps the protocol-level messages in the public Flight types.
    /// </summary>
    public class FlightClient
    {
        internal static readonly Empty EmptyInstance = new Empty();

        private readonly FlightService.FlightServiceClient _client;

        /// <summary>
        /// Creates a client that sends its calls over <paramref name="grpcChannel"/>.
        /// </summary>
        /// <param name="grpcChannel">Channel handed to the generated service client.</param>
        public FlightClient(GrpcChannel grpcChannel)
        {
            _client = new FlightService.FlightServiceClient(grpcChannel);
        }

        /// <summary>
        /// Starts a <c>ListFlights</c> call and exposes the response stream as <see cref="FlightInfo"/> items.
        /// </summary>
        /// <param name="criteria">Filter criteria; <c>FlightCriteria.Empty</c> is used when null.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>A streaming call whose status, trailers and disposal delegate to the underlying gRPC call.</returns>
        public AsyncServerStreamingCall<FlightInfo> ListFlights(FlightCriteria criteria = null, Metadata headers = null)
        {
            var effectiveCriteria = criteria ?? FlightCriteria.Empty;

            var response = _client.ListFlights(effectiveCriteria.ToProtocol(), headers);
            var convertStream = new StreamReader<Protocol.FlightInfo, FlightInfo>(response.ResponseStream, inFlight => new FlightInfo(inFlight));

            return new AsyncServerStreamingCall<FlightInfo>(convertStream, response.ResponseHeadersAsync, response.GetStatus, response.GetTrailers, response.Dispose);
        }

        /// <summary>
        /// Starts a <c>ListActions</c> call and exposes the response stream as <see cref="FlightActionType"/> items.
        /// </summary>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>A streaming call whose status, trailers and disposal delegate to the underlying gRPC call.</returns>
        public AsyncServerStreamingCall<FlightActionType> ListActions(Metadata headers = null)
        {
            var response = _client.ListActions(EmptyInstance, headers);
            var convertStream = new StreamReader<Protocol.ActionType, FlightActionType>(response.ResponseStream, actionType => new FlightActionType(actionType));

            return new AsyncServerStreamingCall<FlightActionType>(convertStream, response.ResponseHeadersAsync, response.GetStatus, response.GetTrailers, response.Dispose);
        }

        /// <summary>
        /// Starts a <c>DoGet</c> call for <paramref name="ticket"/> and wraps the response stream
        /// in a record batch reader.
        /// </summary>
        /// <param name="ticket">Ticket identifying the stream to fetch.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>The streaming call wrapping the record batch reader.</returns>
        public FlightRecordBatchStreamingCall GetStream(FlightTicket ticket, Metadata headers = null)
        {
            var stream = _client.DoGet(ticket.ToProtocol(), headers);
            var responseStream = new FlightClientRecordBatchStreamReader(stream.ResponseStream);
            return new FlightRecordBatchStreamingCall(responseStream, stream.ResponseHeadersAsync, stream.GetStatus, stream.GetTrailers, stream.Dispose);
        }

        /// <summary>
        /// Starts a <c>GetFlightInfo</c> call and converts its response to <see cref="FlightInfo"/>.
        /// </summary>
        /// <param name="flightDescriptor">Descriptor of the flight to look up.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>A unary call whose response task yields the converted <see cref="FlightInfo"/>.</returns>
        public AsyncUnaryCall<FlightInfo> GetInfo(FlightDescriptor flightDescriptor, Metadata headers = null)
        {
            var flightInfoResult = _client.GetFlightInfoAsync(flightDescriptor.ToProtocol(), headers);

            // A local async function replaces ContinueWith(...).Unwrap(): a continuation created
            // without an explicit scheduler runs on the ambient TaskScheduler.Current, which may
            // not be the default scheduler. Faults and cancellation still flow through the await.
            async Task<FlightInfo> ConvertResponseAsync()
            {
                var protocolInfo = await flightInfoResult.ResponseAsync.ConfigureAwait(false);
                return new FlightInfo(protocolInfo);
            }

            return new AsyncUnaryCall<FlightInfo>(
                ConvertResponseAsync(),
                flightInfoResult.ResponseHeadersAsync,
                flightInfoResult.GetStatus,
                flightInfoResult.GetTrailers,
                flightInfoResult.Dispose);
        }

        /// <summary>
        /// Starts a <c>DoPut</c> call. The request side writes record batches tagged with
        /// <paramref name="flightDescriptor"/>; the response side yields <see cref="FlightPutResult"/> items.
        /// </summary>
        /// <param name="flightDescriptor">Descriptor passed to the request stream writer.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>The duplex call wrapping both streams.</returns>
        public FlightRecordBatchDuplexStreamingCall StartPut(FlightDescriptor flightDescriptor, Metadata headers = null)
        {
            var channels = _client.DoPut(headers);
            var requestStream = new FlightClientRecordBatchStreamWriter(channels.RequestStream, flightDescriptor);
            var readStream = new StreamReader<Protocol.PutResult, FlightPutResult>(channels.ResponseStream, putResult => new FlightPutResult(putResult));
            return new FlightRecordBatchDuplexStreamingCall(
                requestStream,
                readStream,
                channels.ResponseHeadersAsync,
                channels.GetStatus,
                channels.GetTrailers,
                channels.Dispose);
        }

        /// <summary>
        /// Starts a <c>DoAction</c> call and exposes the response stream as <see cref="FlightResult"/> items.
        /// </summary>
        /// <param name="action">Action to execute.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>A streaming call whose status, trailers and disposal delegate to the underlying gRPC call.</returns>
        public AsyncServerStreamingCall<FlightResult> DoAction(FlightAction action, Metadata headers = null)
        {
            var stream = _client.DoAction(action.ToProtocol(), headers);
            var streamReader = new StreamReader<Protocol.Result, FlightResult>(stream.ResponseStream, result => new FlightResult(result));
            return new AsyncServerStreamingCall<FlightResult>(streamReader, stream.ResponseHeadersAsync, stream.GetStatus, stream.GetTrailers, stream.Dispose);
        }

        /// <summary>
        /// Starts a <c>GetSchema</c> call and decodes the returned schema bytes into a <see cref="Schema"/>.
        /// </summary>
        /// <param name="flightDescriptor">Descriptor of the flight whose schema is requested.</param>
        /// <param name="headers">Optional request headers.</param>
        /// <returns>A unary call whose response task yields the decoded <see cref="Schema"/>.</returns>
        public AsyncUnaryCall<Schema> GetSchema(FlightDescriptor flightDescriptor, Metadata headers = null)
        {
            var schemaResult = _client.GetSchemaAsync(flightDescriptor.ToProtocol(), headers);

            // Same reasoning as GetInfo. The previous continuation also ignored its own
            // argument and re-awaited the captured call; awaiting the response once is equivalent.
            async Task<Schema> DecodeSchemaAsync()
            {
                var response = await schemaResult.ResponseAsync.ConfigureAwait(false);
                return FlightMessageSerializer.DecodeSchema(response.Schema.Memory);
            }

            return new AsyncUnaryCall<Schema>(
                DecodeSchemaAsync(),
                schemaResult.ResponseHeadersAsync,
                schemaResult.GetStatus,
                schemaResult.GetTrailers,
                schemaResult.Dispose);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamReader.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamReader.cs new file mode 100644 index 000000000..011af0c83 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamReader.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Flight.Internal;
using Grpc.Core;

namespace Apache.Arrow.Flight.Client
{
    /// <summary>
    /// Client-side record batch reader. Adds no behavior of its own: it only forwards the
    /// <c>FlightData</c> stream to <see cref="FlightRecordBatchStreamReader"/>.
    /// </summary>
    public class FlightClientRecordBatchStreamReader : FlightRecordBatchStreamReader
    {
        /// <summary>
        /// Creates a reader over the given <c>FlightData</c> response stream.
        /// </summary>
        /// <param name="flightDataStream">Stream passed straight to the base reader.</param>
        internal FlightClientRecordBatchStreamReader(IAsyncStreamReader<FlightData> flightDataStream)
            : base(flightDataStream)
        {
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamWriter.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamWriter.cs new file mode 100644 index 000000000..d2e62c42e --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightClientRecordBatchStreamWriter.cs @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Flight.Internal;
using Grpc.Core;

namespace Apache.Arrow.Flight.Client
{
    /// <summary>
    /// Client-side record batch writer that also lets the caller complete the request stream.
    /// </summary>
    public class FlightClientRecordBatchStreamWriter : FlightRecordBatchStreamWriter, IClientStreamWriter<RecordBatch>
    {
        private readonly IClientStreamWriter<FlightData> _clientStreamWriter;

        // Set once CompleteAsync has finished; Dispose refuses to run before that.
        private bool _completed;

        /// <summary>
        /// Creates a writer over <paramref name="clientStreamWriter"/> for the given descriptor.
        /// </summary>
        /// <param name="clientStreamWriter">Underlying <c>FlightData</c> request stream.</param>
        /// <param name="flightDescriptor">Descriptor forwarded to the base writer.</param>
        internal FlightClientRecordBatchStreamWriter(IClientStreamWriter<FlightData> clientStreamWriter, FlightDescriptor flightDescriptor)
            : base(clientStreamWriter, flightDescriptor)
        {
            _clientStreamWriter = clientStreamWriter;
        }

        /// <summary>
        /// Disposes the base writer, but only after the stream has been completed.
        /// </summary>
        /// <param name="disposing">Forwarded unchanged to the base implementation.</param>
        /// <exception cref="InvalidOperationException">
        /// <see cref="CompleteAsync"/> has not finished yet.
        /// </exception>
        protected override void Dispose(bool disposing)
        {
            // NOTE(review): this throws regardless of the disposing flag; confirm no finalizer
            // path in the base class can reach it.
            if (_completed)
            {
                base.Dispose(disposing);
                return;
            }

            throw new InvalidOperationException("Dispose called before completing the stream.");
        }

        /// <summary>
        /// Completes the underlying request stream. Calls made after a successful completion do nothing.
        /// </summary>
        /// <returns>A task that finishes when the underlying stream has been completed.</returns>
        public async Task CompleteAsync()
        {
            if (!_completed)
            {
                await _clientStreamWriter.CompleteAsync().ConfigureAwait(false);
                _completed = true;
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchDuplexStreamingCall.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchDuplexStreamingCall.cs new file mode 100644 index 000000000..c9e6ecd35 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchDuplexStreamingCall.cs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Threading.Tasks;
using Grpc.Core;

namespace Apache.Arrow.Flight.Client
{
    /// <summary>
    /// A duplex Flight call: record batches are written to <see cref="RequestStream"/> while
    /// put results are read from <see cref="ResponseStream"/>. Status, trailers and disposal
    /// are forwarded to the delegates supplied at construction.
    /// </summary>
    public class FlightRecordBatchDuplexStreamingCall : IDisposable
    {
        private readonly Func<Status> _getStatusFunc;
        private readonly Func<Metadata> _getTrailersFunc;
        private readonly Action _disposeAction;

        internal FlightRecordBatchDuplexStreamingCall(
            FlightClientRecordBatchStreamWriter requestStream,
            IAsyncStreamReader<FlightPutResult> responseStream,
            Task<Metadata> responseHeadersAsync,
            Func<Status> getStatusFunc,
            Func<Metadata> getTrailersFunc,
            Action disposeAction)
        {
            _getStatusFunc = getStatusFunc;
            _getTrailersFunc = getTrailersFunc;
            _disposeAction = disposeAction;

            RequestStream = requestStream;
            ResponseStream = responseStream;
            ResponseHeadersAsync = responseHeadersAsync;
        }

        /// <summary>
        /// Async stream to send streaming requests.
        /// </summary>
        public FlightClientRecordBatchStreamWriter RequestStream { get; }

        /// <summary>
        /// Async stream to read streaming responses.
        /// </summary>
        public IAsyncStreamReader<FlightPutResult> ResponseStream { get; }

        /// <summary>
        /// Asynchronous access to response headers.
        /// </summary>
        public Task<Metadata> ResponseHeadersAsync { get; }

        /// <summary>
        /// Gets the call status if the call has already finished. Throws InvalidOperationException otherwise.
        /// </summary>
        /// <returns>The status reported by the underlying call.</returns>
        public Status GetStatus() => _getStatusFunc();

        /// <summary>
        /// Gets the call trailing metadata if the call has already finished. Throws InvalidOperationException otherwise.
        /// </summary>
        /// <returns>The trailers reported by the underlying call.</returns>
        public Metadata GetTrailers() => _getTrailersFunc();

        /// <summary>
        /// Cleans up after the call by invoking the dispose delegate supplied at construction.
        /// If the call has already finished normally (response stream has been fully read),
        /// this does nothing. Otherwise it requests cancellation of the call, which should
        /// terminate all pending async operations associated with the call, so that all
        /// resources used by the call are released eventually.
        /// </summary>
        /// <remarks>
        /// Normally, there is no need for you to dispose the call unless you want to utilize
        /// the "Cancel" semantics of invoking Dispose.
        /// </remarks>
        public void Dispose() => _disposeAction();
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchStreamingCall.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchStreamingCall.cs new file mode 100644 index 000000000..246cfa7cd --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Client/FlightRecordBatchStreamingCall.cs @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Threading.Tasks;
using Grpc.Core;

namespace Apache.Arrow.Flight.Client
{
    /// <summary>
    /// A server-streaming Flight call whose responses are read as record batches through
    /// <see cref="ResponseStream"/>. Status, trailers and disposal are forwarded to the
    /// delegates supplied at construction.
    /// </summary>
    public class FlightRecordBatchStreamingCall : IDisposable
    {
        private readonly Func<Status> _getStatusFunc;
        private readonly Func<Metadata> _getTrailersFunc;
        private readonly Action _disposeAction;

        internal FlightRecordBatchStreamingCall(
            FlightClientRecordBatchStreamReader recordBatchStreamReader,
            Task<Metadata> responseHeadersAsync,
            Func<Status> getStatusFunc,
            Func<Metadata> getTrailersFunc,
            Action disposeAction)
        {
            _getStatusFunc = getStatusFunc;
            _getTrailersFunc = getTrailersFunc;
            _disposeAction = disposeAction;

            ResponseStream = recordBatchStreamReader;
            ResponseHeadersAsync = responseHeadersAsync;
        }

        /// <summary>
        /// Reader over the record batches returned by the call.
        /// </summary>
        public FlightClientRecordBatchStreamReader ResponseStream { get; }

        /// <summary>
        /// Asynchronous access to response headers.
        /// </summary>
        public Task<Metadata> ResponseHeadersAsync { get; }

        /// <summary>
        /// Gets the call status if the call has already finished. Throws InvalidOperationException otherwise.
        /// </summary>
        /// <returns>The status reported by the underlying call.</returns>
        public Status GetStatus() => _getStatusFunc();

        /// <summary>
        /// Gets the call trailing metadata if the call has already finished. Throws InvalidOperationException otherwise.
        /// </summary>
        /// <returns>The trailers reported by the underlying call.</returns>
        public Metadata GetTrailers() => _getTrailersFunc();

        /// <summary>
        /// Cleans up after the call by invoking the dispose delegate supplied at construction.
        /// If the call has already finished normally (response stream has been fully read),
        /// this does nothing. Otherwise it requests cancellation of the call, which should
        /// terminate all pending async operations associated with the call, so that all
        /// resources used by the call are released eventually.
        /// </summary>
        /// <remarks>
        /// Normally, there is no need for you to dispose the call unless you want to utilize
        /// the "Cancel" semantics of invoking Dispose.
        /// </remarks>
        public void Dispose() => _disposeAction();
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightAction.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightAction.cs new file mode 100644 index 000000000..4a82fa62a --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightAction.cs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// A Flight action: a type name plus an optional opaque body. Wraps a <c>Protocol.Action</c> message.
    /// </summary>
    public class FlightAction
    {
        private readonly Protocol.Action _action;

        /// <summary>Wraps an existing protocol message without copying it.</summary>
        internal FlightAction(Protocol.Action action)
        {
            _action = action;
        }

        /// <summary>Creates an action of the given type with no body assigned.</summary>
        /// <param name="type">Action type name.</param>
        public FlightAction(string type)
        {
            _action = new Protocol.Action { Type = type };
        }

        /// <summary>Creates an action whose body is the given byte string.</summary>
        /// <param name="type">Action type name.</param>
        /// <param name="body">Body stored on the message as-is.</param>
        public FlightAction(string type, ByteString body)
        {
            _action = new Protocol.Action
            {
                Body = body,
                Type = type
            };
        }

        /// <summary>Creates an action whose body is the UTF-8 encoding of <paramref name="body"/>.</summary>
        /// <param name="type">Action type name.</param>
        /// <param name="body">Text encoded as UTF-8 into the body.</param>
        public FlightAction(string type, string body)
            : this(type, ByteString.CopyFromUtf8(body))
        {
        }

        /// <summary>Creates an action whose body is a copy of <paramref name="body"/>.</summary>
        /// <param name="type">Action type name.</param>
        /// <param name="body">Bytes copied into the body.</param>
        public FlightAction(string type, byte[] body)
            : this(type, ByteString.CopyFrom(body))
        {
        }

        /// <summary>Action type name stored on the message.</summary>
        public string Type
        {
            get { return _action.Type; }
        }

        /// <summary>Body stored on the message.</summary>
        public ByteString Body
        {
            get { return _action.Body; }
        }

        /// <summary>Returns the wrapped protocol message (the same instance, not a copy).</summary>
        internal Protocol.Action ToProtocol()
        {
            return _action;
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightActionType.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightActionType.cs new file mode 100644 index 000000000..8df893946 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightActionType.cs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Describes an action a Flight server offers: a type name and a description.
    /// Wraps a <c>Protocol.ActionType</c> message; equality and hashing delegate to that message.
    /// </summary>
    public class FlightActionType
    {
        private readonly Protocol.ActionType _actionType;

        /// <summary>Wraps an existing protocol message without copying it.</summary>
        internal FlightActionType(Protocol.ActionType actionType)
        {
            _actionType = actionType;
        }

        /// <summary>Creates an action type from its name and description.</summary>
        /// <param name="type">Action type name.</param>
        /// <param name="description">Description of the action.</param>
        public FlightActionType(string type, string description)
        {
            _actionType = new Protocol.ActionType
            {
                Description = description,
                Type = type
            };
        }

        /// <summary>Action type name stored on the message.</summary>
        public string Type
        {
            get { return _actionType.Type; }
        }

        /// <summary>Description stored on the message.</summary>
        public string Description
        {
            get { return _actionType.Description; }
        }

        /// <summary>Returns the wrapped protocol message (the same instance, not a copy).</summary>
        internal Protocol.ActionType ToProtocol()
        {
            return _actionType;
        }

        /// <summary>
        /// Two instances are equal when their wrapped protocol messages are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is FlightActionType other
                && Equals(_actionType, other._actionType);
        }

        /// <summary>Hash code of the wrapped protocol message.</summary>
        public override int GetHashCode()
        {
            return _actionType.GetHashCode();
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightCriteria.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightCriteria.cs new file mode 100644 index 000000000..6bcb087ac --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightCriteria.cs @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Filter criteria for listing flights. Wraps a <c>Protocol.Criteria</c> message whose
    /// only payload is an opaque expression.
    /// </summary>
    public class FlightCriteria
    {
        /// <summary>Shared instance backed by a default protocol message.</summary>
        internal static readonly FlightCriteria Empty = new FlightCriteria();

        private readonly Protocol.Criteria _criteria;

        /// <summary>Wraps an existing protocol message without copying it.</summary>
        internal FlightCriteria(Protocol.Criteria criteria)
        {
            _criteria = criteria;
        }

        /// <summary>Creates criteria backed by a default protocol message.</summary>
        public FlightCriteria()
        {
            _criteria = new Protocol.Criteria();
        }

        /// <summary>Creates criteria whose expression is the given byte string.</summary>
        /// <param name="byteString">Expression stored on the message as-is.</param>
        public FlightCriteria(ByteString byteString)
        {
            _criteria = new Protocol.Criteria { Expression = byteString };
        }

        /// <summary>Creates criteria whose expression is the UTF-8 encoding of <paramref name="expression"/>.</summary>
        /// <param name="expression">Text encoded as UTF-8 into the expression.</param>
        public FlightCriteria(string expression)
            : this(ByteString.CopyFromUtf8(expression))
        {
        }

        /// <summary>Creates criteria whose expression is a copy of <paramref name="bytes"/>.</summary>
        /// <param name="bytes">Bytes copied into the expression.</param>
        public FlightCriteria(byte[] bytes)
            : this(ByteString.CopyFrom(bytes))
        {
        }

        /// <summary>Expression stored on the message.</summary>
        public ByteString Expression
        {
            get { return _criteria.Expression; }
        }

        /// <summary>Returns the wrapped protocol message (the same instance, not a copy).</summary>
        internal Protocol.Criteria ToProtocol()
        {
            return _criteria;
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptor.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptor.cs new file mode 100644 index 000000000..7d4433291 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptor.cs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Identifies a flight either by an opaque command or by a list of path segments.
    /// Wraps a <c>Protocol.FlightDescriptor</c> message; equality and hashing delegate to it.
    /// Public instances are created through the static <c>Create…Descriptor</c> factories.
    /// </summary>
    public class FlightDescriptor
    {
        private readonly Protocol.FlightDescriptor _flightDescriptor;

        /// <summary>
        /// Wraps an existing protocol message after checking its descriptor type.
        /// </summary>
        /// <exception cref="NotSupportedException">The message type is neither Cmd nor Path.</exception>
        internal FlightDescriptor(Protocol.FlightDescriptor flightDescriptor)
        {
            var descriptorType = flightDescriptor.Type;
            bool isSupported =
                descriptorType == Protocol.FlightDescriptor.Types.DescriptorType.Cmd ||
                descriptorType == Protocol.FlightDescriptor.Types.DescriptorType.Path;

            if (!isSupported)
            {
                throw new NotSupportedException();
            }

            _flightDescriptor = flightDescriptor;
        }

        // Command descriptor: stores the command bytes and marks the message as Cmd.
        private FlightDescriptor(ByteString command)
        {
            _flightDescriptor = new Protocol.FlightDescriptor
            {
                Cmd = command,
                Type = Protocol.FlightDescriptor.Types.DescriptorType.Cmd
            };
        }

        // Path descriptor: marks the message as Path and appends each segment in order.
        private FlightDescriptor(params string[] paths)
        {
            var descriptor = new Protocol.FlightDescriptor
            {
                Type = Protocol.FlightDescriptor.Types.DescriptorType.Path
            };
            _flightDescriptor = descriptor;

            foreach (string segment in paths)
            {
                descriptor.Path.Add(segment);
            }
        }

        /// <summary>Creates a command descriptor from a copy of <paramref name="command"/>.</summary>
        /// <param name="command">Bytes copied into the command.</param>
        public static FlightDescriptor CreateCommandDescriptor(byte[] command)
        {
            ByteString commandBytes = ByteString.CopyFrom(command);
            return new FlightDescriptor(commandBytes);
        }

        /// <summary>Creates a command descriptor from the UTF-8 encoding of <paramref name="command"/>.</summary>
        /// <param name="command">Text encoded as UTF-8 into the command.</param>
        public static FlightDescriptor CreateCommandDescriptor(string command)
        {
            ByteString commandBytes = ByteString.CopyFromUtf8(command);
            return new FlightDescriptor(commandBytes);
        }

        /// <summary>Creates a path descriptor from the given segments.</summary>
        /// <param name="paths">Path segments, added in the order given.</param>
        public static FlightDescriptor CreatePathDescriptor(params string[] paths)
        {
            return new FlightDescriptor(paths);
        }

        /// <summary>Descriptor type, cast directly from the protocol enum value.</summary>
        public FlightDescriptorType Type
        {
            get { return (FlightDescriptorType)_flightDescriptor.Type; }
        }

        /// <summary>Path segments stored on the message.</summary>
        public IEnumerable<string> Paths
        {
            get { return _flightDescriptor.Path; }
        }

        /// <summary>Command bytes stored on the message.</summary>
        public ByteString Command
        {
            get { return _flightDescriptor.Cmd; }
        }

        /// <summary>Returns the wrapped protocol message (the same instance, not a copy).</summary>
        internal Protocol.FlightDescriptor ToProtocol()
        {
            return _flightDescriptor;
        }

        /// <summary>
        /// Two descriptors are equal when their wrapped protocol messages are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is FlightDescriptor other
                && Equals(_flightDescriptor, other._flightDescriptor);
        }

        /// <summary>Hash code of the wrapped protocol message.</summary>
        public override int GetHashCode()
        {
            return _flightDescriptor.GetHashCode();
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptorType.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptorType.cs new file mode 100644 index 000000000..120ed225c --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightDescriptorType.cs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Kind of payload carried by a flight descriptor.
    /// </summary>
    /// <remarks>
    /// FlightDescriptor.Type produces these values by casting the protocol-level
    /// descriptor type, so the numeric values here must not be changed independently.
    /// </remarks>
    public enum FlightDescriptorType
    {
        /// <summary>The descriptor holds a list of path segments.</summary>
        Path = 1,

        /// <summary>The descriptor holds a command payload.</summary>
        Command = 2,
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// A ticket together with the locations associated with it.
    /// </summary>
    public class FlightEndpoint
    {
        private readonly FlightTicket _ticket;
        private readonly IReadOnlyList<FlightLocation> _locations;

        /// <summary>
        /// Builds an endpoint from its protocol representation, wrapping the
        /// ticket and each location in the corresponding public type.
        /// </summary>
        internal FlightEndpoint(Protocol.FlightEndpoint flightEndpoint)
        {
            _ticket = new FlightTicket(flightEndpoint.Ticket);

            var wrapped = new List<FlightLocation>();
            foreach (var protocolLocation in flightEndpoint.Location)
            {
                wrapped.Add(new FlightLocation(protocolLocation));
            }
            _locations = wrapped;
        }

        /// <summary>
        /// Creates an endpoint from a ticket and a list of locations.
        /// The list is stored by reference, not copied.
        /// </summary>
        public FlightEndpoint(FlightTicket ticket, IReadOnlyList<FlightLocation> locations)
        {
            _ticket = ticket;
            _locations = locations;
        }

        /// <summary>Ticket of this endpoint.</summary>
        public FlightTicket Ticket
        {
            get { return _ticket; }
        }

        /// <summary>Locations of this endpoint.</summary>
        public IEnumerable<FlightLocation> Locations
        {
            get { return _locations; }
        }

        /// <summary>
        /// Creates a new protocol message containing the ticket and all locations.
        /// </summary>
        internal Protocol.FlightEndpoint ToProtocol()
        {
            var endpoint = new Protocol.FlightEndpoint();
            endpoint.Ticket = _ticket.ToProtocol();

            foreach (FlightLocation location in _locations)
            {
                endpoint.Location.Add(location.ToProtocol());
            }

            return endpoint;
        }

        /// <summary>
        /// Endpoints are equal when their tickets are equal and their locations
        /// are equal element by element, in the same order.
        /// </summary>
        public override bool Equals(object obj)
        {
            if (!(obj is FlightEndpoint other))
            {
                return false;
            }

            return Equals(_ticket, other._ticket)
                && _locations.SequenceEqual(other._locations);
        }

        /// <summary>
        /// Hash code derived from the ticket only; locations are not included.
        /// </summary>
        public override int GetHashCode()
        {
            return _ticket.GetHashCode();
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using Apache.Arrow.Flight.Internal;
using Apache.Arrow.Ipc;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Describes a flight: its schema, descriptor, endpoints and size totals.
    /// </summary>
    public class FlightInfo
    {
        /// <summary>
        /// Builds a <see cref="FlightInfo"/> from its protocol representation.
        /// The schema is decoded from the serialized bytes and every endpoint is
        /// wrapped in a <see cref="FlightEndpoint"/>.
        /// </summary>
        internal FlightInfo(Protocol.FlightInfo flightInfo)
        {
            Schema = FlightMessageSerializer.DecodeSchema(flightInfo.Schema.Memory);
            Descriptor = new FlightDescriptor(flightInfo.FlightDescriptor);

            var endpoints = new List<FlightEndpoint>();
            foreach (var endpoint in flightInfo.Endpoint)
            {
                endpoints.Add(new FlightEndpoint(endpoint));
            }
            Endpoints = endpoints;

            TotalBytes = flightInfo.TotalBytes;
            TotalRecords = flightInfo.TotalRecords;
        }

        /// <summary>
        /// Creates a flight info from its parts.
        /// </summary>
        /// <param name="schema">Schema of the flight's data.</param>
        /// <param name="descriptor">Descriptor identifying the flight.</param>
        /// <param name="endpoints">Endpoints of the flight; stored by reference.</param>
        /// <param name="totalRecords">Total record count; defaults to 0.</param>
        /// <param name="totalBytes">Total byte count; defaults to 0.</param>
        public FlightInfo(Schema schema, FlightDescriptor descriptor, IReadOnlyList<FlightEndpoint> endpoints, long totalRecords = 0, long totalBytes = 0)
        {
            Schema = schema;
            Descriptor = descriptor;
            Endpoints = endpoints;
            TotalBytes = totalBytes;
            TotalRecords = totalRecords;
        }

        /// <summary>Descriptor identifying the flight.</summary>
        public FlightDescriptor Descriptor { get; }

        /// <summary>Schema of the flight's data.</summary>
        public Schema Schema { get; }

        /// <summary>Total byte count as supplied at construction.</summary>
        public long TotalBytes { get; }

        /// <summary>Total record count as supplied at construction.</summary>
        public long TotalRecords { get; }

        /// <summary>Endpoints of the flight.</summary>
        public IReadOnlyList<FlightEndpoint> Endpoints { get; }

        /// <summary>
        /// Converts this instance into a new protocol message.
        /// </summary>
        /// <returns>
        /// A message carrying the serialized schema, the descriptor, the size
        /// totals and every endpoint.
        /// </returns>
        internal Protocol.FlightInfo ToProtocol()
        {
            var serializedSchema = SchemaWriter.SerializeSchema(Schema);
            var response = new Protocol.FlightInfo()
            {
                Schema = serializedSchema,
                FlightDescriptor = Descriptor.ToProtocol(),
                // The totals are read back by the internal constructor, so they
                // must be written here too or a round trip silently resets them.
                TotalBytes = TotalBytes,
                TotalRecords = TotalRecords
            };

            foreach (var endpoint in Endpoints)
            {
                response.Endpoint.Add(endpoint.ToProtocol());
            }

            return response;
        }
    }
}
a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightLocation.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightLocation.cs new file mode 100644 index 000000000..25b9d5d45 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightLocation.cs @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Collections.Generic;
using System.Text;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Public wrapper around a <c>Protocol.Location</c> message, exposing its URI.
    /// </summary>
    public class FlightLocation
    {
        private readonly Protocol.Location _location;

        /// <summary>
        /// Wraps an existing protocol location without copying it.
        /// </summary>
        internal FlightLocation(Protocol.Location location)
        {
            _location = location;
        }

        /// <summary>
        /// Creates a location for the given URI string.
        /// </summary>
        public FlightLocation(string uri)
        {
            var location = new Protocol.Location();
            location.Uri = uri;
            _location = location;
        }

        /// <summary>URI stored in the wrapped message.</summary>
        public string Uri
        {
            get { return _location.Uri; }
        }

        /// <summary>
        /// Returns the wrapped protocol message (the same instance, not a copy).
        /// </summary>
        internal Protocol.Location ToProtocol()
        {
            return _location;
        }

        /// <summary>
        /// Two locations are equal when their wrapped protocol messages are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is FlightLocation other
                && Equals(_location, other._location);
        }

        /// <summary>
        /// Hash code of the wrapped protocol message.
        /// </summary>
        public override int GetHashCode()
        {
            return _location.GetHashCode();
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Public wrapper around a <c>Protocol.PutResult</c> message, exposing its
    /// application metadata.
    /// </summary>
    public class FlightPutResult
    {
        /// <summary>
        /// Shared instance created with the parameterless constructor.
        /// </summary>
        public static readonly FlightPutResult Empty = new FlightPutResult();

        private readonly Protocol.PutResult _putResult;

        /// <summary>
        /// Creates a result backed by a fresh protocol message with no fields set.
        /// </summary>
        public FlightPutResult()
        {
            _putResult = new Protocol.PutResult();
        }

        /// <summary>
        /// Creates a result carrying the given application metadata.
        /// </summary>
        public FlightPutResult(ByteString applicationMetadata)
        {
            var result = new Protocol.PutResult();
            result.AppMetadata = applicationMetadata;
            _putResult = result;
        }

        /// <summary>
        /// Creates a result whose metadata is a copy of the given bytes.
        /// </summary>
        public FlightPutResult(byte[] applicationMetadata)
            : this(ByteString.CopyFrom(applicationMetadata))
        {
        }

        /// <summary>
        /// Creates a result whose metadata is the UTF-8 encoding of the given string.
        /// </summary>
        public FlightPutResult(string applicationMetadata)
            : this(ByteString.CopyFromUtf8(applicationMetadata))
        {
        }

        /// <summary>
        /// Wraps an existing protocol message without copying it.
        /// </summary>
        internal FlightPutResult(Protocol.PutResult putResult)
        {
            _putResult = putResult;
        }

        /// <summary>Application metadata stored in the wrapped message.</summary>
        public ByteString ApplicationMetadata
        {
            get { return _putResult.AppMetadata; }
        }

        /// <summary>
        /// Returns the wrapped protocol message (the same instance, not a copy).
        /// </summary>
        internal Protocol.PutResult ToProtocol()
        {
            return _putResult;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Flatbuf;
using Apache.Arrow.Flight.Internal;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Ipc;
using Google.Protobuf;
using Grpc.Core;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Stream of record batches read from a stream of flight data messages.
    ///
    /// Iterate with MoveNext() and Current, or through the
    /// IAsyncEnumerable implementation.
    /// </summary>
    public abstract class FlightRecordBatchStreamReader : IAsyncStreamReader<RecordBatch>, IAsyncEnumerable<RecordBatch>, IDisposable
    {
        //Temporary until .NET 5.0 upgrade
        private static ValueTask CompletedValueTask = new ValueTask();

        private readonly RecordBatcReaderImplementation _arrowReaderImplementation;

        /// <summary>
        /// Creates a reader over the given flight data stream.
        /// </summary>
        private protected FlightRecordBatchStreamReader(IAsyncStreamReader<Protocol.FlightData> flightDataStream)
        {
            _arrowReaderImplementation = new RecordBatcReaderImplementation(flightDataStream);
        }

        /// <summary>
        /// Schema of the stream, obtained from the underlying reader implementation.
        /// </summary>
        public ValueTask<Schema> Schema
        {
            get { return _arrowReaderImplementation.ReadSchema(); }
        }

        /// <summary>
        /// Flight descriptor, obtained from the underlying reader implementation.
        /// </summary>
        internal ValueTask<FlightDescriptor> GetFlightDescriptor()
        {
            return _arrowReaderImplementation.ReadFlightDescriptor();
        }

        /// <summary>
        /// Get the application metadata from the latest received record batch
        /// </summary>
        public IReadOnlyList<ByteString> ApplicationMetadata
        {
            get { return _arrowReaderImplementation.ApplicationMetadata; }
        }

        /// <summary>
        /// The batch produced by the most recent call to MoveNext, or null when
        /// that call returned false.
        /// </summary>
        public RecordBatch Current { get; private set; }

        /// <summary>
        /// Reads the next record batch into <see cref="Current"/>.
        /// </summary>
        /// <returns>True when a batch was read; false when the reader returned null.</returns>
        public async Task<bool> MoveNext(CancellationToken cancellationToken)
        {
            RecordBatch next = await _arrowReaderImplementation.ReadNextRecordBatchAsync(cancellationToken);
            Current = next;
            return Current != null;
        }

        /// <summary>
        /// Returns an enumerator that advances this reader; it does not create an
        /// independent cursor.
        /// </summary>
        public IAsyncEnumerator<RecordBatch> GetAsyncEnumerator(CancellationToken cancellationToken = default)
        {
            return new AsyncEnumerator(this, cancellationToken);
        }

        /// <summary>
        /// Disposes the underlying reader implementation.
        /// </summary>
        public void Dispose()
        {
            _arrowReaderImplementation.Dispose();
        }

        /// <summary>
        /// Adapts the reader to IAsyncEnumerator by forwarding every call to it.
        /// </summary>
        private class AsyncEnumerator : IAsyncEnumerator<RecordBatch>
        {
            private readonly FlightRecordBatchStreamReader _flightRecordBatchStreamReader;
            private readonly CancellationToken _cancellationToken;

            internal AsyncEnumerator(FlightRecordBatchStreamReader flightRecordBatchStreamReader, CancellationToken cancellationToken)
            {
                _flightRecordBatchStreamReader = flightRecordBatchStreamReader;
                _cancellationToken = cancellationToken;
            }

            public RecordBatch Current
            {
                get { return _flightRecordBatchStreamReader.Current; }
            }

            public async ValueTask<bool> MoveNextAsync()
            {
                bool advanced = await _flightRecordBatchStreamReader.MoveNext(_cancellationToken);
                return advanced;
            }

            // Disposing the enumerator disposes the owning reader.
            public ValueTask DisposeAsync()
            {
                _flightRecordBatchStreamReader.Dispose();
                return CompletedValueTask;
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Internal;
using Apache.Arrow.Flight.Protocol;
using Google.Protobuf;
using Grpc.Core;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Writes record batches to a stream of flight data messages.
    /// The underlying <see cref="FlightDataStream"/> is created lazily on the
    /// first write, using the schema of the first batch.
    /// </summary>
    public abstract class FlightRecordBatchStreamWriter : IAsyncStreamWriter<RecordBatch>, IDisposable
    {
        // Null until the first call to WriteAsync.
        private FlightDataStream _flightDataStream;
        private readonly IAsyncStreamWriter<FlightData> _clientStreamWriter;
        private readonly FlightDescriptor _flightDescriptor;

        private bool _disposed;

        /// <summary>
        /// Creates a writer that sends flight data to <paramref name="clientStreamWriter"/>.
        /// </summary>
        /// <param name="clientStreamWriter">Destination for the flight data messages.</param>
        /// <param name="flightDescriptor">Descriptor handed to the flight data stream.</param>
        private protected FlightRecordBatchStreamWriter(IAsyncStreamWriter<FlightData> clientStreamWriter, FlightDescriptor flightDescriptor)
        {
            _clientStreamWriter = clientStreamWriter;
            _flightDescriptor = flightDescriptor;
        }

        private void SetupStream(Schema schema)
        {
            _flightDataStream = new FlightDataStream(_clientStreamWriter, _flightDescriptor, schema);
        }

        /// <summary>
        /// Not implemented; both accessors throw <see cref="NotImplementedException"/>.
        /// </summary>
        public WriteOptions WriteOptions { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }

        /// <summary>
        /// Writes a record batch without application metadata.
        /// </summary>
        public Task WriteAsync(RecordBatch message)
        {
            return WriteAsync(message, default);
        }

        /// <summary>
        /// Writes a record batch together with optional application metadata.
        /// </summary>
        /// <param name="message">Batch to write; its schema is used to set up the stream on first use.</param>
        /// <param name="applicationMetadata">Metadata to attach, or null for none.</param>
        public Task WriteAsync(RecordBatch message, ByteString applicationMetadata)
        {
            if (_flightDataStream == null)
            {
                SetupStream(message.Schema);
            }

            return _flightDataStream.Write(message, applicationMetadata);
        }

        /// <summary>
        /// Releases the flight data stream, if one was ever created.
        /// Calling this more than once has no further effect.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                // The stream only exists after the first write; a writer that was
                // never written to must still dispose cleanly instead of throwing.
                _flightDataStream?.Dispose();
                _disposed = true;
            }
        }

        /// <inheritdoc />
        public void Dispose()
        {
            Dispose(true);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/FlightResult.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightResult.cs new file mode 100644 index 000000000..3ddadd4bc --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/FlightResult.cs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Collections.Generic;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Public wrapper around a <c>Protocol.Result</c> message, exposing its body.
    /// </summary>
    public class FlightResult
    {
        private readonly Protocol.Result _result;

        /// <summary>
        /// Wraps an existing protocol message without copying it.
        /// </summary>
        internal FlightResult(Protocol.Result result)
        {
            _result = result;
        }

        /// <summary>
        /// Creates a result with the given body.
        /// </summary>
        public FlightResult(ByteString body)
        {
            var result = new Protocol.Result();
            result.Body = body;
            _result = result;
        }

        /// <summary>
        /// Creates a result whose body is the UTF-8 encoding of the given string.
        /// </summary>
        public FlightResult(string body)
            : this(ByteString.CopyFromUtf8(body))
        {
        }

        /// <summary>
        /// Creates a result whose body is a copy of the given bytes.
        /// </summary>
        public FlightResult(byte[] body)
            : this(ByteString.CopyFrom(body))
        {
        }

        /// <summary>Body stored in the wrapped message.</summary>
        public ByteString Body
        {
            get { return _result.Body; }
        }

        /// <summary>
        /// Returns the wrapped protocol message (the same instance, not a copy).
        /// </summary>
        internal Protocol.Result ToProtocol()
        {
            return _result;
        }

        /// <summary>
        /// Two results are equal when their wrapped protocol messages are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is FlightResult other
                && Equals(_result, other._result);
        }

        /// <summary>
        /// Hash code of the wrapped protocol message.
        /// </summary>
        public override int GetHashCode()
        {
            return _result.GetHashCode();
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using Google.Protobuf;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Public wrapper around a <c>Protocol.Ticket</c> message, exposing its bytes.
    /// </summary>
    public class FlightTicket
    {
        private readonly Protocol.Ticket _ticket;

        /// <summary>
        /// Wraps an existing protocol message without copying it.
        /// </summary>
        internal FlightTicket(Protocol.Ticket ticket)
        {
            _ticket = ticket;
        }

        /// <summary>
        /// Creates a ticket with the given content.
        /// </summary>
        public FlightTicket(ByteString ticket)
        {
            var message = new Protocol.Ticket();
            message.Ticket_ = ticket;
            _ticket = message;
        }

        /// <summary>
        /// Creates a ticket whose content is the UTF-8 encoding of the given string.
        /// </summary>
        public FlightTicket(string ticket)
            : this(ByteString.CopyFromUtf8(ticket))
        {
        }

        /// <summary>
        /// Creates a ticket whose content is a copy of the given bytes.
        /// </summary>
        public FlightTicket(byte[] bytes)
            : this(ByteString.CopyFrom(bytes))
        {
        }

        /// <summary>Content stored in the wrapped message.</summary>
        public ByteString Ticket
        {
            get { return _ticket.Ticket_; }
        }

        /// <summary>
        /// Returns the wrapped protocol message (the same instance, not a copy).
        /// </summary>
        internal Protocol.Ticket ToProtocol()
        {
            return _ticket;
        }

        /// <summary>
        /// Two tickets are equal when their wrapped protocol messages are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is FlightTicket other
                && Equals(_ticket, other._ticket);
        }

        /// <summary>
        /// Hash code of the wrapped protocol message.
        /// </summary>
        public override int GetHashCode()
        {
            return _ticket.GetHashCode();
        }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Flatbuf;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Ipc;
using FlatBuffers;
using Google.Protobuf;
using Grpc.Core;

namespace Apache.Arrow.Flight.Internal
{
    /// <summary>
    /// Handles writing record batches as flight data
    /// </summary>
    /// <remarks>
    /// The base writer is given a private <see cref="MemoryStream"/> as scratch
    /// space. Message headers are intercepted in <c>WriteMessageAsync</c> and
    /// stored in the pending <c>FlightData</c> message instead of being written
    /// to that stream.
    /// </remarks>
    internal class FlightDataStream : ArrowStreamWriter
    {
        private readonly FlightDescriptor _flightDescriptor;
        private readonly IAsyncStreamWriter<FlightData> _clientStreamWriter;

        // Message currently being assembled; WriteMessageAsync fills in its header.
        private Protocol.FlightData _currentFlightData;

        /// <summary>
        /// Creates a stream that sends flight data to <paramref name="clientStreamWriter"/>.
        /// </summary>
        /// <param name="clientStreamWriter">Destination for the assembled messages.</param>
        /// <param name="flightDescriptor">Descriptor attached to the schema message; may be null.</param>
        /// <param name="schema">Schema passed to the base writer.</param>
        public FlightDataStream(IAsyncStreamWriter<FlightData> clientStreamWriter, FlightDescriptor flightDescriptor, Schema schema)
            : base(new MemoryStream(), schema)
        {
            _clientStreamWriter = clientStreamWriter;
            _flightDescriptor = flightDescriptor;
        }

        // Sends the schema as its own flight data message, with the descriptor
        // attached when one was supplied.
        private async Task SendSchema()
        {
            _currentFlightData = new Protocol.FlightData();

            if (_flightDescriptor != null)
            {
                _currentFlightData.FlightDescriptor = _flightDescriptor.ToProtocol();
            }

            var offset = SerializeSchema(Schema);

            // The override below never observes the token, so no token source is
            // allocated here; previously one was created and never disposed.
            await WriteMessageAsync(MessageHeader.Schema, offset, 0, CancellationToken.None).ConfigureAwait(false);
            await _clientStreamWriter.WriteAsync(_currentFlightData).ConfigureAwait(false);
            HasWrittenSchema = true;
        }

        // Empties the scratch stream so it only ever holds the current batch body.
        private void ResetStream()
        {
            this.BaseStream.Position = 0;
            this.BaseStream.SetLength(0);
        }

        /// <summary>
        /// Sends one record batch, preceded by the schema message if it has not
        /// been sent yet.
        /// </summary>
        /// <param name="recordBatch">Batch to send.</param>
        /// <param name="applicationMetadata">Metadata to attach to the message, or null for none.</param>
        public async Task Write(RecordBatch recordBatch, ByteString applicationMetadata)
        {
            if (!HasWrittenSchema)
            {
                await SendSchema().ConfigureAwait(false);
            }
            ResetStream();

            _currentFlightData = new Protocol.FlightData();

            if (applicationMetadata != null)
            {
                _currentFlightData.AppMetadata = applicationMetadata;
            }

            // NOTE(review): presumably the base writer serializes the batch body into
            // BaseStream and routes the header through WriteMessageAsync - confirm
            // against ArrowStreamWriter.
            await WriteRecordBatchInternalAsync(recordBatch).ConfigureAwait(false);

            //Reset stream position
            this.BaseStream.Position = 0;
            var bodyData = await ByteString.FromStreamAsync(this.BaseStream).ConfigureAwait(false);

            _currentFlightData.DataBody = bodyData;
            await _clientStreamWriter.WriteAsync(_currentFlightData).ConfigureAwait(false);
        }

        /// <summary>
        /// Builds the flatbuffer message header and stores it in the pending
        /// flight data message. Nothing is written to the base stream and the
        /// cancellation token is not used.
        /// </summary>
        /// <returns>Always a completed task with the value 0.</returns>
        private protected override ValueTask<long> WriteMessageAsync<T>(MessageHeader headerType, Offset<T> headerOffset, int bodyLength, CancellationToken cancellationToken)
        {
            Offset<Flatbuf.Message> messageOffset = Flatbuf.Message.CreateMessage(
                Builder, CurrentMetadataVersion, headerType, headerOffset.Value,
                bodyLength);

            Builder.Finish(messageOffset.Value);

            ReadOnlyMemory<byte> messageData = Builder.DataBuffer.ToReadOnlyMemory(Builder.DataBuffer.Position, Builder.Offset);

            _currentFlightData.DataHeader = ByteString.CopyFrom(messageData.Span);

            return new ValueTask<long>(0);
        }
    }
}
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Apache.Arrow.Ipc;
using FlatBuffers;

namespace Apache.Arrow.Flight
{
    /// <summary>
    /// Helpers for decoding a schema from serialized flight messages.
    /// </summary>
    internal static class FlightMessageSerializer
    {
        /// <summary>
        /// Decodes a schema from a buffer that starts with a 4-byte little-endian
        /// prefix, optionally preceded by a 4-byte continuation token.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// A continuation token is present but too few bytes follow it.
        /// </exception>
        public static Schema DecodeSchema(ReadOnlyMemory<byte> buffer)
        {
            ReadOnlySpan<byte> bytes = buffer.Span;

            int prefix = BinaryPrimitives.ReadInt32LittleEndian(bytes);
            int payloadStart = sizeof(int);

            if (prefix == MessageSerializer.IpcContinuationToken)
            {
                // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length
                bool truncated = buffer.Length <= payloadStart + sizeof(int);
                if (truncated)
                {
                    throw new InvalidDataException("Corrupted IPC message. Received a continuation token at the end of the message.");
                }

                // The length is read but only used to step past it.
                prefix = BinaryPrimitives.ReadInt32LittleEndian(bytes.Slice(payloadStart));
                payloadStart += sizeof(int);
            }

            ByteBuffer schemaBuffer = ArrowReaderImplementation.CreateByteBuffer(buffer.Slice(payloadStart));
            return DecodeSchema(schemaBuffer);
        }

        /// <summary>
        /// Decodes a schema from a buffer positioned at the flatbuffer message.
        /// </summary>
        internal static Schema DecodeSchema(ByteBuffer schemaBuffer)
        {
            //DictionaryBatch not supported for now
            DictionaryMemo dictionaryMemo = null;
            Flatbuf.Schema flatbufSchema = ArrowReaderImplementation.ReadMessage<Flatbuf.Schema>(schemaBuffer);
            return MessageSerializer.GetSchema(flatbufSchema, ref dictionaryMemo);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Flatbuf;
using Apache.Arrow.Ipc;
using Google.Protobuf;
using Grpc.Core;

namespace Apache.Arrow.Flight.Internal
{
    /// <summary>
    /// Reads a schema and record batches from a stream of flight data messages.
    /// The first message is expected to carry the schema; later messages are
    /// expected to carry record batches.
    /// </summary>
    internal class RecordBatcReaderImplementation : ArrowReaderImplementation
    {
        private readonly IAsyncStreamReader<Protocol.FlightData> _flightDataStream;
        private FlightDescriptor _flightDescriptor;
        private readonly List<ByteString> _applicationMetadatas;

        public RecordBatcReaderImplementation(IAsyncStreamReader<Protocol.FlightData> streamReader)
        {
            _flightDataStream = streamReader;
            _applicationMetadatas = new List<ByteString>();
        }

        /// <summary>
        /// Synchronous reads are not implemented; always throws.
        /// </summary>
        public override RecordBatch ReadNextRecordBatch()
        {
            throw new NotImplementedException();
        }

        /// <summary>
        /// Non-empty application metadata collected since the list was last
        /// cleared, which happens at the start of every batch read.
        /// </summary>
        public IReadOnlyList<ByteString> ApplicationMetadata
        {
            get { return _applicationMetadatas; }
        }

        /// <summary>
        /// Returns the flight descriptor taken from the schema message, reading
        /// that message first if necessary. Null when the message carried none.
        /// </summary>
        public async ValueTask<FlightDescriptor> ReadFlightDescriptor()
        {
            if (!HasReadSchema)
            {
                await ReadSchema().ConfigureAwait(false);
            }
            return _flightDescriptor;
        }

        /// <summary>
        /// Returns the schema, reading the first message of the stream if the
        /// schema has not been read yet.
        /// </summary>
        public async ValueTask<Schema> ReadSchema()
        {
            if (HasReadSchema)
            {
                return Schema;
            }

            bool hasMessage = await _flightDataStream.MoveNext().ConfigureAwait(false);
            if (!hasMessage)
            {
                throw new Exception("No records or schema in this flight");
            }

            Protocol.FlightData current = _flightDataStream.Current;
            CaptureApplicationMetadata(current);

            var header = current.DataHeader.Memory;
            Message message = Message.GetRootAsMessage(
                ArrowReaderImplementation.CreateByteBuffer(header));

            if (current.FlightDescriptor != null)
            {
                _flightDescriptor = new FlightDescriptor(current.FlightDescriptor);
            }

            if (message.HeaderType != MessageHeader.Schema)
            {
                throw new Exception($"Expected schema as the first message, but got: {message.HeaderType.ToString()}");
            }

            Schema = FlightMessageSerializer.DecodeSchema(message.ByteBuffer);
            return Schema;
        }

        /// <summary>
        /// Reads the next record batch, reading the schema message first when
        /// needed.
        /// </summary>
        /// <returns>The next batch, or null when the stream has no more messages.</returns>
        /// <exception cref="NotImplementedException">
        /// The next message is not a record batch.
        /// </exception>
        public override async ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken)
        {
            _applicationMetadatas.Clear(); //Clear any metadata from previous calls

            if (!HasReadSchema)
            {
                await ReadSchema().ConfigureAwait(false);
            }

            bool hasMessage = await _flightDataStream.MoveNext().ConfigureAwait(false);
            if (!hasMessage)
            {
                return null;
            }

            Protocol.FlightData current = _flightDataStream.Current;
            CaptureApplicationMetadata(current);

            var header = current.DataHeader.Memory;
            Message message = Message.GetRootAsMessage(CreateByteBuffer(header));

            if (message.HeaderType != MessageHeader.RecordBatch)
            {
                throw new NotImplementedException();
            }

            var body = current.DataBody.Memory;
            var bodyBuffer = CreateByteBuffer(body.Slice(0, (int)message.BodyLength));
            return CreateArrowObjectFromMessage(message, bodyBuffer, null);
        }

        // Records the message's application metadata unless it is empty.
        private void CaptureApplicationMetadata(Protocol.FlightData flightData)
        {
            //AppMetadata will never be null, but length 0 if empty
            //Those are skipped
            if (flightData.AppMetadata.Length > 0)
            {
                _applicationMetadatas.Add(flightData.AppMetadata);
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Flatbuf;
using Apache.Arrow.Ipc;
using Google.Protobuf;

namespace Apache.Arrow.Flight.Internal
{
    /// <summary>
    /// Writes a schema as a schema message and exposes a helper that returns the
    /// written bytes as a <see cref="ByteString"/>.
    /// </summary>
    internal class SchemaWriter : ArrowStreamWriter
    {
        private SchemaWriter(Stream baseStream, Schema schema)
            : base(baseStream, schema)
        {
        }

        /// <summary>
        /// Serializes the schema with the base writer and writes it as a schema message
        /// with a body length of zero.
        /// </summary>
        /// <param name="schema">The schema to write.</param>
        /// <param name="cancellationToken">Accepted but not used by this method.</param>
        public void WriteSchema(Schema schema, CancellationToken cancellationToken)
        {
            WriteMessage(MessageHeader.Schema, base.SerializeSchema(schema), 0);
        }

        /// <summary>
        /// Writes the schema into an in-memory stream and returns the stream contents.
        /// </summary>
        /// <param name="schema">The schema to serialize.</param>
        /// <param name="cancellationToken">Forwarded to <see cref="WriteSchema"/>.</param>
        /// <returns>The bytes written for the schema message.</returns>
        public static ByteString SerializeSchema(Schema schema, CancellationToken cancellationToken = default(CancellationToken))
        {
            using (MemoryStream buffer = new MemoryStream())
            {
                SchemaWriter schemaWriter = new SchemaWriter(buffer, schema);
                schemaWriter.WriteSchema(schema, cancellationToken);

                // Rewind so that FromStream reads everything that was just written.
                buffer.Seek(0, SeekOrigin.Begin);
                return ByteString.FromStream(buffer);
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Grpc.Core;

namespace Apache.Arrow.Flight.Internal
{

    /// <summary>
    /// Wraps a stream reader of <typeparamref name="TIn"/> and exposes it as a stream reader of
    /// <typeparamref name="TOut"/> by converting each element as soon as it has been read.
    /// </summary>
    /// <typeparam name="TIn">Element type of the wrapped reader.</typeparam>
    /// <typeparam name="TOut">Element type exposed through <see cref="Current"/>.</typeparam>
    internal class StreamReader<TIn, TOut> : IAsyncStreamReader<TOut>
    {
        private readonly IAsyncStreamReader<TIn> _source;
        private readonly Func<TIn, TOut> _convert;

        /// <summary>
        /// Creates the adapter.
        /// </summary>
        /// <param name="inputStream">The reader to pull elements from.</param>
        /// <param name="convertFunction">Conversion applied to every element that is read.</param>
        internal StreamReader(IAsyncStreamReader<TIn> inputStream, Func<TIn, TOut> convertFunction)
        {
            _source = inputStream;
            _convert = convertFunction;
        }

        /// <summary>
        /// The converted element from the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public TOut Current { get; private set; }

        /// <summary>
        /// Advances the wrapped reader and, when an element is available, converts it into
        /// <see cref="Current"/>.
        /// </summary>
        /// <param name="cancellationToken">Token forwarded to the wrapped reader.</param>
        /// <returns>True when an element was read; false when the wrapped reader reported none.</returns>
        public async Task<bool> MoveNext(CancellationToken cancellationToken)
        {
            bool advanced = await _source.MoveNext(cancellationToken).ConfigureAwait(false);
            if (!advanced)
            {
                // Current is left untouched, exactly as before the call.
                return false;
            }

            Current = _convert(_source.Current);
            return true;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Grpc.Core;

namespace Apache.Arrow.Flight.Internal
{
    /// <summary>
    /// Wraps a stream writer of <typeparamref name="TOut"/> and exposes it as a stream writer of
    /// <typeparamref name="TIn"/> by converting each message before it is written.
    /// </summary>
    /// <typeparam name="TIn">Message type accepted by this writer.</typeparam>
    /// <typeparam name="TOut">Message type accepted by the wrapped writer.</typeparam>
    internal class StreamWriter<TIn, TOut> : IAsyncStreamWriter<TIn>
    {
        private readonly IAsyncStreamWriter<TOut> _target;
        private readonly Func<TIn, TOut> _convert;

        /// <summary>
        /// Creates the adapter.
        /// </summary>
        /// <param name="inputStream">The writer that converted messages are written to.</param>
        /// <param name="convertFunction">Conversion applied to every message before writing.</param>
        internal StreamWriter(IAsyncStreamWriter<TOut> inputStream, Func<TIn, TOut> convertFunction)
        {
            _target = inputStream;
            _convert = convertFunction;
        }

        /// <summary>
        /// Reads and writes the write options of the wrapped writer.
        /// </summary>
        public WriteOptions WriteOptions
        {
            get => _target.WriteOptions;
            set => _target.WriteOptions = value;
        }

        /// <summary>
        /// Converts the message and writes it to the wrapped writer.
        /// </summary>
        /// <param name="message">The message to convert and write.</param>
        /// <returns>The task returned by the wrapped writer.</returns>
        public Task WriteAsync(TIn message)
        {
            TOut converted = _convert(message);
            return _target.WriteAsync(converted);
        }
    }
}
+ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Apache.Arrow.Flight.AspNetCore, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] diff --git a/src/arrow/csharp/src/Apache.Arrow.Flight/Server/FlightServer.cs b/src/arrow/csharp/src/Apache.Arrow.Flight/Server/FlightServer.cs new file mode 100644 index 000000000..30b0409d4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow.Flight/Server/FlightServer.cs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Grpc.Core;

namespace Apache.Arrow.Flight.Server
{
    /// <summary>
    /// Base class for flight server implementations. Every method is virtual and its default
    /// implementation throws <see cref="NotImplementedException"/>, so a derived class overrides
    /// only the calls it supports.
    /// </summary>
    public abstract class FlightServer
    {
        /// <summary>Handles a DoPut call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task DoPut(FlightServerRecordBatchStreamReader requestStream, IAsyncStreamWriter<FlightPutResult> responseStream, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a DoGet call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task DoGet(FlightTicket ticket, FlightServerRecordBatchStreamWriter responseStream, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a ListFlights call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task ListFlights(FlightCriteria request, IAsyncStreamWriter<FlightInfo> responseStream, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a ListActions call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task ListActions(IAsyncStreamWriter<FlightActionType> responseStream, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a DoAction call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task DoAction(FlightAction request, IAsyncStreamWriter<FlightResult> responseStream, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a GetSchema call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task<Schema> GetSchema(FlightDescriptor request, ServerCallContext context)
            => throw new NotImplementedException();

        /// <summary>Handles a GetFlightInfo call. The default implementation throws.</summary>
        /// <exception cref="NotImplementedException">Thrown unless overridden.</exception>
        public virtual Task<FlightInfo> GetFlightInfo(FlightDescriptor request, ServerCallContext context)
            => throw new NotImplementedException();
    }
}
using System.Threading.Tasks;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Flight.Internal;
using Grpc.Core;

namespace Apache.Arrow.Flight.Server
{
    /// <summary>
    /// Server-side record batch stream reader that additionally exposes the flight descriptor
    /// obtained through the base reader.
    /// </summary>
    public class FlightServerRecordBatchStreamReader : FlightRecordBatchStreamReader
    {
        internal FlightServerRecordBatchStreamReader(IAsyncStreamReader<FlightData> flightDataStream)
            : base(flightDataStream)
        {
        }

        /// <summary>
        /// The flight descriptor, as returned by the base reader's GetFlightDescriptor.
        /// Each access invokes that method again.
        /// </summary>
        public ValueTask<FlightDescriptor> FlightDescriptor
        {
            get { return GetFlightDescriptor(); }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Flight.Internal;
using Grpc.Core;

namespace Apache.Arrow.Flight.Server
{
    /// <summary>
    /// Server-side record batch stream writer. All behaviour comes from the base writer; this
    /// type only adds the <see cref="IServerStreamWriter{T}"/> marker for record batches.
    /// </summary>
    public class FlightServerRecordBatchStreamWriter :
        FlightRecordBatchStreamWriter,
        IServerStreamWriter<RecordBatch>
    {
        // The second base-constructor argument is passed as null.
        internal FlightServerRecordBatchStreamWriter(IServerStreamWriter<FlightData> clientStreamWriter)
            : base(clientStreamWriter, null)
        {
        }
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Internal;
using Apache.Arrow.Flight.Protocol;
using Apache.Arrow.Flight.Server;
using Grpc.Core;

namespace Apache.Arrow.Flight.Server.Internal
{
    /// <summary>
    /// Bridges the generated gRPC service base class to a user-supplied <see cref="FlightServer"/>,
    /// converting between protocol types and the public flight types on the way in and out.
    /// This class has to be internal, since the generated code from proto is set as internal.
    /// </summary>
    internal class FlightServerImplementation : FlightService.FlightServiceBase
    {
        private readonly FlightServer _flightServer;

        /// <summary>
        /// Creates the bridge around the given server.
        /// </summary>
        /// <param name="flightServer">The server that handles the calls.</param>
        /// <exception cref="ArgumentNullException"><paramref name="flightServer"/> is null.</exception>
        public FlightServerImplementation(FlightServer flightServer)
        {
            // Fail at construction instead of with a NullReferenceException on the first call.
            _flightServer = flightServer ?? throw new ArgumentNullException(nameof(flightServer));
        }

        /// <summary>
        /// Wraps the request and response streams and forwards the call to the server's DoPut.
        /// </summary>
        public override async Task DoPut(IAsyncStreamReader<FlightData> requestStream, IServerStreamWriter<Protocol.PutResult> responseStream, ServerCallContext context)
        {
            var readStream = new FlightServerRecordBatchStreamReader(requestStream);
            var writeStream = new StreamWriter<FlightPutResult, Protocol.PutResult>(responseStream, putResult => putResult.ToProtocol());
            await _flightServer.DoPut(readStream, writeStream, context).ConfigureAwait(false);
        }

        /// <summary>
        /// Converts the ticket, wraps the response stream and forwards the call to the server's DoGet.
        /// </summary>
        public override Task DoGet(Protocol.Ticket request, IServerStreamWriter<FlightData> responseStream, ServerCallContext context)
        {
            return _flightServer.DoGet(new FlightTicket(request.Ticket_), new FlightServerRecordBatchStreamWriter(responseStream), context);
        }

        /// <summary>
        /// Converts the criteria, wraps the response stream and forwards the call to the server's ListFlights.
        /// </summary>
        public override Task ListFlights(Protocol.Criteria request, IServerStreamWriter<Protocol.FlightInfo> responseStream, ServerCallContext context)
        {
            var writeStream = new StreamWriter<FlightInfo, Protocol.FlightInfo>(responseStream, flightInfo => flightInfo.ToProtocol());
            return _flightServer.ListFlights(new FlightCriteria(request), writeStream, context);
        }

        /// <summary>
        /// Converts the action, wraps the response stream and forwards the call to the server's DoAction.
        /// </summary>
        public override Task DoAction(Protocol.Action request, IServerStreamWriter<Protocol.Result> responseStream, ServerCallContext context)
        {
            var action = new FlightAction(request);
            var writeStream = new StreamWriter<FlightResult, Protocol.Result>(responseStream, result => result.ToProtocol());
            return _flightServer.DoAction(action, writeStream, context);
        }

        /// <summary>
        /// Asks the server for the schema of the described flight and returns it serialized.
        /// </summary>
        public override async Task<SchemaResult> GetSchema(Protocol.FlightDescriptor request, ServerCallContext context)
        {
            var flightDescriptor = new FlightDescriptor(request);
            var schema = await _flightServer.GetSchema(flightDescriptor, context).ConfigureAwait(false);

            return new SchemaResult()
            {
                Schema = SchemaWriter.SerializeSchema(schema)
            };
        }

        /// <summary>
        /// Asks the server for the flight info of the described flight and converts it to the protocol type.
        /// </summary>
        public override async Task<Protocol.FlightInfo> GetFlightInfo(Protocol.FlightDescriptor request, ServerCallContext context)
        {
            var flightDescriptor = new FlightDescriptor(request);
            var flightInfo = await _flightServer.GetFlightInfo(flightDescriptor, context).ConfigureAwait(false);

            return flightInfo.ToProtocol();
        }

        /// <summary>
        /// Exchange is not yet implemented.
        /// </summary>
        /// <exception cref="RpcException">Always thrown, with <see cref="StatusCode.Unimplemented"/>.</exception>
        public override Task DoExchange(IAsyncStreamReader<FlightData> requestStream, IServerStreamWriter<FlightData> responseStream, ServerCallContext context)
        {
            // Report the gRPC status intended for unsupported methods so that clients do not
            // see a generic handler failure.
            throw new RpcException(new Status(StatusCode.Unimplemented, "DoExchange is not yet implemented"));
        }

        /// <summary>
        /// Handshake is not yet implemented.
        /// </summary>
        /// <exception cref="RpcException">Always thrown, with <see cref="StatusCode.Unimplemented"/>.</exception>
        public override Task Handshake(IAsyncStreamReader<HandshakeRequest> requestStream, IServerStreamWriter<HandshakeResponse> responseStream, ServerCallContext context)
        {
            throw new RpcException(new Status(StatusCode.Unimplemented, "Handshake is not yet implemented"));
        }

        /// <summary>
        /// Wraps the response stream and forwards the call to the server's ListActions.
        /// </summary>
        public override Task ListActions(Empty request, IServerStreamWriter<Protocol.ActionType> responseStream, ServerCallContext context)
        {
            var writeStream = new StreamWriter<FlightActionType, Protocol.ActionType>(responseStream, (actionType) => actionType.ToProtocol());
            return _flightServer.ListActions(writeStream, context);
        }
    }
}
-0,0 +1,42 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFrameworks>netstandard1.3;netcoreapp2.1</TargetFrameworks> + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> + <DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;BYTEBUFFER_NO_BOUNDS_CHECK;ENABLE_SPAN_T</DefineConstants> + + <Description>Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware.</Description> + </PropertyGroup> + + <ItemGroup> + <PackageReference Include="System.Buffers" Version="4.5.0" /> + <PackageReference Include="System.Memory" Version="4.5.2" /> + <PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="4.5.2" /> + <PackageReference Include="System.Threading.Tasks.Extensions" Version="4.5.2" /> + + <PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="All" /> + </ItemGroup> + + <ItemGroup> + <Compile Update="Properties\Resources.Designer.cs"> + <DesignTime>True</DesignTime> + <AutoGen>True</AutoGen> + <DependentUpon>Resources.resx</DependentUpon> + </Compile> + </ItemGroup> + + <ItemGroup> + <EmbeddedResource Update="Properties\Resources.resx"> + <Generator>ResXFileCodeGenerator</Generator> + <LastGenOutput>Resources.Designer.cs</LastGenOutput> + </EmbeddedResource> + </ItemGroup> + + <ItemGroup Condition="'$(TargetFramework)' == 'netstandard1.3'"> + <Compile Remove="Extensions\StreamExtensions.netcoreapp2.1.cs" /> + </ItemGroup> + <ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp2.1'"> + <Compile Remove="Extensions\StreamExtensions.netstandard.cs" /> + <Compile Remove="Extensions\TupleExtensions.netstandard.cs" /> + </ItemGroup> +</Project> diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Array.cs new file mode 100644 index 000000000..a453b0807 --- /dev/null +++ 
using System;
using System.Runtime.CompilerServices;

namespace Apache.Arrow
{
    /// <summary>
    /// Base class for Arrow arrays. All state lives in the wrapped <see cref="ArrayData"/>.
    /// </summary>
    public abstract class Array : IArrowArray
    {
        /// <summary>The data backing this array.</summary>
        public ArrayData Data { get; }

        /// <summary>
        /// Creates an array over the given data.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        protected Array(ArrayData data)
        {
            Data = data ?? throw new ArgumentNullException(nameof(data));
        }

        /// <summary>Number of elements, as recorded in <see cref="Data"/>.</summary>
        public int Length => Data.Length;

        /// <summary>Offset of the first element, as recorded in <see cref="Data"/>.</summary>
        public int Offset => Data.Offset;

        /// <summary>Null count, as recorded in <see cref="Data"/>.</summary>
        public int NullCount => Data.NullCount;

        /// <summary>The first buffer of <see cref="Data"/>, used as the validity bitmap.</summary>
        public ArrowBuffer NullBitmapBuffer => Data.Buffers[0];

        /// <summary>
        /// Dispatches this array to the visitor.
        /// </summary>
        public virtual void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }

        /// <summary>
        /// Returns true when the element at <paramref name="index"/> is not null. Arrays with a
        /// null count of zero or an empty validity bitmap treat every element as valid.
        /// </summary>
        public bool IsValid(int index) =>
            NullCount == 0 || NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index + Offset);

        /// <summary>Negation of <see cref="IsValid"/>.</summary>
        public bool IsNull(int index) => !IsValid(index);

        /// <summary>
        /// Calls the typed Visit overload when the visitor implements it for <typeparamref name="T"/>,
        /// and the untyped overload otherwise.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void Accept<T>(T array, IArrowArrayVisitor visitor)
            where T : class, IArrowArray
        {
            switch (visitor)
            {
                case IArrowArrayVisitor<T> typedVisitor:
                    typedVisitor.Visit(array);
                    break;
                default:
                    visitor.Visit(array);
                    break;
            }
        }

        /// <summary>
        /// Returns a new array that views a sub-range of this one.
        /// </summary>
        /// <param name="offset">Start of the slice, relative to the first element of this array.</param>
        /// <param name="length">Requested element count; clamped to the elements remaining after <paramref name="offset"/>.</param>
        /// <returns>The sliced array.</returns>
        /// <exception cref="ArgumentException"><paramref name="offset"/> is greater than <see cref="Length"/>.</exception>
        public Array Slice(int offset, int length)
        {
            if (offset > Length)
            {
                throw new ArgumentException($"Offset {offset} cannot be greater than Length {Length} for Array.Slice");
            }

            length = Math.Min(Data.Length - offset, length);

            // ArrayData.Slice takes an offset relative to the data's own start and adds
            // Data.Offset itself. Adding Data.Offset here as well counted it twice for arrays
            // that were already slices, shifting the result and corrupting the bounds check.
            ArrayData newData = Data.Slice(offset, length);
            return ArrowArrayFactory.BuildArray(newData) as Array;
        }

        /// <summary>
        /// Disposes the underlying data and suppresses finalization.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Disposes <see cref="Data"/> when called from <see cref="Dispose()"/>.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                Data.Dispose();
            }
        }
    }
}
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow
{
    /// <summary>
    /// Immutable description of an array: its type, length, null count, offset, buffers,
    /// child data and optional dictionary.
    /// </summary>
    public sealed class ArrayData : IDisposable
    {
        // Null count given to slices, whose real null count is not computed here.
        private const int RecalculateNullCount = -1;

        public readonly IArrowType DataType;
        public readonly int Length;
        public readonly int NullCount;
        public readonly int Offset;
        public readonly ArrowBuffer[] Buffers;
        public readonly ArrayData[] Children;
        public readonly ArrayData Dictionary; // Only used for dictionary type

        // This is left for compatibility with lower version binaries
        // before the dictionary type was supported.
        public ArrayData(
            IArrowType dataType,
            int length, int nullCount, int offset,
            IEnumerable<ArrowBuffer> buffers, IEnumerable<ArrayData> children)
            : this(dataType, length, nullCount, offset, buffers, children, null)
        {
        }

        // This is left for compatibility with lower version binaries
        // before the dictionary type was supported.
        public ArrayData(
            IArrowType dataType,
            int length, int nullCount, int offset,
            ArrowBuffer[] buffers, ArrayData[] children)
            : this(dataType, length, nullCount, offset, buffers, children, null)
        {
        }

        /// <summary>
        /// Creates array data from enumerable buffers and children, which are copied into arrays.
        /// A null <paramref name="dataType"/> is replaced by <see cref="NullType.Default"/>.
        /// </summary>
        public ArrayData(
            IArrowType dataType,
            int length, int nullCount = 0, int offset = 0,
            IEnumerable<ArrowBuffer> buffers = null, IEnumerable<ArrayData> children = null, ArrayData dictionary = null)
            : this(dataType, length, nullCount, offset, buffers?.ToArray(), children?.ToArray(), dictionary)
        {
        }

        /// <summary>
        /// Creates array data that stores the given buffer and child arrays as they are.
        /// A null <paramref name="dataType"/> is replaced by <see cref="NullType.Default"/>.
        /// </summary>
        public ArrayData(
            IArrowType dataType,
            int length, int nullCount = 0, int offset = 0,
            ArrowBuffer[] buffers = null, ArrayData[] children = null, ArrayData dictionary = null)
        {
            DataType = dataType ?? NullType.Default;
            Length = length;
            NullCount = nullCount;
            Offset = offset;
            Buffers = buffers;
            Children = children;
            Dictionary = dictionary;
        }

        /// <summary>
        /// Disposes every buffer, every non-null child and the dictionary, when present.
        /// </summary>
        public void Dispose()
        {
            if (Buffers != null)
            {
                foreach (ArrowBuffer ownedBuffer in Buffers)
                {
                    ownedBuffer.Dispose();
                }
            }

            if (Children != null)
            {
                foreach (ArrayData childData in Children)
                {
                    if (childData != null)
                    {
                        childData.Dispose();
                    }
                }
            }

            if (Dictionary != null)
            {
                Dictionary.Dispose();
            }
        }

        /// <summary>
        /// Creates array data that shares this instance's buffers, children and dictionary but
        /// covers only a sub-range of its elements.
        /// </summary>
        /// <param name="offset">Start of the slice, relative to this instance's own offset.</param>
        /// <param name="length">Requested element count; clamped to the elements remaining after <paramref name="offset"/>.</param>
        /// <returns>The sliced data, with its null count set to -1.</returns>
        /// <exception cref="ArgumentException"><paramref name="offset"/> is greater than <see cref="Length"/>.</exception>
        public ArrayData Slice(int offset, int length)
        {
            if (offset > Length)
            {
                throw new ArgumentException($"Offset {offset} cannot be greater than Length {Length} for Array.Slice");
            }

            int slicedLength = Math.Min(Length - offset, length);
            int absoluteOffset = Offset + offset;

            return new ArrayData(DataType, slicedLength, RecalculateNullCount, absoluteOffset, Buffers, Children, Dictionary);
        }
    }
}
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    /// <summary>
    /// Concatenates a list of <see cref="ArrayData"/> of the same type into a single instance.
    /// </summary>
    /// <remarks>
    /// NOTE(review): none of the buffer readers below consult <see cref="ArrayData.Offset"/>, so
    /// the inputs are presumably expected to be unsliced; confirm against callers.
    /// </remarks>
    static class ArrayDataConcatenator
    {
        /// <summary>
        /// Concatenates the given array data.
        /// </summary>
        /// <param name="arrayDataList">The inputs; the type of the first element selects the strategy.</param>
        /// <param name="allocator">Allocator passed to the buffer builders.</param>
        /// <returns>
        /// Null for a null or empty list, the single element itself for a one-element list,
        /// otherwise newly built array data.
        /// </returns>
        /// <exception cref="NotImplementedException">The array type is not supported.</exception>
        internal static ArrayData Concatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
        {
            if (arrayDataList == null || arrayDataList.Count == 0)
            {
                return null;
            }

            if (arrayDataList.Count == 1)
            {
                return arrayDataList[0];
            }

            var arrowArrayConcatenationVisitor = new ArrayDataConcatenationVisitor(arrayDataList, allocator);

            IArrowType type = arrayDataList[0].DataType;
            type.Accept(arrowArrayConcatenationVisitor);

            return arrowArrayConcatenationVisitor.Result;
        }

        /// <summary>
        /// Type visitor that builds the concatenated data for the visited type into <see cref="Result"/>.
        /// </summary>
        private class ArrayDataConcatenationVisitor :
            IArrowTypeVisitor<BooleanType>,
            IArrowTypeVisitor<FixedWidthType>,
            IArrowTypeVisitor<BinaryType>,
            IArrowTypeVisitor<StringType>,
            IArrowTypeVisitor<ListType>,
            IArrowTypeVisitor<StructType>
        {
            public ArrayData Result { get; private set; }
            private readonly IReadOnlyList<ArrayData> _arrayDataList;
            private readonly int _totalLength;
            private readonly int _totalNullCount;
            private readonly MemoryAllocator _allocator;

            public ArrayDataConcatenationVisitor(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
            {
                _arrayDataList = arrayDataList;
                _allocator = allocator;

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    _totalLength += arrayData.Length;
                    _totalNullCount += arrayData.NullCount;
                }
            }

            public void Visit(BooleanType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer valueBuffer = ConcatenateBitmapBuffer(1);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
            }

            public void Visit(FixedWidthType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
            }

            public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type);

            public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type);

            public void Visit(ListType type)
            {
                CheckData(type, 2);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();
                ArrayData child = Concatenate(SelectChildren(0), _allocator);

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child });
            }

            public void Visit(StructType type)
            {
                CheckData(type, 1);
                List<ArrayData> children = new List<ArrayData>(type.Fields.Count);

                for (int i = 0; i < type.Fields.Count; i++)
                {
                    children.Add(Concatenate(SelectChildren(i), _allocator));
                }

                // The struct itself must describe all inputs: total length, total null count and
                // a validity bitmap spanning every input. Previously only the first input's
                // values were used, so the result did not match its concatenated children.
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children);
            }

            public void Visit(IArrowType type)
            {
                throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet.");
            }

            // Verifies that every input has the expected type id and buffer count.
            private void CheckData(IArrowType type, int expectedBufferCount)
            {
                foreach (ArrayData arrayData in _arrayDataList)
                {
                    arrayData.EnsureDataType(type.TypeId);
                    arrayData.EnsureBufferCount(expectedBufferCount);
                }
            }

            // Shared by binary and string arrays: validity, offset and value buffers.
            private void ConcatenateVariableBinaryArrayData(IArrowType type)
            {
                CheckData(type, 3);
                ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
                ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();
                ArrowBuffer valueBuffer = ConcatenateVariableBinaryValueBuffer();

                Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer });
            }

            // Returns an empty buffer when the inputs report no nulls at all.
            private ArrowBuffer ConcatenateValidityBuffer()
            {
                if (_totalNullCount == 0)
                {
                    return ArrowBuffer.Empty;
                }

                return ConcatenateBitmapBuffer(0);
            }

            // Appends the bits of the selected buffer of every input; an empty buffer counts as
            // all bits set.
            private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex)
            {
                var builder = new ArrowBuffer.BitmapBuilder(_totalLength);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int length = arrayData.Length;
                    ReadOnlySpan<byte> span = arrayData.Buffers[bufferIndex].Span;

                    for (int i = 0; i < length; i++)
                    {
                        builder.Append(span.IsEmpty || BitUtility.GetBit(span, i));
                    }
                }

                return builder.Build(_allocator);
            }

            // Appends Length * byte-width bytes from the value buffer of every input.
            private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
            {
                int typeByteWidth = type.BitWidth / 8;
                var builder = new ArrowBuffer.Builder<byte>(_totalLength * typeByteWidth);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int length = arrayData.Length;
                    int byteLength = length * typeByteWidth;

                    builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength));
                }

                return builder.Build(_allocator);
            }

            // Appends the value bytes of every input up to the input's last offset.
            private ArrowBuffer ConcatenateVariableBinaryValueBuffer()
            {
                var builder = new ArrowBuffer.Builder<byte>();

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    int lastOffset = arrayData.Buffers[1].Span.CastTo<int>()[arrayData.Length];
                    builder.Append(arrayData.Buffers[2].Span.Slice(0, lastOffset));
                }

                return builder.Build(_allocator);
            }

            // Builds one offset buffer, rebasing each input's offsets onto the end of the previous input.
            private ArrowBuffer ConcatenateOffsetBuffer()
            {
                var builder = new ArrowBuffer.Builder<int>(_totalLength + 1);
                int baseOffset = 0;

                builder.Append(0);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    if (arrayData.Length == 0)
                    {
                        continue;
                    }

                    // The first offset is always 0.
                    // It is skipped because it duplicates the last offset already in the builder.
                    ReadOnlySpan<int> span = arrayData.Buffers[1].Span.CastTo<int>().Slice(1, arrayData.Length);

                    foreach (int offset in span)
                    {
                        builder.Append(baseOffset + offset);
                    }

                    // The next offset must start from the current last offset.
                    baseOffset += span[arrayData.Length - 1];
                }

                return builder.Build(_allocator);
            }

            // Collects the child at the given position from every input.
            private List<ArrayData> SelectChildren(int index)
            {
                var children = new List<ArrayData>(_arrayDataList.Count);

                foreach (ArrayData arrayData in _arrayDataList)
                {
                    children.Add(arrayData.Children[index]);
                }

                return children;
            }
        }
    }
}

using Apache.Arrow.Types;
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// Creates the array builder that corresponds to a given Arrow data type.
    /// </summary>
    static class ArrowArrayBuilderFactory
    {
        /// <summary>
        /// Returns a new, empty builder for <paramref name="dataType"/>.
        /// </summary>
        /// <param name="dataType">Type of the array to be built.</param>
        /// <returns>A builder whose concrete type is selected by <c>dataType.TypeId</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dataType"/> is <c>null</c>.</exception>
        /// <exception cref="NotSupportedException">No builder exists for the type id.</exception>
        internal static IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> Build(IArrowType dataType)
        {
            if (dataType == null)
            {
                throw new ArgumentNullException(nameof(dataType));
            }

            switch (dataType.TypeId)
            {
                case ArrowTypeId.Boolean:
                    return new BooleanArray.Builder();
                case ArrowTypeId.UInt8:
                    return new UInt8Array.Builder();
                case ArrowTypeId.Int8:
                    return new Int8Array.Builder();
                case ArrowTypeId.UInt16:
                    return new UInt16Array.Builder();
                case ArrowTypeId.Int16:
                    return new Int16Array.Builder();
                case ArrowTypeId.UInt32:
                    return new UInt32Array.Builder();
                case ArrowTypeId.Int32:
                    return new Int32Array.Builder();
                case ArrowTypeId.UInt64:
                    return new UInt64Array.Builder();
                case ArrowTypeId.Int64:
                    return new Int64Array.Builder();
                case ArrowTypeId.Float:
                    return new FloatArray.Builder();
                case ArrowTypeId.Double:
                    return new DoubleArray.Builder();
                case ArrowTypeId.String:
                    return new StringArray.Builder();
                case ArrowTypeId.Binary:
                    return new BinaryArray.Builder();
                case ArrowTypeId.Timestamp:
                    return new TimestampArray.Builder();
                case ArrowTypeId.Date64:
                    return new Date64Array.Builder();
                case ArrowTypeId.Date32:
                    return new Date32Array.Builder();
                // Direct casts: a type whose TypeId does not match its concrete class now fails here
                // with an InvalidCastException instead of handing null to the builder constructor.
                case ArrowTypeId.List:
                    return new ListArray.Builder((ListType)dataType);
                case ArrowTypeId.Decimal128:
                    return new Decimal128Array.Builder((Decimal128Type)dataType);
                case ArrowTypeId.Decimal256:
                    return new Decimal256Array.Builder((Decimal256Type)dataType);
                case ArrowTypeId.Struct:
                case ArrowTypeId.Union:
                case ArrowTypeId.Dictionary:
                case ArrowTypeId.FixedSizedBinary:
                case ArrowTypeId.HalfFloat:
                case ArrowTypeId.Interval:
                case ArrowTypeId.Map:
                case ArrowTypeId.Time32:
                case ArrowTypeId.Time64:
                default:
                    throw new NotSupportedException($"An ArrowArrayBuilder cannot be built for type {dataType.TypeId}.");
            }
        }
    }
}
diff --git 
a/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayConcatenator.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayConcatenator.cs new file mode 100644 index 000000000..cc151210a --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayConcatenator.cs @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using Apache.Arrow.Memory;
using System.Collections.Generic;

namespace Apache.Arrow
{
    /// <summary>
    /// Joins several Arrow arrays into a single array.
    /// </summary>
    static class ArrowArrayConcatenator
    {
        /// <summary>
        /// Concatenates <paramref name="arrowArrayList"/> in order.
        /// </summary>
        /// <param name="arrowArrayList">Arrays to join.</param>
        /// <param name="allocator">Optional allocator passed on to the data concatenator.</param>
        /// <returns>
        /// <c>null</c> for a null or empty list, the sole element for a one-element list, otherwise
        /// a new array built from the concatenated data.
        /// </returns>
        internal static IArrowArray Concatenate(IReadOnlyList<IArrowArray> arrowArrayList , MemoryAllocator allocator = default)
        {
            int count = arrowArrayList?.Count ?? 0;

            switch (count)
            {
                case 0:
                    return null;
                case 1:
                    // Nothing to merge: hand back the original instance.
                    return arrowArrayList[0];
            }

            var dataList = new List<ArrayData>(count);

            for (int i = 0; i < count; i++)
            {
                dataList.Add(arrowArrayList[i].Data);
            }

            ArrayData merged = ArrayDataConcatenator.Concatenate(dataList, allocator);
            return ArrowArrayFactory.BuildArray(merged);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs new file mode 100644 index 000000000..2b74709e7 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Arrays;
using Apache.Arrow.Types;
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// Wraps raw <see cref="ArrayData"/> in the strongly typed array class for its data type.
    /// </summary>
    public static class ArrowArrayFactory
    {
        /// <summary>
        /// Creates the array object that matches <c>data.DataType.TypeId</c>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        /// <returns>A new array instance over <paramref name="data"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        /// <exception cref="NotSupportedException">No array class exists for the type id.</exception>
        public static IArrowArray BuildArray(ArrayData data)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            switch (data.DataType.TypeId)
            {
                case ArrowTypeId.Boolean:
                    return new BooleanArray(data);
                case ArrowTypeId.UInt8:
                    return new UInt8Array(data);
                case ArrowTypeId.Int8:
                    return new Int8Array(data);
                case ArrowTypeId.UInt16:
                    return new UInt16Array(data);
                case ArrowTypeId.Int16:
                    return new Int16Array(data);
                case ArrowTypeId.UInt32:
                    return new UInt32Array(data);
                case ArrowTypeId.Int32:
                    return new Int32Array(data);
                case ArrowTypeId.UInt64:
                    return new UInt64Array(data);
                case ArrowTypeId.Int64:
                    return new Int64Array(data);
                case ArrowTypeId.Float:
                    return new FloatArray(data);
                case ArrowTypeId.Double:
                    return new DoubleArray(data);
                case ArrowTypeId.String:
                    return new StringArray(data);
                case ArrowTypeId.FixedSizedBinary:
                    return new FixedSizeBinaryArray(data);
                case ArrowTypeId.Binary:
                    return new BinaryArray(data);
                case ArrowTypeId.Timestamp:
                    return new TimestampArray(data);
                case ArrowTypeId.List:
                    return new ListArray(data);
                case ArrowTypeId.Struct:
                    return new StructArray(data);
                case ArrowTypeId.Union:
                    return new UnionArray(data);
                case ArrowTypeId.Date64:
                    return new Date64Array(data);
                case ArrowTypeId.Date32:
                    return new Date32Array(data);
                case ArrowTypeId.Decimal128:
                    return new Decimal128Array(data);
                case ArrowTypeId.Decimal256:
                    return new Decimal256Array(data);
                case ArrowTypeId.Dictionary:
                    return new DictionaryArray(data);
                case ArrowTypeId.HalfFloat:
                case ArrowTypeId.Interval:
                case ArrowTypeId.Map:
                case ArrowTypeId.Time32:
                case ArrowTypeId.Time64:
                default:
                    throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayVisitor.cs 
b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayVisitor.cs new file mode 100644 index 000000000..fc56b6601 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/ArrowArrayVisitor.cs @@ -0,0 +1,22 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace Apache.Arrow
{
    /// <summary>
    /// Base class for array visitors whose default <see cref="Visit(IArrowArray)"/> does nothing.
    /// </summary>
    public abstract class ArrowArrayVisitor : IArrowArrayVisitor
    {
        /// <summary>Default visit: intentionally a no-op; override to handle the array.</summary>
        /// <param name="array">Array being visited.</param>
        public virtual void Visit(IArrowArray array)
        {
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs new file mode 100644 index 000000000..4fd8059f6 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -0,0 +1,358 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using Apache.Arrow.Memory;

namespace Apache.Arrow
{
    /// <summary>
    /// Array of variable-length byte sequences backed by three buffers: validity bitmap,
    /// 32-bit value offsets, and value bytes.
    /// </summary>
    public class BinaryArray : Array
    {
        /// <summary>
        /// Builder for <see cref="BinaryArray"/> instances.
        /// </summary>
        public class Builder : BuilderBase<BinaryArray, Builder>
        {
            public Builder() : base(BinaryType.Default) { }
            public Builder(IArrowType dataType) : base(dataType) { }

            protected override BinaryArray Build(ArrayData data)
            {
                return new BinaryArray(data);
            }
        }

        /// <summary>
        /// Wraps <paramref name="data"/>, which must be of type <see cref="ArrowTypeId.Binary"/> and
        /// carry exactly three buffers.
        /// </summary>
        public BinaryArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Binary);
            data.EnsureBufferCount(3);
        }

        /// <summary>
        /// Wraps <paramref name="data"/>, which must have the type id <paramref name="typeId"/> and
        /// carry exactly three buffers.
        /// </summary>
        public BinaryArray(ArrowTypeId typeId, ArrayData data)
            : base(data)
        {
            data.EnsureDataType(typeId);
            data.EnsureBufferCount(3);
        }

        /// <summary>
        /// Shared builder logic for arrays that use the variable-length binary layout.
        /// </summary>
        public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte, TArray, TBuilder>
            where TArray : IArrowArray
            where TBuilder : class, IArrowArrayBuilder<byte, TArray, TBuilder>
        {
            protected IArrowType DataType { get; }
            protected TBuilder Instance => this as TBuilder;
            protected ArrowBuffer.Builder<int> ValueOffsets { get; }
            protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
            protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }

            /// <summary>Running end offset, in bytes, of the values appended so far.</summary>
            protected int Offset { get; set; }
            protected int NullCount => ValidityBuffer.UnsetBitCount;

            protected BuilderBase(IArrowType dataType)
            {
                DataType = dataType;
                ValueOffsets = new ArrowBuffer.Builder<int>();
                ValueBuffer = new ArrowBuffer.Builder<byte>();
                ValidityBuffer = new ArrowBuffer.BitmapBuilder();

                // From the docs:
                //
                // The offsets buffer contains length + 1 signed integers (either 32-bit or 64-bit, depending on the
                // logical type), which encode the start position of each slot in the data buffer. The length of the
                // value in each slot is computed using the difference between the offset at that slot’s index and the
                // subsequent offset.
                //
                // In this builder, we choose to append the first offset (zero) upon construction, and each trailing
                // offset is then added after each individual item has been appended.
                ValueOffsets.Append(Offset);
            }

            protected abstract TArray Build(ArrayData data);

            /// <summary>
            /// Gets the length of the array built so far.
            /// </summary>
            public int Length => ValueOffsets.Length - 1;

            /// <summary>
            /// Build an Arrow array from the appended contents so far.
            /// </summary>
            /// <param name="allocator">Optional memory allocator.</param>
            /// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
            public TArray Build(MemoryAllocator allocator = default)
            {
                var bufs = new[]
                {
                    // No validity bitmap is materialised when nothing is null.
                    NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty,
                    ValueOffsets.Build(allocator),
                    ValueBuffer.Build(allocator),
                };
                var data = new ArrayData(
                    DataType,
                    length: Length,
                    NullCount,
                    offset: 0,
                    bufs);

                return Build(data);
            }

            /// <summary>
            /// Append a single null value to the array.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder AppendNull()
            {
                // Do not add to the value buffer in the case of a null.
                // Note that we do not need to increment the offset as a result.
                ValidityBuffer.Append(false);
                ValueOffsets.Append(Offset);
                return Instance;
            }

            /// <summary>
            /// Appends a value, consisting of a single byte, to the array.
            /// </summary>
            /// <param name="value">Byte value to append.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Append(byte value)
            {
                ValueBuffer.Append(value);
                ValidityBuffer.Append(true);
                Offset++;
                ValueOffsets.Append(Offset);
                return Instance;
            }

            /// <summary>
            /// Append a value, consisting of a span of bytes, to the array.
            /// </summary>
            /// <remarks>
            /// Note that a single value is added, which consists of arbitrarily many bytes.  If multiple values are
            /// to be added, use the <see cref="AppendRange"/> method.
            /// </remarks>
            /// <param name="span">Span of bytes to add.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Append(ReadOnlySpan<byte> span)
            {
                ValueBuffer.Append(span);
                ValidityBuffer.Append(true);
                Offset += span.Length;
                ValueOffsets.Append(Offset);
                return Instance;
            }

            /// <summary>
            /// Append a value, consisting of an enumerable collection of bytes, to the array.
            /// </summary>
            /// <remarks>
            /// Note that this method appends a single value, which may consist of arbitrarily many bytes.  If multiple
            /// values are to be added, use the <see cref="AppendRange(IEnumerable{byte})"/> method instead.
            /// </remarks>
            /// <param name="value">Enumerable collection of bytes to add.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Append(IEnumerable<byte> value)
            {
                if (value == null)
                {
                    return AppendNull();
                }

                // Note: by looking at the length of the value buffer before and after, we avoid having to iterate
                // through the enumerable multiple times to get both length and contents.
                int priorLength = ValueBuffer.Length;
                ValueBuffer.AppendRange(value);
                int valueLength = ValueBuffer.Length - priorLength;
                Offset += valueLength;
                ValidityBuffer.Append(true);
                ValueOffsets.Append(Offset);
                return Instance;
            }

            /// <summary>
            /// Append an enumerable collection of single-byte values to the array.
            /// </summary>
            /// <remarks>
            /// Note that this method appends multiple values, each of which is a single byte.  If a single value is
            /// to be added, use the <see cref="Append(IEnumerable{byte})"/> method instead.
            /// </remarks>
            /// <param name="values">Single-byte values to add.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
            public TBuilder AppendRange(IEnumerable<byte> values)
            {
                if (values == null)
                {
                    throw new ArgumentNullException(nameof(values));
                }

                foreach (byte b in values)
                {
                    Append(b);
                }

                return Instance;
            }

            /// <summary>
            /// Append an enumerable collection of values to the array.
            /// </summary>
            /// <param name="values">Values to add; a <c>null</c> element is appended as a null value.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
            public TBuilder AppendRange(IEnumerable<byte[]> values)
            {
                if (values == null)
                {
                    throw new ArgumentNullException(nameof(values));
                }

                foreach (byte[] arr in values)
                {
                    if (arr == null)
                    {
                        AppendNull();
                    }
                    else
                    {
                        Append((ReadOnlySpan<byte>)arr);
                    }
                }

                return Instance;
            }

            /// <summary>Reserves capacity in the underlying buffer builders.</summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Reserve(int capacity)
            {
                // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way.
                ValueOffsets.Reserve(capacity + 1);
                ValueBuffer.Reserve(capacity);
                ValidityBuffer.Reserve(capacity + 1);
                return Instance;
            }

            /// <summary>Resizes the underlying buffer builders.</summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Resize(int length)
            {
                // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`.
                // NOTE(review): the validity bitmap is resized to length + 1 although the array holds
                // `length` values - confirm this does not skew NullCount.
                ValueOffsets.Resize(length + 1);
                ValueBuffer.Resize(length);
                ValidityBuffer.Resize(length + 1);
                return Instance;
            }

            public TBuilder Swap(int i, int j)
            {
                // TODO: Implement
                throw new NotImplementedException();
            }

            public TBuilder Set(int index, byte value)
            {
                // TODO: Implement
                throw new NotImplementedException();
            }

            /// <summary>
            /// Clear all contents appended so far.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public TBuilder Clear()
            {
                ValueOffsets.Clear();
                ValueBuffer.Clear();
                ValidityBuffer.Clear();

                // Always write the first offset before anything has been written.
                Offset = 0;
                ValueOffsets.Append(Offset);
                return Instance;
            }
        }

        /// <summary>
        /// Creates an array from its individual buffers; they are stored in the order
        /// validity, offsets, data.
        /// </summary>
        public BinaryArray(IArrowType dataType, int length,
            ArrowBuffer valueOffsetsBuffer,
            ArrowBuffer dataBuffer,
            ArrowBuffer nullBitmapBuffer,
            int nullCount = 0, int offset = 0)
        : this(new ArrayData(dataType, length, nullCount, offset,
            new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
        { }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];

        public ArrowBuffer ValueBuffer => Data.Buffers[2];

        /// <summary>The <c>Length + 1</c> offsets of this array, starting at <c>Offset</c>.</summary>
        public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1);

        public ReadOnlySpan<byte> Values => ValueBuffer.Span.CastTo<byte>();

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        [Obsolete("This method has been deprecated. Please use ValueOffsets[index] instead.")]
        public int GetValueOffset(int index)
        {
            // index == Length is allowed: it addresses the trailing end offset.
            if (index < 0 || index > Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            return ValueOffsets[index];
        }

        /// <summary>
        /// Gets the byte length of the value at <paramref name="index"/>; 0 for a null value.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
        /// </exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetValueLength(int index)
        {
            if (index < 0 || index >= Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            if (!IsValid(index))
            {
                return 0;
            }

            ReadOnlySpan<int> offsets = ValueOffsets;
            return offsets[index + 1] - offsets[index];
        }

        /// <summary>
        /// Get the collection of bytes, as a read-only span, at a given index in the array.
        /// </summary>
        /// <remarks>
        /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte
        /// collection values when seen in the context of this method's return type of <see cref="ReadOnlySpan{Byte}"/>.
        /// Use the <see cref="Array.IsNull"/> method instead to reliably determine null values.
        /// </remarks>
        /// <param name="index">Index at which to get bytes.</param>
        /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
        /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
        /// </exception>
        public ReadOnlySpan<byte> GetBytes(int index)
        {
            if (index < 0 || index >= Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            if (IsNull(index))
            {
                // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span
                // is actually returned as an empty span.
                return ReadOnlySpan<byte>.Empty;
            }

            // Index and nullness are already validated above, so read the offsets once instead of
            // going back through GetValueLength (which would repeat both checks and re-slice).
            ReadOnlySpan<int> offsets = ValueOffsets;
            int start = offsets[index];
            return ValueBuffer.Span.Slice(start, offsets[index + 1] - start);
        }

    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs new file mode 100644 index 000000000..0915338fe --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs @@ -0,0 +1,194 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    /// <summary>
    /// Array of nullable booleans stored as two bitmaps: validity (buffer 0) and values (buffer 1).
    /// </summary>
    public class BooleanArray: Array
    {
        /// <summary>
        /// Builder for <see cref="BooleanArray"/> instances.
        /// </summary>
        public class Builder : IArrowArrayBuilder<bool, BooleanArray, Builder>
        {
            private ArrowBuffer.BitmapBuilder ValueBuffer { get; }
            private ArrowBuffer.BitmapBuilder ValidityBuffer { get; }

            public int Length => ValueBuffer.Length;
            public int Capacity => ValueBuffer.Capacity;
            public int NullCount => ValidityBuffer.UnsetBitCount;

            public Builder()
            {
                ValueBuffer = new ArrowBuffer.BitmapBuilder();
                ValidityBuffer = new ArrowBuffer.BitmapBuilder();
            }

            /// <summary>Appends a non-null value.</summary>
            public Builder Append(bool value)
            {
                return NullableAppend(value);
            }

            /// <summary>Appends a value, or a null when <paramref name="value"/> has no value.</summary>
            public Builder NullableAppend(bool? value)
            {
                // Note that we rely on the fact that null values are false in the value buffer.
                ValueBuffer.Append(value ?? false);
                ValidityBuffer.Append(value.HasValue);
                return this;
            }

            /// <summary>Appends every element of <paramref name="span"/> as a non-null value.</summary>
            public Builder Append(ReadOnlySpan<bool> span)
            {
                foreach (bool value in span)
                {
                    Append(value);
                }
                return this;
            }

            /// <summary>Appends every element of <paramref name="values"/> as a non-null value.</summary>
            /// <exception cref="ArgumentNullException"><paramref name="values"/> is <c>null</c>.</exception>
            public Builder AppendRange(IEnumerable<bool> values)
            {
                // Same contract as BinaryArray.BuilderBase.AppendRange: reject null up front rather
                // than failing with a NullReferenceException inside the loop.
                if (values == null)
                {
                    throw new ArgumentNullException(nameof(values));
                }

                foreach (bool value in values)
                {
                    Append(value);
                }
                return this;
            }

            /// <summary>Appends a single null value.</summary>
            public Builder AppendNull()
            {
                return NullableAppend(null);
            }

            /// <summary>Builds the array from the contents appended so far.</summary>
            /// <param name="allocator">Optional memory allocator.</param>
            public BooleanArray Build(MemoryAllocator allocator = default)
            {
                // No validity bitmap is materialised when nothing is null.
                ArrowBuffer validityBuffer = NullCount > 0
                    ? ValidityBuffer.Build(allocator)
                    : ArrowBuffer.Empty;

                return new BooleanArray(
                    ValueBuffer.Build(allocator), validityBuffer,
                    Length, NullCount, 0);
            }

            /// <summary>Clears both bitmaps.</summary>
            public Builder Clear()
            {
                ValueBuffer.Clear();
                ValidityBuffer.Clear();
                return this;
            }

            /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is negative.</exception>
            public Builder Reserve(int capacity)
            {
                if (capacity < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(capacity));
                }

                ValueBuffer.Reserve(capacity);
                ValidityBuffer.Reserve(capacity);
                return this;
            }

            /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
            public Builder Resize(int length)
            {
                if (length < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(length));
                }

                ValueBuffer.Resize(length);
                ValidityBuffer.Resize(length);
                return this;
            }

            /// <summary>Flips the value at <paramref name="index"/> and marks it as non-null.</summary>
            public Builder Toggle(int index)
            {
                CheckIndex(index);

                // If there is a null at this index, assume it was set to false in the value buffer, and so becomes
                // true/non-null after toggling.
                ValueBuffer.Toggle(index);
                ValidityBuffer.Set(index);
                return this;
            }

            /// <summary>Sets the value bit at <paramref name="index"/> and marks it as non-null.</summary>
            public Builder Set(int index)
            {
                CheckIndex(index);
                ValueBuffer.Set(index);
                ValidityBuffer.Set(index);
                return this;
            }

            /// <summary>Stores <paramref name="value"/> at <paramref name="index"/> and marks it as non-null.</summary>
            public Builder Set(int index, bool value)
            {
                CheckIndex(index);
                ValueBuffer.Set(index, value);
                ValidityBuffer.Set(index);
                return this;
            }

            /// <summary>Swaps the entries (value and validity) at the two indices.</summary>
            public Builder Swap(int i, int j)
            {
                CheckIndex(i);
                CheckIndex(j);
                ValueBuffer.Swap(i, j);
                ValidityBuffer.Swap(i, j);
                return this;
            }

            private void CheckIndex(int index)
            {
                if (index < 0 || index >= Length)
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
            }
        }

        public ArrowBuffer ValueBuffer => Data.Buffers[1];

        /// <summary>
        /// The first <c>ceil(Length / 8)</c> bytes of the value bitmap. The slice starts at byte 0
        /// and does not take <c>Offset</c> into account.
        /// </summary>
        public ReadOnlySpan<byte> Values => ValueBuffer.Span.Slice(0, (int) Math.Ceiling(Length / 8.0));

        public BooleanArray(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(BooleanType.Default, length, nullCount, offset,
                new[] { nullBitmapBuffer, valueBuffer }))
        { }

        public BooleanArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Boolean);
        }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        [Obsolete("GetBoolean does not support null values. Use GetValue instead (which this method invokes internally).")]
        public bool GetBoolean(int index)
        {
            return GetValue(index).GetValueOrDefault();
        }

        /// <summary>Gets the value at <paramref name="index"/>, or <c>null</c> for a null entry.</summary>
        public bool? GetValue(int index)
        {
            return IsNull(index)
                ? (bool?)null
                : BitUtility.GetBit(ValueBuffer.Span, index + Offset);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Date32Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Date32Array.cs new file mode 100644 index 000000000..35c0065e1 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Date32Array.cs @@ -0,0 +1,112 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.
See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Types;
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// The <see cref="Date32Array"/> class holds an array of dates in the <c>Date32</c> format, where each date is
    /// stored as the number of days since the dawn of (UNIX) time.
    /// </summary>
    public class Date32Array : PrimitiveArray<int>
    {
        private static readonly DateTime _epochDate = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Unspecified);

        /// <summary>
        /// The <see cref="Builder"/> class can be used to fluently build <see cref="Date32Array"/> objects.
        /// </summary>
        public class Builder : DateArrayBuilder<int, Date32Array, Builder>
        {
            private class DateBuilder : PrimitiveArrayBuilder<int, Date32Array, DateBuilder>
            {
                protected override Date32Array Build(
                    ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                    int length, int nullCount, int offset) =>
                    new Date32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }

            /// <summary>
            /// Construct a new instance of the <see cref="Builder"/> class.
            /// </summary>
            public Builder() : base(new DateBuilder()) { }

            /// <summary>Converts the date part of <paramref name="dateTime"/> to days since 1970-01-01.</summary>
            protected override int Convert(DateTime dateTime)
            {
                return (int)(dateTime.Date - _epochDate).TotalDays;
            }

            /// <summary>Converts the UTC date of <paramref name="dateTimeOffset"/> to days since 1970-01-01.</summary>
            protected override int Convert(DateTimeOffset dateTimeOffset)
            {
                // The internal value stored for a DateTimeOffset can be thought of as the number of 24-hour "blocks"
                // of time that have elapsed since the UNIX epoch.  This is the same as converting it to UTC first and
                // then taking the date element from that.  It is not the same as what would result from looking at the
                // DateTimeOffset.Date property.
                return (int)(dateTimeOffset.UtcDateTime.Date - _epochDate).TotalDays;
            }
        }

        public Date32Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(Date32Type.Default, length, nullCount, offset,
                new[] { nullBitmapBuffer, valueBuffer }))
        { }

        public Date32Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Date32);
        }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        [Obsolete("Use `GetDateTimeOffset()` instead")]
        public DateTimeOffset? GetDate(int index) => GetDateTimeOffset(index);

        /// <summary>
        /// Get the date at the specified index in the form of a <see cref="DateTime"/> object.
        /// </summary>
        /// <remarks>
        /// The <see cref="DateTime.Kind"/> property of the returned object is set to
        /// <see cref="DateTimeKind.Unspecified"/>.
        /// </remarks>
        /// <param name="index">Index at which to get the date.</param>
        /// <returns>Returns a <see cref="DateTime"/> object, or <c>null</c> if there is no object at that index.
        /// </returns>
        public DateTime? GetDateTime(int index)
        {
            int? value = GetValue(index);
            return value.HasValue
                ? _epochDate.AddDays(value.Value)
                : default(DateTime?);
        }

        /// <summary>
        /// Get the date at the specified index in the form of a <see cref="DateTimeOffset"/> object.
        /// </summary>
        /// <param name="index">Index at which to get the date.</param>
        /// <returns>Returns a <see cref="DateTimeOffset"/> object, or <c>null</c> if there is no object at that index.
        /// </returns>
        public DateTimeOffset? GetDateTimeOffset(int index)
        {
            int? value = GetValue(index);
            return value.HasValue
                ? new DateTimeOffset(_epochDate.AddDays(value.Value), TimeSpan.Zero)
                : default(DateTimeOffset?);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Date64Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Date64Array.cs new file mode 100644 index 000000000..cf977b2e4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Date64Array.cs @@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Types;
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// The <see cref="Date64Array"/> class holds an array of dates in the <c>Date64</c> format, where each date is
    /// stored as the number of milliseconds since the dawn of (UNIX) time, excluding leap seconds, in multiples of
    /// 86400000.
    /// </summary>
    public class Date64Array: PrimitiveArray<long>
    {
        private const long MillisecondsPerDay = 86400000;

        public Date64Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(Date64Type.Default, length, nullCount, offset,
                new[] { nullBitmapBuffer, valueBuffer }))
        { }

        /// <summary>
        /// The <see cref="Builder"/> class can be used to fluently build <see cref="Date64Array"/> objects.
        /// </summary>
        public class Builder : DateArrayBuilder<long, Date64Array, Builder>
        {
            private class DateBuilder: PrimitiveArrayBuilder<long, Date64Array, DateBuilder>
            {
                protected override Date64Array Build(
                    ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                    int length, int nullCount, int offset) =>
                    new Date64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }

            /// <summary>
            /// Construct a new instance of the <see cref="Builder"/> class.
            /// </summary>
            public Builder() : base(new DateBuilder()) { }

            /// <summary>
            /// Converts the date part of <paramref name="dateTime"/> (read as UTC midnight) to
            /// milliseconds since the UNIX epoch.
            /// </summary>
            protected override long Convert(DateTime dateTime)
            {
                var dateTimeOffset = new DateTimeOffset(
                    DateTime.SpecifyKind(dateTime.Date, DateTimeKind.Unspecified),
                    TimeSpan.Zero);
                return dateTimeOffset.ToUnixTimeMilliseconds();
            }

            /// <summary>
            /// Converts <paramref name="dateTimeOffset"/> to milliseconds since the UNIX epoch,
            /// rounded down to a whole multiple of 86400000.
            /// </summary>
            protected override long Convert(DateTimeOffset dateTimeOffset)
            {
                // The internal value stored for a DateTimeOffset can be thought of as the number of milliseconds,
                // in multiples of 86400000, that have passed since the UNIX epoch.  It is not the same as what would
                // result from encoding the date from the DateTimeOffset.Date property.
                long millis = dateTimeOffset.ToUnixTimeMilliseconds();
                long days = millis / MillisecondsPerDay;

                // Integer division truncates toward zero, so step down one day only when a negative
                // instant has a partial-day remainder.  An instant exactly on a day boundary (for
                // example -86400000) is already a whole number of days and must not be shifted.
                if (millis % MillisecondsPerDay < 0)
                {
                    days--;
                }

                return days * MillisecondsPerDay;
            }
        }

        public Date64Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Date64);
        }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        [Obsolete("Use `GetDateTimeOffset()` instead")]
        public DateTimeOffset? GetDate(int index) => GetDateTimeOffset(index);

        /// <summary>
        /// Get the date at the specified index in the form of a <see cref="DateTime"/> object.
        /// </summary>
        /// <remarks>
        /// The <see cref="DateTime.Kind"/> property of the returned object is set to
        /// <see cref="DateTimeKind.Unspecified"/>.
        /// </remarks>
        /// <param name="index">Index at which to get the date.</param>
        /// <returns>Returns a <see cref="DateTime"/> object, or <c>null</c> if there is no object at that index.
        /// </returns>
        public DateTime? GetDateTime(int index)
        {
            long? value = GetValue(index);
            return value.HasValue
                ? DateTimeOffset.FromUnixTimeMilliseconds(value.Value).Date
                : default(DateTime?);
        }

        /// <summary>
        /// Get the date at the specified index in the form of a <see cref="DateTimeOffset"/> object.
        /// </summary>
        /// <param name="index">Index at which to get the date.</param>
        /// <returns>Returns a <see cref="DateTimeOffset"/> object, or <c>null</c> if there is no object at that index.
        /// </returns>
        public DateTimeOffset? GetDateTimeOffset(int index)
        {
            long? value = GetValue(index);
            return value.HasValue
                ? DateTimeOffset.FromUnixTimeMilliseconds(value.Value)
                : default(DateTimeOffset?);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/DateArrayBuilder.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/DateArrayBuilder.cs new file mode 100644 index 000000000..4e69f6fe3 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/DateArrayBuilder.cs @@ -0,0 +1,209 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.
/// <summary>
/// Abstract builder for date arrays. It accepts <see cref="DateTime"/> and <see cref="DateTimeOffset"/>
/// inputs, converts each one to the underlying storage type <typeparamref name="TUnderlying"/> and hands the
/// converted value to an inner builder.
/// </summary>
public abstract class DateArrayBuilder<TUnderlying, TArray, TBuilder> :
    DelegatingArrayBuilder<TUnderlying, TArray, TBuilder>,
    IArrowArrayBuilder<DateTime, TArray, TBuilder>,
    IArrowArrayBuilder<DateTimeOffset, TArray, TBuilder>
    where TArray : IArrowArray
    where TBuilder : class, IArrowArrayBuilder<TArray>
{
    /// <summary>
    /// Initialise the builder around the inner builder that stores the converted values.
    /// </summary>
    /// <param name="innerBuilder">Inner builder that will produce arrays of type
    /// <typeparamref name="TArray"/>.</param>
    protected DateArrayBuilder(IArrowArrayBuilder<TUnderlying, TArray, IArrowArrayBuilder<TArray>> innerBuilder)
        : base(innerBuilder)
    { }

    // This instance viewed as the concrete builder type, as returned by every fluent method.
    private TBuilder Self => this as TBuilder;

    /// <summary>
    /// Append one date given as a <see cref="DateTime"/>.
    /// </summary>
    /// <remarks>
    /// The <see cref="DateTime.Kind"/> of the input has no effect on the behaviour of this method.
    /// </remarks>
    /// <param name="value">Date to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Append(DateTime value)
    {
        TUnderlying converted = Convert(value);
        InnerBuilder.Append(converted);
        return Self;
    }

    /// <summary>
    /// Append one date given as a <see cref="DateTimeOffset"/>.
    /// </summary>
    /// <remarks>
    /// The value is first converted to UTC and the date is taken from the UTC date/time. Depending on
    /// <see cref="DateTimeOffset.Offset"/>, that may differ from <see cref="DateTimeOffset.Date"/>.
    /// </remarks>
    /// <param name="value">Date to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Append(DateTimeOffset value)
    {
        TUnderlying converted = Convert(value);
        InnerBuilder.Append(converted);
        return Self;
    }

    /// <summary>
    /// Append every date in a span of <see cref="DateTime"/> values.
    /// </summary>
    /// <remarks>
    /// The <see cref="DateTime.Kind"/> of the inputs has no effect on the behaviour of this method.
    /// </remarks>
    /// <param name="span">Span of dates to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Append(ReadOnlySpan<DateTime> span)
    {
        // Reserve room for the whole span up front, then append item by item.
        InnerBuilder.Reserve(span.Length);
        for (int position = 0; position < span.Length; position++)
        {
            InnerBuilder.Append(Convert(span[position]));
        }

        return Self;
    }

    /// <summary>
    /// Append every date in a span of <see cref="DateTimeOffset"/> values.
    /// </summary>
    /// <remarks>
    /// Each value is first converted to UTC and the date is taken from the UTC date/time. Depending on
    /// each <see cref="DateTimeOffset.Offset"/>, that may differ from <see cref="DateTimeOffset.Date"/>.
    /// </remarks>
    /// <param name="span">Span of dates to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Append(ReadOnlySpan<DateTimeOffset> span)
    {
        // Reserve room for the whole span up front, then append item by item.
        InnerBuilder.Reserve(span.Length);
        for (int position = 0; position < span.Length; position++)
        {
            InnerBuilder.Append(Convert(span[position]));
        }

        return Self;
    }

    /// <summary>
    /// Append a null date.
    /// </summary>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder AppendNull()
    {
        InnerBuilder.AppendNull();
        return Self;
    }

    /// <summary>
    /// Append every date in a collection of <see cref="DateTime"/> values.
    /// </summary>
    /// <remarks>
    /// The <see cref="DateTime.Kind"/> of the inputs has no effect on the behaviour of this method.
    /// </remarks>
    /// <param name="values">Collection of dates to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder AppendRange(IEnumerable<DateTime> values)
    {
        IEnumerable<TUnderlying> converted = values.Select(item => Convert(item));
        InnerBuilder.AppendRange(converted);
        return Self;
    }

    /// <summary>
    /// Append every date in a collection of <see cref="DateTimeOffset"/> values.
    /// </summary>
    /// <remarks>
    /// Each value is first converted to UTC and the date is taken from the UTC date/time. Depending on
    /// each <see cref="DateTimeOffset.Offset"/>, that may differ from <see cref="DateTimeOffset.Date"/>.
    /// </remarks>
    /// <param name="values">Collection of dates to add.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder AppendRange(IEnumerable<DateTimeOffset> values)
    {
        IEnumerable<TUnderlying> converted = values.Select(item => Convert(item));
        InnerBuilder.AppendRange(converted);
        return Self;
    }

    /// <summary>
    /// Overwrite the item at an index with a date given as a <see cref="DateTime"/>.
    /// </summary>
    /// <remarks>
    /// The <see cref="DateTime.Kind"/> of the input has no effect on the behaviour of this method.
    /// </remarks>
    /// <param name="index">Index at which to set value.</param>
    /// <param name="value">Date to set.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Set(int index, DateTime value)
    {
        TUnderlying converted = Convert(value);
        InnerBuilder.Set(index, converted);
        return Self;
    }

    /// <summary>
    /// Overwrite the item at an index with a date given as a <see cref="DateTimeOffset"/>.
    /// </summary>
    /// <remarks>
    /// The value is first converted to UTC and the date is taken from the UTC date/time. Depending on
    /// <see cref="DateTimeOffset.Offset"/>, that may differ from <see cref="DateTimeOffset.Date"/>.
    /// </remarks>
    /// <param name="index">Index at which to set value.</param>
    /// <param name="value">Date to set.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Set(int index, DateTimeOffset value)
    {
        TUnderlying converted = Convert(value);
        InnerBuilder.Set(index, converted);
        return Self;
    }

    /// <summary>
    /// Exchange the dates stored at two indices.
    /// </summary>
    /// <param name="i">First index.</param>
    /// <param name="j">Second index.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Swap(int i, int j)
    {
        InnerBuilder.Swap(i, j);
        return Self;
    }

    /// <summary>
    /// Convert a <see cref="DateTime"/> to the underlying storage representation.
    /// </summary>
    protected abstract TUnderlying Convert(DateTime dateTime);

    /// <summary>
    /// Convert a <see cref="DateTimeOffset"/> to the underlying storage representation.
    /// </summary>
    protected abstract TUnderlying Convert(DateTimeOffset dateTimeOffset);
}
/// <summary>
/// Fixed-size binary array whose items are decimal values described by a <see cref="Decimal128Type"/>.
/// </summary>
public class Decimal128Array : FixedSizeBinaryArray
{
    /// <summary>
    /// Fluent builder that accepts <see cref="decimal"/> values and produces a <see cref="Decimal128Array"/>.
    /// </summary>
    public class Builder : BuilderBase<Decimal128Array, Builder>
    {
        /// <summary>
        /// Create a builder for the given decimal type, using a 16-byte item width.
        /// </summary>
        /// <param name="type">Decimal type supplying precision, scale and byte width.</param>
        public Builder(Decimal128Type type) : base(type, 16)
        {
            DataType = type;
        }

        /// <summary>
        /// The decimal type of the array being built (hides the base property to expose the concrete type).
        /// </summary>
        protected new Decimal128Type DataType { get; }

        protected override Decimal128Array Build(ArrayData data) => new Decimal128Array(data);

        /// <summary>
        /// Encode a decimal into its byte representation and append it.
        /// </summary>
        /// <param name="value">Value to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public Builder Append(decimal value)
        {
            int width = DataType.ByteWidth;
            Span<byte> encoded = stackalloc byte[width];
            DecimalUtility.GetBytes(value, DataType.Precision, DataType.Scale, width, encoded);

            return Append(encoded);
        }

        /// <summary>
        /// Append every decimal in a collection, in enumeration order.
        /// </summary>
        /// <param name="values">Values to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
        public Builder AppendRange(IEnumerable<decimal> values)
        {
            if (values is null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            foreach (decimal item in values)
            {
                Append(item);
            }

            return Instance;
        }

        /// <summary>
        /// Encode a decimal into its byte representation and store it at an existing index.
        /// </summary>
        /// <param name="index">Index at which to set the value.</param>
        /// <param name="value">Value to store.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public Builder Set(int index, decimal value)
        {
            int width = DataType.ByteWidth;
            Span<byte> encoded = stackalloc byte[width];
            DecimalUtility.GetBytes(value, DataType.Precision, DataType.Scale, width, encoded);

            return Set(index, encoded);
        }
    }

    /// <summary>
    /// Construct the array over existing data, checking the type id and the buffer count.
    /// </summary>
    /// <param name="data">Array data of type <see cref="ArrowTypeId.Decimal128"/> with two buffers.</param>
    public Decimal128Array(ArrayData data)
        : base(ArrowTypeId.Decimal128, data)
    {
        data.EnsureDataType(ArrowTypeId.Decimal128);
        data.EnsureBufferCount(2);
        Debug.Assert(Data.DataType is Decimal128Type);
    }

    public override void Accept(IArrowArrayVisitor visitor)
    {
        Accept(this, visitor);
    }

    // The array's data type viewed as its concrete decimal type.
    private Decimal128Type DecimalType => (Decimal128Type)Data.DataType;

    /// <summary>Scale declared by the array's decimal type.</summary>
    public int Scale => DecimalType.Scale;

    /// <summary>Precision declared by the array's decimal type.</summary>
    public int Precision => DecimalType.Precision;

    /// <summary>Byte width declared by the array's decimal type.</summary>
    public int ByteWidth => DecimalType.ByteWidth;

    /// <summary>
    /// Read the decimal stored at an index.
    /// </summary>
    /// <param name="index">Index of the item to read.</param>
    /// <returns>The decoded value, or <c>null</c> when the item is null.</returns>
    public decimal? GetValue(int index)
    {
        return IsNull(index)
            ? (decimal?)null
            : DecimalUtility.GetDecimal(ValueBuffer, index, Scale, ByteWidth);
    }
}
/// <summary>
/// Fixed-size binary array whose items are decimal values described by a <see cref="Decimal256Type"/>.
/// </summary>
public class Decimal256Array : FixedSizeBinaryArray
{
    /// <summary>
    /// Fluent builder that accepts <see cref="decimal"/> values and produces a <see cref="Decimal256Array"/>.
    /// </summary>
    public class Builder : BuilderBase<Decimal256Array, Builder>
    {
        /// <summary>
        /// Create a builder for the given decimal type, using a 32-byte item width.
        /// </summary>
        /// <param name="type">Decimal type supplying precision, scale and byte width.</param>
        public Builder(Decimal256Type type) : base(type, 32)
        {
            DataType = type;
        }

        /// <summary>
        /// The decimal type of the array being built (hides the base property to expose the concrete type).
        /// </summary>
        protected new Decimal256Type DataType { get; }

        protected override Decimal256Array Build(ArrayData data) => new Decimal256Array(data);

        /// <summary>
        /// Encode a decimal into its byte representation and append it.
        /// </summary>
        /// <param name="value">Value to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public Builder Append(decimal value)
        {
            int width = DataType.ByteWidth;
            Span<byte> encoded = stackalloc byte[width];
            DecimalUtility.GetBytes(value, DataType.Precision, DataType.Scale, width, encoded);

            return Append(encoded);
        }

        /// <summary>
        /// Append every decimal in a collection, in enumeration order.
        /// </summary>
        /// <param name="values">Values to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
        public Builder AppendRange(IEnumerable<decimal> values)
        {
            if (values is null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            foreach (decimal item in values)
            {
                Append(item);
            }

            return Instance;
        }

        /// <summary>
        /// Encode a decimal into its byte representation and store it at an existing index.
        /// </summary>
        /// <param name="index">Index at which to set the value.</param>
        /// <param name="value">Value to store.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public Builder Set(int index, decimal value)
        {
            int width = DataType.ByteWidth;
            Span<byte> encoded = stackalloc byte[width];
            DecimalUtility.GetBytes(value, DataType.Precision, DataType.Scale, width, encoded);

            return Set(index, encoded);
        }
    }

    /// <summary>
    /// Construct the array over existing data, checking the type id and the buffer count.
    /// </summary>
    /// <param name="data">Array data of type <see cref="ArrowTypeId.Decimal256"/> with two buffers.</param>
    public Decimal256Array(ArrayData data)
        : base(ArrowTypeId.Decimal256, data)
    {
        data.EnsureDataType(ArrowTypeId.Decimal256);
        data.EnsureBufferCount(2);
        Debug.Assert(Data.DataType is Decimal256Type);
    }

    public override void Accept(IArrowArrayVisitor visitor)
    {
        Accept(this, visitor);
    }

    // The array's data type viewed as its concrete decimal type.
    private Decimal256Type DecimalType => (Decimal256Type)Data.DataType;

    /// <summary>Scale declared by the array's decimal type.</summary>
    public int Scale => DecimalType.Scale;

    /// <summary>Precision declared by the array's decimal type.</summary>
    public int Precision => DecimalType.Precision;

    /// <summary>Byte width declared by the array's decimal type.</summary>
    public int ByteWidth => DecimalType.ByteWidth;

    /// <summary>
    /// Read the decimal stored at an index.
    /// </summary>
    /// <param name="index">Index of the item to read.</param>
    /// <returns>The decoded value, or <c>null</c> when the item is null.</returns>
    public decimal? GetValue(int index)
    {
        return IsNull(index)
            ? (decimal?)null
            : DecimalUtility.GetDecimal(ValueBuffer, index, Scale, ByteWidth);
    }
}
/// <summary>
/// Base class for array builders that forward most of their work to an inner array builder.
/// </summary>
/// <remarks>
/// Typically used when a builder accepts several input types that are all converted to one internal type
/// before being assembled into an array.
/// </remarks>
/// <typeparam name="T">Type of item accepted by inner array builder.</typeparam>
/// <typeparam name="TArray">Type of array produced by this (and the inner) builder.</typeparam>
/// <typeparam name="TBuilder">Type of builder (see Curiously-Recurring Template Pattern).</typeparam>
public abstract class DelegatingArrayBuilder<T, TArray, TBuilder> : IArrowArrayBuilder<TArray, TBuilder>
    where TArray : IArrowArray
    where TBuilder : class, IArrowArrayBuilder<TArray>
{
    /// <summary>
    /// The inner builder that every operation is forwarded to.
    /// </summary>
    protected IArrowArrayBuilder<T, TArray, IArrowArrayBuilder<TArray>> InnerBuilder { get; }

    /// <summary>
    /// Number of items held by the inner builder so far.
    /// </summary>
    public int Length
    {
        get { return InnerBuilder.Length; }
    }

    /// <summary>
    /// Initialise the builder around an inner builder.
    /// </summary>
    /// <param name="innerBuilder">Inner array builder.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="innerBuilder"/> is null.</exception>
    protected DelegatingArrayBuilder(IArrowArrayBuilder<T, TArray, IArrowArrayBuilder<TArray>> innerBuilder)
    {
        if (innerBuilder == null)
        {
            throw new ArgumentNullException(nameof(innerBuilder));
        }

        InnerBuilder = innerBuilder;
    }

    // This instance viewed as the concrete builder type, as returned by every fluent method.
    private TBuilder AsBuilder => this as TBuilder;

    /// <summary>
    /// Build an Arrow array from everything appended so far.
    /// </summary>
    /// <param name="allocator">Optional memory allocator.</param>
    /// <returns>Returns the built array.</returns>
    public TArray Build(MemoryAllocator allocator = default)
    {
        return InnerBuilder.Build(allocator);
    }

    /// <summary>
    /// Reserve additional capacity for a given number of items.
    /// </summary>
    /// <param name="additionalCapacity">Number of items of required additional capacity.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Reserve(int additionalCapacity)
    {
        InnerBuilder.Reserve(additionalCapacity);
        return AsBuilder;
    }

    /// <summary>
    /// Resize the array to a given size.
    /// </summary>
    /// <remarks>
    /// If the requested capacity is larger than the populated length, the contents of the new, expanded
    /// region are undefined. If it is smaller, the array is truncated and the items at the end are lost.
    /// </remarks>
    /// <param name="capacity">Number of items of required capacity.</param>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Resize(int capacity)
    {
        InnerBuilder.Resize(capacity);
        return AsBuilder;
    }

    /// <summary>
    /// Remove everything appended so far.
    /// </summary>
    /// <returns>Returns the builder (for fluent-style composition).</returns>
    public TBuilder Clear()
    {
        InnerBuilder.Clear();
        return AsBuilder;
    }
}
/// <summary>
/// Dictionary-encoded Arrow array: an array of indices paired with a dictionary array.
/// </summary>
public class DictionaryArray : Array
{
    /// <summary>The dictionary array.</summary>
    public IArrowArray Dictionary { get; }

    /// <summary>The indices array.</summary>
    public IArrowArray Indices { get; }

    /// <summary>The second buffer of this array's data.</summary>
    public ArrowBuffer IndicesBuffer
    {
        get { return Data.Buffers[1]; }
    }

    /// <summary>
    /// Construct a dictionary array from array data that carries its own dictionary.
    /// </summary>
    /// <param name="data">Array data of type <see cref="ArrowTypeId.Dictionary"/> with two buffers.</param>
    /// <exception cref="ArgumentException">If the data has no dictionary attached.</exception>
    public DictionaryArray(ArrayData data) : base(data)
    {
        data.EnsureBufferCount(2);
        data.EnsureDataType(ArrowTypeId.Dictionary);

        var dictionaryData = data.Dictionary;
        if (dictionaryData == null)
        {
            throw new ArgumentException($"{nameof(data.Dictionary)} must not be null");
        }

        DictionaryType dictionaryType = (DictionaryType)data.DataType;
        dictionaryData.EnsureDataType(dictionaryType.ValueType.TypeId);

        // The indices share this array's length, null count, offset, buffers and children,
        // but are typed with the dictionary's index type.
        Indices = ArrowArrayFactory.BuildArray(
            new ArrayData(
                dictionaryType.IndexType,
                data.Length,
                data.NullCount,
                data.Offset,
                data.Buffers,
                data.Children));
        Dictionary = ArrowArrayFactory.BuildArray(dictionaryData);
    }

    /// <summary>
    /// Construct a dictionary array from an existing indices array and dictionary array.
    /// </summary>
    /// <param name="dataType">Dictionary type naming the index and value types.</param>
    /// <param name="indicesArray">Array of indices; its type is checked against the index type.</param>
    /// <param name="dictionary">Dictionary array; its type is checked against the value type.</param>
    public DictionaryArray(DictionaryType dataType, IArrowArray indicesArray, IArrowArray dictionary)
        : base(new ArrayData(
            dataType,
            indicesArray.Length,
            indicesArray.Data.NullCount,
            indicesArray.Data.Offset,
            indicesArray.Data.Buffers,
            indicesArray.Data.Children,
            dictionary.Data))
    {
        Data.EnsureBufferCount(2);

        indicesArray.Data.EnsureDataType(dataType.IndexType.TypeId);
        dictionary.Data.EnsureDataType(dataType.ValueType.TypeId);

        Indices = indicesArray;
        Dictionary = dictionary;
    }

    public override void Accept(IArrowArrayVisitor visitor)
    {
        Accept(this, visitor);
    }
}
/// <summary>
/// Primitive Arrow array of <see cref="double"/> values, typed as <see cref="DoubleType"/>.
/// </summary>
public class DoubleArray : PrimitiveArray<double>
{
    /// <summary>
    /// Fluent builder that produces <see cref="DoubleArray"/> instances.
    /// </summary>
    public class Builder : PrimitiveArrayBuilder<double, DoubleArray, Builder>
    {
        /// <summary>
        /// Wrap the finished buffers in a new <see cref="DoubleArray"/>.
        /// </summary>
        protected override DoubleArray Build(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
        {
            return new DoubleArray(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
        }
    }

    /// <summary>
    /// Construct the array from its raw buffers.
    /// </summary>
    /// <param name="valueBuffer">Buffer holding the values.</param>
    /// <param name="nullBitmapBuffer">Buffer holding the validity bitmap.</param>
    /// <param name="length">Number of items in the array.</param>
    /// <param name="nullCount">Number of null items in the array.</param>
    /// <param name="offset">Offset passed through to the underlying <see cref="ArrayData"/>.</param>
    public DoubleArray(
        ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
        int length, int nullCount, int offset)
        : this(new ArrayData(
            DoubleType.Default, length, nullCount, offset,
            new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
    { }

    /// <summary>
    /// Construct the array over existing data, checking its type id.
    /// </summary>
    /// <param name="data">Array data of type <see cref="ArrowTypeId.Double"/>.</param>
    public DoubleArray(ArrayData data)
        : base(data)
    {
        data.EnsureDataType(ArrowTypeId.Double);
    }

    public override void Accept(IArrowArrayVisitor visitor)
    {
        Accept(this, visitor);
    }
}
/// <summary>
/// Arrow array whose items are byte sequences of a single fixed width, taken from the
/// <see cref="FixedSizeBinaryType.ByteWidth"/> of the array's data type.
/// </summary>
public class FixedSizeBinaryArray : Array
{
    /// <summary>
    /// Construct the array over existing data of type <see cref="ArrowTypeId.FixedSizedBinary"/>.
    /// </summary>
    /// <param name="data">Array data; its type id and buffer count (two) are checked.</param>
    public FixedSizeBinaryArray(ArrayData data)
        : base(data)
    {
        data.EnsureDataType(ArrowTypeId.FixedSizedBinary);
        data.EnsureBufferCount(2);
    }

    /// <summary>
    /// Construct the array over existing data of a caller-specified type id (used by derived array types).
    /// </summary>
    /// <param name="typeId">Type id the data is checked against.</param>
    /// <param name="data">Array data; its type id and buffer count (two) are checked.</param>
    public FixedSizeBinaryArray(ArrowTypeId typeId, ArrayData data)
        : base(data)
    {
        data.EnsureDataType(typeId);
        data.EnsureBufferCount(2);
    }

    public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

    /// <summary>The second buffer of this array's data, which holds the item bytes.</summary>
    public ArrowBuffer ValueBuffer => Data.Buffers[1];

    /// <summary>
    /// Get the collection of bytes, as a read-only span, at a given index in the array.
    /// </summary>
    /// <remarks>
    /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte
    /// collection values when seen in the context of this method's return type of <see cref="ReadOnlySpan{Byte}"/>.
    /// Use the <see cref="Array.IsNull"/> method instead to reliably determine null values.
    /// </remarks>
    /// <param name="index">Index at which to get bytes.</param>
    /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
    /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
    /// </exception>
    public ReadOnlySpan<byte> GetBytes(int index)
    {
        if (index < 0 || index >= Length)
        {
            throw new ArgumentOutOfRangeException(nameof(index));
        }

        if (IsNull(index))
        {
            // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span
            // is actually returned as an empty span.
            return ReadOnlySpan<byte>.Empty;
        }

        int size = ((FixedSizeBinaryType)Data.DataType).ByteWidth;
        return ValueBuffer.Span.Slice(index * size, size);
    }

    /// <summary>
    /// Base class for builders of fixed-size binary arrays. Item bytes are accumulated in a value buffer and
    /// one validity bit per item is accumulated in a bitmap.
    /// </summary>
    /// <typeparam name="TArray">Type of array produced by the builder.</typeparam>
    /// <typeparam name="TBuilder">Concrete builder type returned by the fluent methods.</typeparam>
    public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte[], TArray, TBuilder>
        where TArray : IArrowArray
        where TBuilder : class, IArrowArrayBuilder<byte[], TArray, TBuilder>
    {
        /// <summary>Data type given to the built array.</summary>
        protected IArrowType DataType { get; }

        /// <summary>This instance viewed as the concrete builder type.</summary>
        protected TBuilder Instance => this as TBuilder;

        /// <summary>Number of bytes per item.</summary>
        protected int ByteWidth { get; }

        /// <summary>Accumulated item bytes.</summary>
        protected ArrowBuffer.Builder<byte> ValueBuffer { get; }

        /// <summary>Accumulated validity bits.</summary>
        protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }

        /// <summary>Number of items so far, derived from the value buffer length and the byte width.</summary>
        public int Length => ValueBuffer.Length / ByteWidth;

        /// <summary>Number of null items so far, derived from the unset bits of the validity bitmap.</summary>
        protected int NullCount => this.ValidityBuffer.UnsetBitCount;

        /// <summary>Create the concrete array over the assembled array data.</summary>
        protected abstract TArray Build(ArrayData data);

        /// <summary>
        /// Initialise the builder with empty value and validity buffers.
        /// </summary>
        /// <param name="dataType">Data type given to the built array.</param>
        /// <param name="byteWidth">Number of bytes per item.</param>
        protected BuilderBase(IArrowType dataType, int byteWidth)
        {
            DataType = dataType;
            ByteWidth = byteWidth;
            ValueBuffer = new ArrowBuffer.Builder<byte>();
            ValidityBuffer = new ArrowBuffer.BitmapBuilder();
        }

        /// <summary>
        /// Build an array from everything appended so far.
        /// </summary>
        /// <param name="allocator">Optional memory allocator.</param>
        /// <returns>Returns the built array.</returns>
        public TArray Build(MemoryAllocator allocator = default)
        {
            var bufs = new[]
            {
                // The validity bitmap is only materialised when at least one item is null.
                NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty,
                ValueBuffer.Build(ByteWidth, allocator),
            };
            var data = new ArrayData(
                DataType,
                Length,
                NullCount,
                0,
                bufs);

            return Build(data);
        }

        /// <summary>
        /// Reserve capacity for a number of items.
        /// </summary>
        /// <param name="capacity">Number of items to reserve room for.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Reserve(int capacity)
        {
            ValueBuffer.Reserve(capacity * ByteWidth);
            ValidityBuffer.Reserve(capacity + 1);
            return Instance;
        }

        /// <summary>
        /// Resize the builder to hold a number of items.
        /// </summary>
        /// <param name="length">Number of items.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Resize(int length)
        {
            ValueBuffer.Resize(length * ByteWidth);
            // NOTE(review): the bitmap is sized to length + 1 bits while Length becomes `length`; since
            // NullCount is derived from the bitmap's unset bits, confirm the extra bit is intended.
            ValidityBuffer.Resize(length + 1);
            return Instance;
        }

        /// <summary>
        /// Remove everything appended so far.
        /// </summary>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Clear()
        {
            ValueBuffer.Clear();
            ValidityBuffer.Clear();

            return Instance;
        }

        /// <summary>
        /// Append one item given as a byte array.
        /// </summary>
        /// <param name="value">Bytes of the item.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="value"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">If the length of <paramref name="value"/> is not a
        /// multiple of the fixed byte width.</exception>
        public TBuilder Append(byte[] value)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            // NOTE(review): any multiple of ByteWidth (including zero) is accepted here although only one
            // validity bit is recorded per call — confirm whether an exact-width check is intended.
            if (value.Length % ByteWidth != 0)
            {
                // The single-string constructor treats its argument as the parameter name, so the
                // (paramName, message) overload is required for the text to surface as the message.
                throw new ArgumentOutOfRangeException(
                    nameof(value),
                    "Bytes of length: " + value.Length + " do not conform to the fixed size: " + ByteWidth);
            }

            return Append(value.AsSpan());
        }

        /// <summary>
        /// Append every byte array in a span, each as one item.
        /// </summary>
        /// <param name="span">Span of items to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Append(ReadOnlySpan<byte[]> span)
        {
            foreach (var b in span)
            {
                Append(b);
            }

            return Instance;
        }

        /// <summary>
        /// Append every byte array in a collection, each as one item.
        /// </summary>
        /// <param name="values">Items to append.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
        public TBuilder AppendRange(IEnumerable<byte[]> values)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            foreach (byte[] b in values)
            {
                Append(b);
            }

            return Instance;
        }

        /// <summary>
        /// Append one item given as a span of bytes and mark it valid. The span length is not checked.
        /// </summary>
        /// <param name="span">Bytes of the item.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Append(ReadOnlySpan<byte> span)
        {
            ValueBuffer.Append(span);
            ValidityBuffer.Append(true);
            return Instance;
        }

        /// <summary>
        /// Append a null item: zero-filled bytes of the fixed width plus an unset validity bit.
        /// </summary>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder AppendNull()
        {
            ValueBuffer.Append(new byte[ByteWidth]);
            ValidityBuffer.Append(false);
            return Instance;
        }

        /// <summary>
        /// Exchange the bytes and the validity bits of two items.
        /// </summary>
        /// <param name="i">First index.</param>
        /// <param name="j">Second index.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Swap(int i, int j)
        {
            int iStart = i * ByteWidth;
            int jStart = j * ByteWidth;

            // Item i is copied out first because its bytes are overwritten inside the loop.
            byte[] iBytes = ValueBuffer.Span.Slice(iStart, ByteWidth).ToArray();
            Span<byte> jBytes = ValueBuffer.Span.Slice(jStart, ByteWidth);

            for (int m = 0; m < ByteWidth; m++)
            {
                ValueBuffer.Span[iStart + m] = jBytes[m];
                ValueBuffer.Span[jStart + m] = iBytes[m];
            }

            ValidityBuffer.Swap(i, j);
            return Instance;
        }

        /// <summary>
        /// Overwrite the item at an index with the given byte array and mark it valid.
        /// </summary>
        /// <param name="index">Index of the item to overwrite.</param>
        /// <param name="value">New bytes of the item.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Set(int index, byte[] value)
        {
            return Set(index, value.AsSpan());
        }

        /// <summary>
        /// Overwrite the item at an index with the first <see cref="ByteWidth"/> bytes of a span and mark
        /// it valid.
        /// </summary>
        /// <param name="index">Index of the item to overwrite.</param>
        /// <param name="value">New bytes of the item.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder Set(int index, ReadOnlySpan<byte> value)
        {
            int startIndex = index * ByteWidth;
            for (int i = 0; i < ByteWidth; i++)
            {
                ValueBuffer.Span[startIndex + i] = value[i];
            }

            ValidityBuffer.Set(index, true);
            return Instance;
        }

        /// <summary>
        /// Turn the item at an index into a null: zero its bytes and unset its validity bit.
        /// </summary>
        /// <param name="index">Index of the item to null out.</param>
        /// <returns>Returns the builder (for fluent-style composition).</returns>
        public TBuilder SetNull(int index)
        {
            int startIndex = index * ByteWidth;
            for (int i = 0; i < ByteWidth; i++)
            {
                ValueBuffer.Span[startIndex + i] = 0;
            }

            ValidityBuffer.Set(index, false);
            return Instance;
        }
    }
}
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array whose elements are <see cref="float"/> values
    /// (<see cref="FloatType"/>).
    /// </summary>
    public class FloatArray : PrimitiveArray<float>
    {
        /// <summary>
        /// Builder that produces <see cref="FloatArray"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<float, FloatArray, Builder>
        {
            /// <summary>
            /// Wraps the finished buffers in a new <see cref="FloatArray"/>.
            /// </summary>
            protected override FloatArray Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new FloatArray(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Float"/>.
        /// </summary>
        public FloatArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Float);
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public FloatArray(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                FloatType.Default, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Int16Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int16Array.cs
// new file mode 100644 index 000000000..0401865c9 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int16Array.cs @@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array whose elements are <see cref="short"/> values
    /// (<see cref="Int16Type"/>).
    /// </summary>
    public class Int16Array : PrimitiveArray<short>
    {
        /// <summary>
        /// Builder that produces <see cref="Int16Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<short, Int16Array, Builder>
        {
            /// <summary>
            /// Wraps the finished buffers in a new <see cref="Int16Array"/>.
            /// </summary>
            protected override Int16Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new Int16Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Int16"/>.
        /// </summary>
        public Int16Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Int16);
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public Int16Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                Int16Type.Default, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Int32Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int32Array.cs
// new file mode 100644 index 000000000..ef356c7a6 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int32Array.cs @@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array whose elements are <see cref="int"/> values
    /// (<see cref="Int32Type"/>).
    /// </summary>
    public class Int32Array : PrimitiveArray<int>
    {
        /// <summary>
        /// Builder that produces <see cref="Int32Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<int, Int32Array, Builder>
        {
            /// <summary>
            /// Wraps the finished buffers in a new <see cref="Int32Array"/>.
            /// </summary>
            protected override Int32Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new Int32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Int32"/>.
        /// </summary>
        public Int32Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Int32);
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public Int32Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                Int32Type.Default, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Int64Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int64Array.cs
// new file mode 100644 index 000000000..fe8fbc62a --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int64Array.cs @@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array whose elements are <see cref="long"/> values
    /// (<see cref="Int64Type"/>).
    /// </summary>
    public class Int64Array : PrimitiveArray<long>
    {
        /// <summary>
        /// Builder that produces <see cref="Int64Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<long, Int64Array, Builder>
        {
            /// <summary>
            /// Wraps the finished buffers in a new <see cref="Int64Array"/>.
            /// </summary>
            protected override Int64Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new Int64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Int64"/>.
        /// </summary>
        public Int64Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Int64);
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public Int64Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                Int64Type.Default, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/Int8Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int8Array.cs
// new file mode 100644 index 000000000..58d543a10 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/Int8Array.cs @@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array whose elements are <see cref="sbyte"/> values
    /// (<see cref="Int8Type"/>).
    /// </summary>
    public class Int8Array : PrimitiveArray<sbyte>
    {
        /// <summary>
        /// Builder that produces <see cref="Int8Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<sbyte, Int8Array, Builder>
        {
            /// <summary>
            /// Wraps the finished buffers in a new <see cref="Int8Array"/>.
            /// </summary>
            protected override Int8Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new Int8Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Int8"/>.
        /// </summary>
        public Int8Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Int8);
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public Int8Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                Int8Type.Default, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/ListArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/ListArray.cs
// new file mode 100644 index 000000000..97673cb48 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/ListArray.cs @@ -0,0 +1,200 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array of variable-length lists: an offsets buffer (buffer 1)
    /// indexes into a single child <see cref="Values"/> array.
    /// </summary>
    public class ListArray : Array
    {
        /// <summary>
        /// Incrementally builds a <see cref="ListArray"/>. Call
        /// <see cref="Append"/> (or <see cref="AppendNull"/>) to open a list
        /// slot, then add that slot's elements through <see cref="ValueBuilder"/>.
        /// </summary>
        public class Builder : IArrowArrayBuilder<ListArray, Builder>
        {
            /// <summary>Builder for the child values shared by all list slots.</summary>
            public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }

            /// <summary>Number of offsets recorded so far.</summary>
            public int Length => ValueOffsetsBufferBuilder.Length;

            private ArrowBuffer.Builder<int> ValueOffsetsBufferBuilder { get; }

            private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }

            /// <summary>Number of slots added through <see cref="AppendNull"/>.</summary>
            public int NullCount { get; protected set; }

            private IArrowType DataType { get; }

            public Builder(IArrowType valueDataType) : this(new ListType(valueDataType))
            {
            }

            public Builder(Field valueField) : this(new ListType(valueField))
            {
            }

            internal Builder(ListType dataType)
            {
                ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
                ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
                ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
                DataType = dataType;
            }

            /// <summary>
            /// Start a new variable-length list slot
            ///
            /// This function should be called before beginning to append elements to the
            /// value builder
            /// </summary>
            /// <returns>This builder.</returns>
            public Builder Append()
            {
                // The slot starts wherever the value builder currently ends.
                ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
                ValidityBufferBuilder.Append(true);

                return this;
            }

            /// <summary>
            /// Adds a null list slot and increments <see cref="NullCount"/>.
            /// </summary>
            /// <returns>This builder.</returns>
            public Builder AppendNull()
            {
                ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
                ValidityBufferBuilder.Append(false);
                NullCount++;

                return this;
            }

            /// <summary>
            /// Builds the array. A closing offset is appended to the offsets
            /// builder first, so the resulting array holds <c>Length - 1</c> lists.
            /// The validity bitmap is only materialised when at least one null
            /// slot was appended.
            /// </summary>
            /// <param name="allocator">Allocator forwarded to the buffer builders.</param>
            public ListArray Build(MemoryAllocator allocator = default)
            {
                ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);

                ArrowBuffer validityBuffer = NullCount > 0
                    ? ValidityBufferBuilder.Build(allocator)
                    : ArrowBuffer.Empty;

                return new ListArray(DataType, Length - 1,
                    ValueOffsetsBufferBuilder.Build(allocator), ValueBuilder.Build(allocator),
                    validityBuffer, NullCount, 0);
            }

            /// <summary>
            /// Reserves room for <paramref name="capacity"/> slots plus the closing offset.
            /// </summary>
            public Builder Reserve(int capacity)
            {
                ValueOffsetsBufferBuilder.Reserve(capacity + 1);
                ValidityBufferBuilder.Reserve(capacity + 1);
                return this;
            }

            /// <summary>
            /// Resizes the offsets and validity builders to <paramref name="length"/> + 1 entries.
            /// </summary>
            public Builder Resize(int length)
            {
                ValueOffsetsBufferBuilder.Resize(length + 1);
                ValidityBufferBuilder.Resize(length + 1);
                return this;
            }

            /// <summary>
            /// Resets the builder to its empty state: offsets, child values,
            /// validity bits and the null counter.
            /// </summary>
            public Builder Clear()
            {
                ValueOffsetsBufferBuilder.Clear();
                ValueBuilder.Clear();
                ValidityBufferBuilder.Clear();
                // The validity bits were just discarded, so the counter derived
                // from them must be reset too; otherwise a reused builder would
                // report nulls that no longer exist.
                NullCount = 0;
                return this;
            }

        }

        /// <summary>Child array holding the elements of every list.</summary>
        public IArrowArray Values { get; }

        /// <summary>Raw offsets buffer (buffer 1 of the array data).</summary>
        public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];

        /// <summary>
        /// Offsets for this array's slice: <c>Length + 1</c> entries starting at <c>Offset</c>.
        /// </summary>
        public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1);

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the offsets as buffer 1; <paramref name="values"/>
        /// becomes the single child.
        /// </summary>
        public ListArray(IArrowType dataType, int length,
            ArrowBuffer valueOffsetsBuffer, IArrowArray values,
            ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
            : this(new ArrayData(dataType, length, nullCount, offset,
                new[] { nullBitmapBuffer, valueOffsetsBuffer }, new[] { values.Data }),
                values)
        {
        }

        /// <summary>
        /// Wraps existing array data, building the child array from <c>data.Children[0]</c>.
        /// </summary>
        public ListArray(ArrayData data)
            : this(data, ArrowArrayFactory.BuildArray(data.Children[0]))
        {
        }

        private ListArray(ArrayData data, IArrowArray values) : base(data)
        {
            data.EnsureBufferCount(2);
            data.EnsureDataType(ArrowTypeId.List);
            Values = values;
        }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);


        [Obsolete("This method has been deprecated. Please use ValueOffsets[index] instead.")]
        public int GetValueOffset(int index)
        {
            // index == Length is allowed: it addresses the closing offset.
            if (index < 0 || index > Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            return ValueOffsets[index];
        }

        /// <summary>
        /// Number of elements in the list at <paramref name="index"/>; 0 for a null slot.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than <c>Length</c>.
        /// </exception>
        public int GetValueLength(int index)
        {
            if (index < 0 || index >= Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            if (IsNull(index))
            {
                return 0;
            }

            ReadOnlySpan<int> offsets = ValueOffsets;
            return offsets[index + 1] - offsets[index];
        }

        /// <summary>
        /// Returns the slice of <see cref="Values"/> belonging to the list at
        /// <paramref name="index"/>, or <c>null</c> when the slot is null or
        /// <see cref="Values"/> is not an <see cref="Array"/>.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than <c>Length</c>.
        /// </exception>
        public IArrowArray GetSlicedValues(int index)
        {
            if (index < 0 || index >= Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            if (IsNull(index))
            {
                return null;
            }

            if (!(Values is Array array))
            {
                return default;
            }

            return array.Slice(ValueOffsets[index], GetValueLength(index));
        }

        /// <summary>
        /// Disposes the child <see cref="Values"/> array (when disposing) before the base state.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                Values?.Dispose();
            }
            base.Dispose(disposing);
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs
// new file mode 100644 index 000000000..7365a77b6 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs @@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Apache.Arrow
{
    /// <summary>
    /// Base class for arrays of fixed-width value types. The values live in
    /// buffer 1 of the array data.
    /// </summary>
    public abstract class PrimitiveArray<T> : Array
        where T : struct
    {
        /// <summary>
        /// Wraps <paramref name="data"/> and asks it to confirm a buffer count of 2.
        /// </summary>
        protected PrimitiveArray(ArrayData data)
            : base(data)
        {
            data.EnsureBufferCount(2);
        }

        /// <summary>Raw value buffer (buffer 1 of the array data).</summary>
        public ArrowBuffer ValueBuffer => Data.Buffers[1];

        /// <summary>
        /// Typed view over this array's slice: <c>Length</c> elements starting at <c>Offset</c>.
        /// </summary>
        public ReadOnlySpan<T> Values => ValueBuffer.Span.CastTo<T>().Slice(Offset, Length);

        /// <summary>
        /// Returns the element at <paramref name="index"/>, or <c>null</c>
        /// when <c>IsValid(index)</c> is false.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than <c>Length</c>.
        /// </exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public T? GetValue(int index)
        {
            bool inRange = index >= 0 && index < Length;
            if (!inRange)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            if (!IsValid(index))
            {
                return null;
            }

            return Values[index];
        }

        /// <summary>
        /// Copies the elements into a new list. Null elements are skipped
        /// unless <paramref name="includeNulls"/> is true.
        /// </summary>
        public IList<T?> ToList(bool includeNulls = false)
        {
            int count = Values.Length;
            var result = new List<T?>(count);

            for (int position = 0; position < count; position++)
            {
                T? item = GetValue(position);

                // Present values are always kept; nulls only on request.
                if (item.HasValue || includeNulls)
                {
                    result.Add(item);
                }
            }

            return result;
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
// new file mode 100644 index 000000000..326f04558 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs @@ -0,0 +1,201 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Memory;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow
{
    /// <summary>
    /// Builder that accepts <typeparamref name="TFrom"/> values, converts each
    /// one with <see cref="ConvertTo"/> and delegates storage to an inner
    /// builder of <typeparamref name="TTo"/>.
    /// </summary>
    public abstract class PrimitiveArrayBuilder<TFrom, TTo, TArray, TBuilder> : IArrowArrayBuilder<TArray, TBuilder>
        where TTo : struct
        where TArray : IArrowArray
        where TBuilder : class, IArrowArrayBuilder<TArray>
    {
        /// <summary>This object viewed as the concrete builder type, for fluent returns.</summary>
        protected TBuilder Instance => this as TBuilder;

        /// <summary>Inner builder that stores the converted values.</summary>
        protected IArrowArrayBuilder<TTo, TArray, IArrowArrayBuilder<TArray>> ArrayBuilder { get; }

        public int Length => ArrayBuilder.Length;

        /// <exception cref="ArgumentNullException"><paramref name="builder"/> is null.</exception>
        internal PrimitiveArrayBuilder(IArrowArrayBuilder<TTo, TArray, IArrowArrayBuilder<TArray>> builder)
        {
            if (builder == null)
            {
                throw new ArgumentNullException(nameof(builder));
            }

            ArrayBuilder = builder;
        }

        public TArray Build(MemoryAllocator allocator = default)
        {
            return ArrayBuilder.Build(allocator);
        }

        /// <summary>Converts <paramref name="value"/> and appends it.</summary>
        public TBuilder Append(TFrom value)
        {
            TTo converted = ConvertTo(value);
            ArrayBuilder.Append(converted);
            return Instance;
        }

        /// <summary>Reserves room for the whole span, then appends its items one by one.</summary>
        public TBuilder Append(ReadOnlySpan<TFrom> span)
        {
            ArrayBuilder.Reserve(span.Length);
            for (int position = 0; position < span.Length; position++)
            {
                Append(span[position]);
            }
            return Instance;
        }

        /// <summary>Appends every item of <paramref name="values"/> after conversion.</summary>
        public TBuilder AppendRange(IEnumerable<TFrom> values)
        {
            IEnumerable<TTo> converted = values.Select(ConvertTo);
            ArrayBuilder.AppendRange(converted);
            return Instance;
        }

        public TBuilder AppendNull()
        {
            ArrayBuilder.AppendNull();
            return Instance;
        }

        public TBuilder Reserve(int capacity)
        {
            ArrayBuilder.Reserve(capacity);
            return Instance;
        }

        public TBuilder Resize(int length)
        {
            ArrayBuilder.Resize(length);
            return Instance;
        }

        public TBuilder Swap(int i, int j)
        {
            ArrayBuilder.Swap(i, j);
            return Instance;
        }

        /// <summary>Converts <paramref name="value"/> and stores it at <paramref name="index"/>.</summary>
        public TBuilder Set(int index, TFrom value)
        {
            TTo converted = ConvertTo(value);
            ArrayBuilder.Set(index, converted);
            return Instance;
        }

        public TBuilder Clear()
        {
            ArrayBuilder.Clear();
            return Instance;
        }

        /// <summary>Maps an input value to its stored representation.</summary>
        protected abstract TTo ConvertTo(TFrom value);
    }

    /// <summary>
    /// Builder for primitive arrays: keeps a value buffer and a validity
    /// bitmap in step, one validity bit per appended value.
    /// </summary>
    public abstract class PrimitiveArrayBuilder<T, TArray, TBuilder> : IArrowArrayBuilder<T, TArray, TBuilder>
        where T : struct
        where TArray : IArrowArray
        where TBuilder : class, IArrowArrayBuilder<TArray>
    {
        /// <summary>This object viewed as the concrete builder type, for fluent returns.</summary>
        protected TBuilder Instance => this as TBuilder;
        protected ArrowBuffer.Builder<T> ValueBuffer { get; }
        protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }

        public int Length => ValueBuffer.Length;

        /// <summary>Number of unset bits in the validity bitmap.</summary>
        protected int NullCount => ValidityBuffer.UnsetBitCount;

        internal PrimitiveArrayBuilder()
        {
            ValueBuffer = new ArrowBuffer.Builder<T>();
            ValidityBuffer = new ArrowBuffer.BitmapBuilder();
        }

        public TBuilder Resize(int length)
        {
            ValueBuffer.Resize(length);
            ValidityBuffer.Resize(length);
            return Instance;
        }

        public TBuilder Reserve(int capacity)
        {
            ValueBuffer.Reserve(capacity);
            ValidityBuffer.Reserve(capacity);
            return Instance;
        }

        /// <summary>Appends a valid value.</summary>
        public TBuilder Append(T value)
        {
            ValueBuffer.Append(value);
            ValidityBuffer.Append(true);
            return Instance;
        }

        /// <summary>Appends every item of the span as a valid value.</summary>
        public TBuilder Append(ReadOnlySpan<T> span)
        {
            int before = ValueBuffer.Length;
            ValueBuffer.Append(span);

            // One "valid" bit for each value the buffer actually grew by.
            int added = ValueBuffer.Length - before;
            ValidityBuffer.AppendRange(Enumerable.Repeat(true, added));
            return Instance;
        }

        /// <summary>Appends every item of <paramref name="values"/> as a valid value.</summary>
        public TBuilder AppendRange(IEnumerable<T> values)
        {
            int before = ValueBuffer.Length;
            ValueBuffer.AppendRange(values);

            // One "valid" bit for each value the buffer actually grew by.
            int added = ValueBuffer.Length - before;
            ValidityBuffer.AppendRange(Enumerable.Repeat(true, added));
            return Instance;
        }

        /// <summary>Appends a null slot; the value buffer receives <c>default(T)</c>.</summary>
        public TBuilder AppendNull()
        {
            ValidityBuffer.Append(false);
            ValueBuffer.Append(default(T));
            return Instance;
        }

        public TBuilder Clear()
        {
            ValueBuffer.Clear();
            ValidityBuffer.Clear();
            return Instance;
        }

        /// <summary>Overwrites the slot at <paramref name="index"/> and marks it valid.</summary>
        public TBuilder Set(int index, T value)
        {
            ValueBuffer.Span[index] = value;
            ValidityBuffer.Set(index, true);
            return Instance;
        }

        /// <summary>Exchanges the values and validity bits of slots <paramref name="i"/> and <paramref name="j"/>.</summary>
        public TBuilder Swap(int i, int j)
        {
            T held = ValueBuffer.Span[i];
            ValueBuffer.Span[i] = ValueBuffer.Span[j];
            ValueBuffer.Span[j] = held;
            ValidityBuffer.Swap(i, j);
            return Instance;
        }

        /// <summary>
        /// Builds the array. The validity bitmap is only materialised when
        /// at least one slot is null; otherwise an empty buffer is passed.
        /// </summary>
        public TArray Build(MemoryAllocator allocator = default)
        {
            ArrowBuffer validityBuffer;
            if (NullCount > 0)
            {
                validityBuffer = ValidityBuffer.Build(allocator);
            }
            else
            {
                validityBuffer = ArrowBuffer.Empty;
            }

            return Build(
                ValueBuffer.Build(allocator), validityBuffer,
                ValueBuffer.Length, NullCount, 0);
        }

        /// <summary>Creates the concrete array from the finished buffers.</summary>
        protected abstract TArray Build(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset);
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/StringArray.cs
// new file mode 100644 index 000000000..f008f56fa --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/StringArray.cs @@ -0,0 +1,95 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace Apache.Arrow
{
    /// <summary>
    /// Binary array whose elements are decoded as text
    /// (<see cref="StringType"/>), UTF-8 by default.
    /// </summary>
    public class StringArray: BinaryArray
    {
        /// <summary>Encoding used whenever a caller does not supply one.</summary>
        public static readonly Encoding DefaultEncoding = Encoding.UTF8;

        /// <summary>
        /// Builder that produces <see cref="StringArray"/> instances.
        /// </summary>
        public new class Builder : BuilderBase<StringArray, Builder>
        {
            public Builder() : base(StringType.Default) { }

            protected override StringArray Build(ArrayData data)
            {
                return new StringArray(data);
            }

            /// <summary>
            /// Appends <paramref name="value"/> encoded with
            /// <paramref name="encoding"/>; a null string is appended as a null slot.
            /// </summary>
            /// <param name="value">Text to append, or null.</param>
            /// <param name="encoding">Encoding to use; <see cref="DefaultEncoding"/> when null.</param>
            public Builder Append(string value, Encoding encoding = null)
            {
                if (value == null)
                {
                    return AppendNull();
                }
                encoding = encoding ?? DefaultEncoding;
                byte[] span = encoding.GetBytes(value);
                return Append(span.AsSpan());
            }

            /// <summary>
            /// Appends every string of <paramref name="values"/> in order;
            /// null items become null slots.
            /// </summary>
            /// <param name="values">Strings to append.</param>
            /// <param name="encoding">Encoding to use; <see cref="DefaultEncoding"/> when null.</param>
            /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
            public Builder AppendRange(IEnumerable<string> values, Encoding encoding = null)
            {
                // Fail with a descriptive argument error rather than a
                // NullReferenceException raised by the foreach below.
                if (values == null)
                {
                    throw new ArgumentNullException(nameof(values));
                }

                foreach (string value in values)
                {
                    Append(value, encoding);
                }

                return this;
            }
        }

        public StringArray(ArrayData data)
            : base(ArrowTypeId.String, data) { }

        /// <summary>
        /// Creates the array from raw buffers, stored in the order
        /// null bitmap, value offsets, data.
        /// </summary>
        public StringArray(int length,
            ArrowBuffer valueOffsetsBuffer,
            ArrowBuffer dataBuffer,
            ArrowBuffer nullBitmapBuffer,
            int nullCount = 0, int offset = 0)
            : this(new ArrayData(StringType.Default, length, nullCount, offset,
                new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
        { }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        /// <summary>
        /// Decodes the element at <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Element position, forwarded to <c>GetBytes</c>.</param>
        /// <param name="encoding">Encoding to use; <see cref="DefaultEncoding"/> when null.</param>
        /// <returns>
        /// <c>null</c> when <c>GetBytes</c> yields the default span,
        /// <see cref="string.Empty"/> for a zero-length span, otherwise the decoded text.
        /// </returns>
        public string GetString(int index, Encoding encoding = default)
        {
            encoding = encoding ?? DefaultEncoding;

            ReadOnlySpan<byte> bytes = GetBytes(index);

            // NOTE(review): presumably GetBytes returns the default span for a
            // null slot - confirm against BinaryArray.
            if (bytes == default)
            {
                return null;
            }
            if (bytes.Length == 0)
            {
                return string.Empty;
            }

            unsafe
            {
                // Pin the span so the pointer-based GetString overload can read it.
                fixed (byte* data = &MemoryMarshal.GetReference(bytes))
                    return encoding.GetString(data, bytes.Length);
            }
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/StructArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/StructArray.cs
// new file mode 100644 index 000000000..31aea9b41 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/StructArray.cs @@ -0,0 +1,59 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System.Collections.Generic;
using System.Linq;
using System.Threading;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow struct array: one child array per field, created lazily from the
    /// array data unless supplied to the constructor.
    /// </summary>
    public class StructArray : Array
    {
        private IReadOnlyList<IArrowArray> _fields;

        /// <summary>
        /// Child arrays of this struct. Built from <c>Data.Children</c> on
        /// first access when they were not given at construction.
        /// </summary>
        public IReadOnlyList<IArrowArray> Fields
        {
            get
            {
                return LazyInitializer.EnsureInitialized(ref _fields, () => InitializeFields());
            }
        }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Struct"/>.
        /// </summary>
        public StructArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Struct);
        }

        /// <summary>
        /// Creates the array from child arrays and a null bitmap (stored as
        /// the only buffer). <paramref name="children"/> is projected for the
        /// array data and enumerated again here to fill <see cref="Fields"/>.
        /// </summary>
        public StructArray(
            IArrowType dataType, int length,
            IEnumerable<IArrowArray> children,
            ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
            : this(new ArrayData(
                dataType, length, nullCount, offset,
                new ArrowBuffer[] { nullBitmapBuffer },
                children.Select(child => child.Data)))
        {
            _fields = children.ToArray();
        }

        /// <summary>
        /// Forwards this array to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }

        /// <summary>
        /// Builds one array per entry of <c>Data.Children</c>, in order.
        /// </summary>
        private IReadOnlyList<IArrowArray> InitializeFields()
        {
            int count = Data.Children.Length;
            IArrowArray[] built = new IArrowArray[count];

            int slot = 0;
            while (slot < count)
            {
                built[slot] = ArrowArrayFactory.BuildArray(Data.Children[slot]);
                slot++;
            }

            return built;
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
// new file mode 100644 index 000000000..0269768f4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs @@ -0,0 +1,149 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Types;
using System;
using System.Diagnostics;
using System.IO;

namespace Apache.Arrow
{
    /// <summary>
    /// Array of timestamps stored as 64-bit counts of the type's
    /// <see cref="TimeUnit"/> since 1970-01-01T00:00:00Z.
    /// </summary>
    public class TimestampArray: PrimitiveArray<long>
    {
        private static readonly DateTimeOffset s_epoch = new DateTimeOffset(1970, 1, 1, 0, 0, 0, 0, TimeSpan.Zero);

        /// <summary>
        /// Builder that accepts <see cref="DateTimeOffset"/> values and stores
        /// them as counts of the configured time unit.
        /// </summary>
        public class Builder: PrimitiveArrayBuilder<DateTimeOffset, long, TimestampArray, Builder>
        {
            /// <summary>
            /// Inner builder holding the raw 64-bit values and the timestamp type.
            /// </summary>
            internal class TimestampBuilder : PrimitiveArrayBuilder<long, TimestampArray, TimestampBuilder>
            {
                /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
                internal TimestampBuilder(TimestampType type)
                {
                    DataType = type ?? throw new ArgumentNullException(nameof(type));
                }

                protected TimestampType DataType { get; }

                protected override TimestampArray Build(
                    ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                    int length, int nullCount, int offset) =>
                    new TimestampArray(DataType, valueBuffer, nullBitmapBuffer,
                        length, nullCount, offset);
            }

            protected TimestampType DataType { get; }

            public Builder()
                : this(TimestampType.Default) { }

            public Builder(TimeUnit unit, TimeZoneInfo timezone)
                : this(new TimestampType(unit, timezone)) { }

            public Builder(TimeUnit unit = TimeUnit.Millisecond, string timezone = "+00:00")
                : this(new TimestampType(unit, timezone)) { }

            public Builder(TimeUnit unit)
                : this(new TimestampType(unit, (string) null)) { }

            public Builder(TimestampType type)
                : base(new TimestampBuilder(type))
            {
                DataType = type;
            }

            /// <summary>
            /// Converts <paramref name="value"/> to a count of
            /// <c>DataType.Unit</c> since the UNIX epoch.
            /// </summary>
            /// <exception cref="OverflowException">
            /// The unit is nanoseconds and the value is too far from the epoch
            /// to fit in a 64-bit nanosecond count.
            /// </exception>
            /// <exception cref="InvalidOperationException">The unit is not supported.</exception>
            protected override long ConvertTo(DateTimeOffset value)
            {
                // We must return the absolute time since the UNIX epoch while
                // respecting the timezone offset; the calculation is as follows:
                //
                // - Compute time span between epoch and specified time
                // - Compute time divisions per tick

                TimeSpan timeSpan = value - s_epoch;
                long ticks = timeSpan.Ticks;

                switch (DataType.Unit)
                {
                    case TimeUnit.Nanosecond:
                        // One tick is 100 ns. The product exceeds Int64 for
                        // dates far from 1970, so fail loudly instead of
                        // storing a silently wrapped value.
                        return checked(ticks * 100);
                    case TimeUnit.Microsecond:
                        return ticks / 10;
                    case TimeUnit.Millisecond:
                        return ticks / TimeSpan.TicksPerMillisecond;
                    case TimeUnit.Second:
                        return ticks / TimeSpan.TicksPerSecond;
                    default:
                        throw new InvalidOperationException($"unsupported time unit <{DataType.Unit}>");
                }
            }
        }

        /// <summary>
        /// Creates the array from raw buffers. The null bitmap is stored as
        /// buffer 0 and the values as buffer 1.
        /// </summary>
        public TimestampArray(
            TimestampType type,
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(type, length, nullCount, offset,
                new[] {nullBitmapBuffer, valueBuffer})) { }

        /// <summary>
        /// Wraps existing array data and asks it to confirm that it is typed
        /// as <see cref="ArrowTypeId.Timestamp"/>.
        /// </summary>
        public TimestampArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Timestamp);

            Debug.Assert(Data.DataType is TimestampType);
        }

        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);

        /// <summary>
        /// Converts the stored value at <paramref name="index"/> to a UTC
        /// <see cref="DateTimeOffset"/> without checking whether the slot is null.
        /// </summary>
        /// <exception cref="InvalidDataException">The array's time unit is not supported.</exception>
        public DateTimeOffset GetTimestampUnchecked(int index)
        {
            var type = (TimestampType) Data.DataType;
            long value = Values[index];

            long ticks;

            switch (type.Unit)
            {
                case TimeUnit.Nanosecond:
                    ticks = value / 100;
                    break;
                case TimeUnit.Microsecond:
                    ticks = value * 10;
                    break;
                case TimeUnit.Millisecond:
                    ticks = value * TimeSpan.TicksPerMillisecond;
                    break;
                case TimeUnit.Second:
                    ticks = value * TimeSpan.TicksPerSecond;
                    break;
                default:
                    throw new InvalidDataException(
                        $"Unsupported timestamp unit <{type.Unit}>");
            }

            return new DateTimeOffset(s_epoch.Ticks + ticks, TimeSpan.Zero);
        }

        /// <summary>
        /// Returns the timestamp at <paramref name="index"/>, or <c>null</c>
        /// when <c>IsNull(index)</c> is true.
        /// </summary>
        public DateTimeOffset? GetTimestamp(int index)
        {
            if (IsNull(index))
            {
                return null;
            }

            return GetTimestampUnchecked(index);
        }

    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Arrays/UInt16Array.cs b/src/arrow/csharp/src/Apache.Arrow/Arrays/UInt16Array.cs
// new file mode 100644 index 000000000..bba244fe8 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Arrays/UInt16Array.cs @@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Primitive Arrow array whose elements are <see cref="ushort"/> values.
    /// </summary>
    public class UInt16Array : PrimitiveArray<ushort>
    {
        /// <summary>
        /// Builder that produces <see cref="UInt16Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<ushort, UInt16Array, Builder>
        {
            /// <summary>
            /// Wraps the supplied buffers and counts in a new <see cref="UInt16Array"/>.
            /// </summary>
            protected override UInt16Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new UInt16Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Creates an array from raw buffers. The buffers are handed to
        /// <see cref="ArrayData"/> in the order null bitmap first, values second,
        /// together with <see cref="UInt16Type.Default"/>.
        /// </summary>
        /// <param name="valueBuffer">Buffer holding the values.</param>
        /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
        /// <param name="length">Length passed on to <see cref="ArrayData"/>.</param>
        /// <param name="nullCount">Null count passed on to <see cref="ArrayData"/>.</param>
        /// <param name="offset">Offset passed on to <see cref="ArrayData"/>.</param>
        public UInt16Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                UInt16Type.Default,
                length,
                nullCount,
                offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Creates an array over existing array data, after asking the data to
        /// confirm (via <c>EnsureDataType</c>) that it is typed as <see cref="ArrowTypeId.UInt16"/>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        public UInt16Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.UInt16);
        }

        /// <summary>
        /// Forwards this instance and the visitor to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/Arrays/UInt32Array.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Primitive Arrow array whose elements are <see cref="uint"/> values.
    /// </summary>
    public class UInt32Array : PrimitiveArray<uint>
    {
        /// <summary>
        /// Builder that produces <see cref="UInt32Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<uint, UInt32Array, Builder>
        {
            /// <summary>
            /// Wraps the supplied buffers and counts in a new <see cref="UInt32Array"/>.
            /// </summary>
            protected override UInt32Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new UInt32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Creates an array from raw buffers. The buffers are handed to
        /// <see cref="ArrayData"/> in the order null bitmap first, values second,
        /// together with <see cref="UInt32Type.Default"/>.
        /// </summary>
        /// <param name="valueBuffer">Buffer holding the values.</param>
        /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
        /// <param name="length">Length passed on to <see cref="ArrayData"/>.</param>
        /// <param name="nullCount">Null count passed on to <see cref="ArrayData"/>.</param>
        /// <param name="offset">Offset passed on to <see cref="ArrayData"/>.</param>
        public UInt32Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                UInt32Type.Default,
                length,
                nullCount,
                offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Creates an array over existing array data, after asking the data to
        /// confirm (via <c>EnsureDataType</c>) that it is typed as <see cref="ArrowTypeId.UInt32"/>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        public UInt32Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.UInt32);
        }

        /// <summary>
        /// Forwards this instance and the visitor to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/Arrays/UInt64Array.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Primitive Arrow array whose elements are <see cref="ulong"/> values.
    /// </summary>
    public class UInt64Array : PrimitiveArray<ulong>
    {
        /// <summary>
        /// Builder that produces <see cref="UInt64Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<ulong, UInt64Array, Builder>
        {
            /// <summary>
            /// Wraps the supplied buffers and counts in a new <see cref="UInt64Array"/>.
            /// </summary>
            protected override UInt64Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new UInt64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Creates an array from raw buffers. The buffers are handed to
        /// <see cref="ArrayData"/> in the order null bitmap first, values second,
        /// together with <see cref="UInt64Type.Default"/>.
        /// </summary>
        /// <param name="valueBuffer">Buffer holding the values.</param>
        /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
        /// <param name="length">Length passed on to <see cref="ArrayData"/>.</param>
        /// <param name="nullCount">Null count passed on to <see cref="ArrayData"/>.</param>
        /// <param name="offset">Offset passed on to <see cref="ArrayData"/>.</param>
        public UInt64Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                UInt64Type.Default,
                length,
                nullCount,
                offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Creates an array over existing array data, after asking the data to
        /// confirm (via <c>EnsureDataType</c>) that it is typed as <see cref="ArrowTypeId.UInt64"/>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        public UInt64Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.UInt64);
        }

        /// <summary>
        /// Forwards this instance and the visitor to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/Arrays/UInt8Array.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;

namespace Apache.Arrow
{
    /// <summary>
    /// Primitive Arrow array whose elements are <see cref="byte"/> values.
    /// </summary>
    public class UInt8Array : PrimitiveArray<byte>
    {
        /// <summary>
        /// Builder that produces <see cref="UInt8Array"/> instances.
        /// </summary>
        public class Builder : PrimitiveArrayBuilder<byte, UInt8Array, Builder>
        {
            /// <summary>
            /// Wraps the supplied buffers and counts in a new <see cref="UInt8Array"/>.
            /// </summary>
            protected override UInt8Array Build(
                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
                int length, int nullCount, int offset)
            {
                return new UInt8Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
            }
        }

        /// <summary>
        /// Creates an array from raw buffers. The buffers are handed to
        /// <see cref="ArrayData"/> in the order null bitmap first, values second,
        /// together with <see cref="UInt8Type.Default"/>.
        /// </summary>
        /// <param name="valueBuffer">Buffer holding the values.</param>
        /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
        /// <param name="length">Length passed on to <see cref="ArrayData"/>.</param>
        /// <param name="nullCount">Null count passed on to <see cref="ArrayData"/>.</param>
        /// <param name="offset">Offset passed on to <see cref="ArrayData"/>.</param>
        public UInt8Array(
            ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
            int length, int nullCount, int offset)
            : this(new ArrayData(
                UInt8Type.Default,
                length,
                nullCount,
                offset,
                new ArrowBuffer[] { nullBitmapBuffer, valueBuffer }))
        {
        }

        /// <summary>
        /// Creates an array over existing array data, after asking the data to
        /// confirm (via <c>EnsureDataType</c>) that it is typed as <see cref="ArrowTypeId.UInt8"/>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        public UInt8Array(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.UInt8);
        }

        /// <summary>
        /// Forwards this instance and the visitor to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// Arrow array over union-typed data. The constructor requires the data to
    /// carry three buffers; index 1 is read as the type-id buffer and index 2 as
    /// the value-offset buffer.
    /// </summary>
    public class UnionArray : Array
    {
        /// <summary>
        /// Gets the data type of the array as a <see cref="UnionType"/>.
        /// </summary>
        public UnionType Type => Data.DataType as UnionType;

        /// <summary>
        /// Gets the union mode declared by <see cref="Type"/>.
        /// </summary>
        public UnionMode Mode => Type.Mode;

        /// <summary>
        /// Gets the buffer at index 1 of the array data (the type ids).
        /// </summary>
        public ArrowBuffer TypeBuffer => Data.Buffers[1];

        /// <summary>
        /// Gets the buffer at index 2 of the array data (the value offsets).
        /// </summary>
        public ArrowBuffer ValueOffsetBuffer => Data.Buffers[2];

        /// <summary>
        /// Gets the raw bytes of <see cref="TypeBuffer"/>.
        /// </summary>
        public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;

        /// <summary>
        /// Gets the value offsets, one 32-bit offset per slot of the array.
        /// </summary>
        /// <remarks>
        /// A union's offsets buffer stores exactly one offset for each slot, unlike
        /// list offsets which carry an extra trailing entry. The view is therefore
        /// <see cref="Array.Length"/> elements long; asking for one more would read
        /// past the logical data and throw on a buffer that is not over-allocated.
        /// </remarks>
        public ReadOnlySpan<int> ValueOffsets => ValueOffsetBuffer.Span.CastTo<int>().Slice(0, Length);

        /// <summary>
        /// Creates a union array over existing array data, after checking the data
        /// via <c>EnsureDataType</c> and <c>EnsureBufferCount</c>.
        /// </summary>
        /// <param name="data">Array data to wrap.</param>
        public UnionArray(ArrayData data)
            : base(data)
        {
            data.EnsureDataType(ArrowTypeId.Union);
            data.EnsureBufferCount(3);
        }

        /// <summary>
        /// Not implemented yet.
        /// </summary>
        /// <param name="index">Index of the child to fetch.</param>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public IArrowArray GetChild(int index)
        {
            // TODO: Implement
            throw new NotImplementedException();
        }

        /// <summary>
        /// Forwards this instance and the visitor to the two-argument <c>Accept</c> helper.
        /// </summary>
        public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using Apache.Arrow.Memory;

namespace Apache.Arrow
{
    public partial struct ArrowBuffer
    {
        /// <summary>
        /// The <see cref="BitmapBuilder"/> class is a complement to <see cref="ArrowBuffer.Builder{T}"/>
        /// and is designed for boolean fields, which are efficiently bit-packed into byte-aligned memory.
        /// </summary>
        public class BitmapBuilder
        {
            private const int DefaultBitCapacity = 64;

            /// <summary>
            /// Gets the number of bits that can be contained in the memory allocated by the current instance.
            /// </summary>
            public int Capacity { get; private set; }

            /// <summary>
            /// Gets the number of bits currently appended.
            /// </summary>
            public int Length { get; private set; }

            /// <summary>
            /// Gets the raw byte memory underpinning the builder.
            /// </summary>
            public Memory<byte> Memory { get; private set; }

            /// <summary>
            /// Gets the span of (bit-packed byte) memory underpinning the builder.
            /// </summary>
            public Span<byte> Span => Memory.Span;

            /// <summary>
            /// Gets the number of set bits (i.e. set to 1).
            /// </summary>
            public int SetBitCount { get; private set; }

            /// <summary>
            /// Gets the number of unset bits (i.e. set to 0).
            /// </summary>
            public int UnsetBitCount => Length - SetBitCount;

            /// <summary>
            /// Creates an instance of the <see cref="BitmapBuilder"/> class.
            /// </summary>
            /// <param name="capacity">Number of bits of initial capacity to reserve.</param>
            public BitmapBuilder(int capacity = DefaultBitCapacity)
            {
                Memory = new byte[BitUtility.ByteCount(capacity)];
                Capacity = capacity;
            }

            /// <summary>
            /// Append a single bit.
            /// </summary>
            /// <param name="value">Bit to append.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public BitmapBuilder Append(bool value)
            {
                if (Length % 8 == 0)
                {
                    // Append a new byte to the buffer when needed.
                    EnsureAdditionalCapacity(1);
                }

                BitUtility.SetBit(Span, Length, value);
                Length++;
                SetBitCount += value ? 1 : 0;
                return this;
            }

            /// <summary>
            /// Append multiple bits.
            /// </summary>
            /// <param name="values">Bits to append; a null sequence appends nothing.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public BitmapBuilder AppendRange(IEnumerable<bool> values)
            {
                if (values != null)
                {
                    foreach (var v in values)
                    {
                        Append(v);
                    }
                }

                return this;
            }

            /// <summary>
            /// Toggle the bit at a particular index.
            /// </summary>
            /// <param name="index">Index of bit to toggle.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="index"/> is negative or not less than <see cref="Length"/>.
            /// </exception>
            public BitmapBuilder Toggle(int index)
            {
                CheckIndex(index);
                bool priorValue = BitUtility.GetBit(Span, index);
                SetBitCount += priorValue ? -1 : 1;
                BitUtility.ToggleBit(Span, index);
                return this;
            }

            /// <summary>
            /// Set the bit at a particular index to 1.
            /// </summary>
            /// <param name="index">Index of bit to set.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="index"/> is negative or not less than <see cref="Length"/>.
            /// </exception>
            public BitmapBuilder Set(int index)
            {
                CheckIndex(index);
                bool priorValue = BitUtility.GetBit(Span, index);
                SetBitCount += priorValue ? 0 : 1;
                BitUtility.SetBit(Span, index);
                return this;
            }

            /// <summary>
            /// Set the bit at a particular index to a given value.
            /// </summary>
            /// <param name="index">Index of bit to set/unset.</param>
            /// <param name="value">Value of bit.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="index"/> is negative or not less than <see cref="Length"/>.
            /// </exception>
            public BitmapBuilder Set(int index, bool value)
            {
                CheckIndex(index);
                bool priorValue = BitUtility.GetBit(Span, index);
                SetBitCount -= priorValue ? 1 : 0;
                SetBitCount += value ? 1 : 0;
                BitUtility.SetBit(Span, index, value);
                return this;
            }

            /// <summary>
            /// Swap the bits at two given indices.
            /// </summary>
            /// <param name="i">First index.</param>
            /// <param name="j">Second index.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// Either index is negative or not less than <see cref="Length"/>.
            /// </exception>
            public BitmapBuilder Swap(int i, int j)
            {
                CheckIndex(i);
                CheckIndex(j);

                // Swapping two bits never changes how many are set, so SetBitCount is left alone.
                bool bi = BitUtility.GetBit(Span, i);
                bool bj = BitUtility.GetBit(Span, j);
                BitUtility.SetBit(Span, i, bj);
                BitUtility.SetBit(Span, j, bi);
                return this;
            }

            /// <summary>
            /// Reserve a given number of bits' additional capacity.
            /// </summary>
            /// <param name="additionalCapacity">Number of bits of required additional capacity.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="additionalCapacity"/> is negative.
            /// </exception>
            public BitmapBuilder Reserve(int additionalCapacity)
            {
                if (additionalCapacity < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(additionalCapacity));
                }

                EnsureAdditionalCapacity(additionalCapacity);
                return this;
            }

            /// <summary>
            /// Resize the buffer to a given size.
            /// </summary>
            /// <remarks>
            /// Note that if the required capacity is larger than the current length of the populated buffer so far,
            /// the buffer's contents in the new, expanded region are undefined.
            /// </remarks>
            /// <remarks>
            /// Note that if the required capacity is smaller than the current length of the populated buffer so far,
            /// the buffer will be truncated and items at the end of the buffer will be lost.
            /// </remarks>
            /// <param name="capacity">Number of bits of required capacity.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="capacity"/> is negative.
            /// </exception>
            public BitmapBuilder Resize(int capacity)
            {
                if (capacity < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(capacity), "Capacity must be non-negative");
                }

                EnsureCapacity(capacity);
                Length = capacity;

                // An empty bitmap has no set bits. Skipping the count also avoids asking
                // CountBits to slice a zero-byte span, which throws when the builder was
                // created with zero capacity.
                SetBitCount = Length == 0 ? 0 : BitUtility.CountBits(Span, 0, Length);

                return this;
            }

            /// <summary>
            /// Clear all contents appended so far.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public BitmapBuilder Clear()
            {
                Span.Fill(default);
                Length = 0;
                SetBitCount = 0;
                return this;
            }

            /// <summary>
            /// Build an Arrow buffer from the appended contents so far.
            /// </summary>
            /// <remarks>
            /// The whole backing memory is copied (not just the bytes covered by
            /// <see cref="Length"/>), into an allocation rounded up to a multiple of 64 bytes.
            /// </remarks>
            /// <param name="allocator">Optional memory allocator.</param>
            /// <returns>Returns an <see cref="ArrowBuffer"/> object.</returns>
            public ArrowBuffer Build(MemoryAllocator allocator = default)
            {
                int bufferLength = checked((int)BitUtility.RoundUpToMultipleOf64(Memory.Length));
                var memoryAllocator = allocator ?? MemoryAllocator.Default.Value;
                var memoryOwner = memoryAllocator.Allocate(bufferLength);
                Memory.Slice(0, Memory.Length).CopyTo(memoryOwner.Memory);
                return new ArrowBuffer(memoryOwner);
            }

            // Rejects indices outside [0, Length).
            private void CheckIndex(int index)
            {
                if (index < 0 || index >= Length)
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
            }

            // Ensures room for the current length plus the given number of extra bits.
            private void EnsureAdditionalCapacity(int additionalCapacity)
            {
                EnsureCapacity(checked(Length + additionalCapacity));
            }

            // Grows the backing memory when fewer than requiredCapacity bits fit.
            private void EnsureCapacity(int requiredCapacity)
            {
                if (requiredCapacity > Capacity)
                {
                    // TODO: specifiable growth strategy
                    // Double the length of the in-memory array, or use the byte count of the capacity, whichever is
                    // greater.
                    int byteCount = Math.Max(BitUtility.ByteCount(requiredCapacity), Memory.Length * 2);
                    Reallocate(byteCount);
                    Capacity = byteCount * 8;
                }
            }

            // Replaces the backing memory with a larger array, preserving existing bytes.
            private void Reallocate(int numBytes)
            {
                if (numBytes != 0)
                {
                    Debug.Assert(numBytes > Memory.Length);
                    var memory = new Memory<byte>(new byte[numBytes]);
                    Memory.CopyTo(memory);

                    Memory = memory;
                }
            }
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace Apache.Arrow
{
    public partial struct ArrowBuffer
    {
        /// <summary>
        /// The <see cref="Builder{T}"/> class is able to append value-type items, with fluent-style methods, to build
        /// up an <see cref="ArrowBuffer"/> of contiguous items.
        /// </summary>
        /// <remarks>
        /// Note that <see cref="bool"/> is not supported as a generic type argument for this class. Please use
        /// <see cref="BitmapBuilder"/> instead.
        /// </remarks>
        /// <typeparam name="T">Value-type of item to build into a buffer.</typeparam>
        public class Builder<T>
            where T : struct
        {
            private const int DefaultCapacity = 8;

            // Size in bytes of one item, as reported by Unsafe.SizeOf<T>().
            private readonly int _size;

            /// <summary>
            /// Gets the number of items that can be contained in the memory allocated by the current instance.
            /// </summary>
            public int Capacity => Memory.Length / _size;

            /// <summary>
            /// Gets the number of items currently appended.
            /// </summary>
            public int Length { get; private set; }

            /// <summary>
            /// Gets the raw byte memory underpinning the builder.
            /// </summary>
            public Memory<byte> Memory { get; private set; }

            /// <summary>
            /// Gets the span of memory underpinning the builder.
            /// </summary>
            public Span<T> Span
            {
                [MethodImpl(MethodImplOptions.AggressiveInlining)]
                get => Memory.Span.CastTo<T>();
            }

            /// <summary>
            /// Creates an instance of the <see cref="Builder{T}"/> class.
            /// </summary>
            /// <param name="capacity">Number of items of initial capacity to reserve.</param>
            /// <exception cref="NotSupportedException"><typeparamref name="T"/> is <see cref="bool"/>.</exception>
            public Builder(int capacity = DefaultCapacity)
            {
                // Using `bool` as the template argument, if used in an unrestricted fashion, would result in a buffer
                // with inappropriate contents being produced. Because C# does not support template specialisation,
                // and because generic type constraints do not support negation, we will throw a runtime error to
                // indicate that such a template type is not supported.
                if (typeof(T) == typeof(bool))
                {
                    throw new NotSupportedException(
                        $"An instance of {nameof(Builder<T>)} cannot be instantiated, as `bool` is not an " +
                        $"appropriate generic type to use with this class - please use {nameof(BitmapBuilder)} " +
                        $"instead");
                }

                _size = Unsafe.SizeOf<T>();

                Memory = new byte[capacity * _size];
                Length = 0;
            }

            /// <summary>
            /// Append a buffer, assumed to contain items of the same type.
            /// </summary>
            /// <param name="buffer">Buffer to append.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder<T> Append(ArrowBuffer buffer)
            {
                Append(buffer.Span.CastTo<T>());
                return this;
            }

            /// <summary>
            /// Append a single item.
            /// </summary>
            /// <param name="value">Item to append.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder<T> Append(T value)
            {
                EnsureAdditionalCapacity(1);
                Span[Length++] = value;
                return this;
            }

            /// <summary>
            /// Append a span of items.
            /// </summary>
            /// <param name="source">Source of item span.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder<T> Append(ReadOnlySpan<T> source)
            {
                EnsureAdditionalCapacity(source.Length);
                source.CopyTo(Span.Slice(Length, source.Length));
                Length += source.Length;
                return this;
            }

            /// <summary>
            /// Append a number of items.
            /// </summary>
            /// <param name="values">Items to append; a null sequence appends nothing.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder<T> AppendRange(IEnumerable<T> values)
            {
                if (values != null)
                {
                    foreach (T v in values)
                    {
                        Append(v);
                    }
                }

                return this;
            }

            /// <summary>
            /// Reserve a given number of items' additional capacity.
            /// </summary>
            /// <param name="additionalCapacity">Number of items of required additional capacity.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="additionalCapacity"/> is negative.
            /// </exception>
            public Builder<T> Reserve(int additionalCapacity)
            {
                if (additionalCapacity < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(additionalCapacity));
                }

                EnsureAdditionalCapacity(additionalCapacity);
                return this;
            }

            /// <summary>
            /// Resize the buffer to a given size.
            /// </summary>
            /// <remarks>
            /// Note that if the required capacity is larger than the current length of the populated buffer so far,
            /// the buffer's contents in the new, expanded region are undefined.
            /// </remarks>
            /// <remarks>
            /// Note that if the required capacity is smaller than the current length of the populated buffer so far,
            /// the buffer will be truncated and items at the end of the buffer will be lost.
            /// </remarks>
            /// <param name="capacity">Number of items of required capacity.</param>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="capacity"/> is negative.
            /// </exception>
            public Builder<T> Resize(int capacity)
            {
                if (capacity < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(capacity), "Capacity must be non-negative");
                }

                EnsureCapacity(capacity);
                Length = capacity;

                return this;
            }

            /// <summary>
            /// Clear all contents appended so far.
            /// </summary>
            /// <returns>Returns the builder (for fluent-style composition).</returns>
            public Builder<T> Clear()
            {
                Span.Fill(default);
                Length = 0;
                return this;
            }

            /// <summary>
            /// Build an Arrow buffer from the appended contents so far.
            /// </summary>
            /// <param name="allocator">Optional memory allocator.</param>
            /// <returns>Returns an <see cref="ArrowBuffer"/> object.</returns>
            public ArrowBuffer Build(MemoryAllocator allocator = default)
            {
                return Build(64, allocator);
            }

            /// <summary>
            /// Build an Arrow buffer from the appended contents so far, with the allocation
            /// rounded up to a multiple of the specified byte size.
            /// </summary>
            /// <param name="byteSize">
            /// Rounding factor, in bytes, for the allocated length; it is passed to
            /// <see cref="BitUtility.RoundUpToMultiplePowerOfTwo"/> and so must be a power of two.
            /// </param>
            /// <param name="allocator">Optional memory allocator.</param>
            /// <returns>Returns an <see cref="ArrowBuffer"/> object.</returns>
            internal ArrowBuffer Build(int byteSize, MemoryAllocator allocator = default)
            {
                int currentBytesLength = Length * _size;
                int bufferLength = checked((int)BitUtility.RoundUpToMultiplePowerOfTwo(currentBytesLength, byteSize));

                MemoryAllocator memoryAllocator = allocator ?? MemoryAllocator.Default.Value;
                IMemoryOwner<byte> memoryOwner = memoryAllocator.Allocate(bufferLength);
                Memory.Slice(0, currentBytesLength).CopyTo(memoryOwner.Memory);

                return new ArrowBuffer(memoryOwner);
            }

            // Ensures room for the current length plus the given number of extra items.
            private void EnsureAdditionalCapacity(int additionalCapacity)
            {
                EnsureCapacity(checked(Length + additionalCapacity));
            }

            // Grows the backing memory when fewer than requiredCapacity items fit.
            private void EnsureCapacity(int requiredCapacity)
            {
                if (requiredCapacity > Capacity)
                {
                    // TODO: specifiable growth strategy
                    // Double the length of the in-memory array, or use the byte count of the capacity, whichever is
                    // greater.
                    // The item-to-byte conversion is checked, like the addition in
                    // EnsureAdditionalCapacity: a wrapped product would silently lose the Math.Max
                    // and leave the buffer smaller than the caller asked for.
                    int requiredBytes = checked(requiredCapacity * _size);
                    int capacity = Math.Max(requiredBytes, Memory.Length * 2);
                    Reallocate(capacity);
                }
            }

            // Replaces the backing memory with a new array of numBytes, preserving existing bytes.
            private void Reallocate(int numBytes)
            {
                if (numBytes != 0)
                {
                    var memory = new Memory<byte>(new byte[numBytes]);
                    Memory.CopyTo(memory);

                    Memory = memory;
                }
            }
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/ArrowBuffer.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Buffers;
using System.Runtime.CompilerServices;
using Apache.Arrow.Memory;

namespace Apache.Arrow
{
    /// <summary>
    /// Read-only view over a block of bytes, backed either by plain memory or by
    /// an <see cref="IMemoryOwner{T}"/> that this buffer disposes.
    /// </summary>
    public readonly partial struct ArrowBuffer : IEquatable<ArrowBuffer>, IDisposable
    {
        private readonly IMemoryOwner<byte> _memoryOwner;
        private readonly ReadOnlyMemory<byte> _memory;

        /// <summary>
        /// Gets a buffer wrapping empty memory.
        /// </summary>
        public static ArrowBuffer Empty
        {
            get { return new ArrowBuffer(ReadOnlyMemory<byte>.Empty); }
        }

        /// <summary>
        /// Creates a buffer over memory that has no owner to dispose.
        /// </summary>
        /// <param name="data">Bytes exposed by the buffer.</param>
        public ArrowBuffer(ReadOnlyMemory<byte> data)
        {
            _memory = data;
            _memoryOwner = null;
        }

        /// <summary>
        /// Creates a buffer backed by a memory owner.
        /// </summary>
        /// <param name="memoryOwner">Owner whose memory the buffer exposes.</param>
        internal ArrowBuffer(IMemoryOwner<byte> memoryOwner)
        {
            // The owner's Memory is deliberately not cached here: the owner may
            // be disposed later, at which point a cached copy would be invalid.
            // It is fetched on each access through the Memory property instead.
            _memory = ReadOnlyMemory<byte>.Empty;
            _memoryOwner = memoryOwner;
        }

        /// <summary>
        /// Gets the owner's memory when an owner is present, otherwise the
        /// memory supplied at construction.
        /// </summary>
        public ReadOnlyMemory<byte> Memory
        {
            get
            {
                if (_memoryOwner == null)
                {
                    return _memory;
                }

                return _memoryOwner.Memory;
            }
        }

        /// <summary>
        /// Gets whether <see cref="Memory"/> is empty.
        /// </summary>
        public bool IsEmpty
        {
            get { return Memory.IsEmpty; }
        }

        /// <summary>
        /// Gets the number of bytes in <see cref="Memory"/>.
        /// </summary>
        public int Length
        {
            get { return Memory.Length; }
        }

        /// <summary>
        /// Gets the span over <see cref="Memory"/>.
        /// </summary>
        public ReadOnlySpan<byte> Span
        {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get { return Memory.Span; }
        }

        /// <summary>
        /// Copies the bytes into a new buffer produced by a byte builder.
        /// </summary>
        /// <param name="allocator">Optional memory allocator handed to the builder.</param>
        /// <returns>A new <see cref="ArrowBuffer"/> holding a copy of the bytes.</returns>
        public ArrowBuffer Clone(MemoryAllocator allocator = default)
        {
            var copier = new Builder<byte>(Span.Length);
            copier.Append(Span);
            return copier.Build(allocator);
        }

        /// <summary>
        /// Compares the two buffers byte by byte.
        /// </summary>
        /// <param name="other">Buffer to compare against.</param>
        /// <returns>True when both spans hold the same sequence of bytes.</returns>
        public bool Equals(ArrowBuffer other)
        {
            ReadOnlySpan<byte> theirs = other.Span;
            return Span.SequenceEqual(theirs);
        }

        /// <summary>
        /// Disposes the memory owner, if there is one.
        /// </summary>
        public void Dispose()
        {
            if (_memoryOwner != null)
            {
                _memoryOwner.Dispose();
            }
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/BitUtility.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Apache.Arrow
{
    /// <summary>
    /// Helpers for reading, writing and counting individual bits in byte spans,
    /// plus a few rounding and size calculations. Bit <c>n</c> of a span lives in
    /// byte <c>n / 8</c> at bit position <c>n % 8</c>, least-significant bit first.
    /// </summary>
    public static class BitUtility
    {
        // Number of set bits for every possible byte value, indexed by that value.
        private static ReadOnlySpan<byte> PopcountTable => new byte[] {
            0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
        };

        // Single-bit masks, indexed by bit position within a byte.
        private static ReadOnlySpan<byte> BitMask => new byte[] {
            1, 2, 4, 8, 16, 32, 64, 128
        };

        /// <summary>
        /// Gets whether the bit at position <paramref name="index"/> of a single byte is set.
        /// </summary>
        public static bool GetBit(byte data, int index) =>
            ((data >> index) & 1) != 0;

        /// <summary>
        /// Gets whether the bit at bit index <paramref name="index"/> of a span is set.
        /// </summary>
        public static bool GetBit(ReadOnlySpan<byte> data, int index) =>
            (data[index / 8] & BitMask[index % 8]) != 0;

        /// <summary>
        /// Sets the bit at bit index <paramref name="index"/> of a span to 0.
        /// </summary>
        public static void ClearBit(Span<byte> data, int index)
        {
            data[index / 8] &= (byte) ~BitMask[index % 8];
        }

        /// <summary>
        /// Sets the bit at bit index <paramref name="index"/> of a span to 1.
        /// </summary>
        public static void SetBit(Span<byte> data, int index)
        {
            data[index / 8] |= BitMask[index % 8];
        }

        /// <summary>
        /// Sets the bit at bit index <paramref name="index"/> of a span to <paramref name="value"/>.
        /// </summary>
        public static void SetBit(Span<byte> data, int index, bool value)
        {
            int idx = index / 8;
            int mod = index % 8;
            data[idx] = value
                ? (byte)(data[idx] | BitMask[mod])
                : (byte)(data[idx] & ~BitMask[mod]);
        }

        /// <summary>
        /// Inverts the bit at bit index <paramref name="index"/> of a span.
        /// </summary>
        public static void ToggleBit(Span<byte> data, int index)
        {
            data[index / 8] ^= BitMask[index % 8];
        }

        /// <summary>
        /// Counts the number of set bits in a span of bytes starting
        /// at a specific bit offset.
        /// </summary>
        /// <param name="data">Span to count bits</param>
        /// <param name="offset">Bit offset to start counting from</param>
        /// <returns>Count of set (one) bits</returns>
        public static int CountBits(ReadOnlySpan<byte> data, int offset) =>
            CountBits(data, offset, data.Length * 8 - offset);

        /// <summary>
        /// Counts the number of set bits in a span of bytes starting
        /// at a specific bit offset, and limiting to a certain number of bits
        /// in the span.
        /// </summary>
        /// <param name="data">Span to count bits.</param>
        /// <param name="offset">Bit offset to start counting from.</param>
        /// <param name="length">Maximum of bits in the span to consider.</param>
        /// <returns>
        /// Count of set (one) bits; 0 when <paramref name="length"/> is 0, including
        /// for an empty span.
        /// </returns>
        public static int CountBits(ReadOnlySpan<byte> data, int offset, int length)
        {
            // Nothing to count. Without this guard the index arithmetic below works on
            // "offset - 1" and ends up slicing one byte out of the span, which throws
            // when the span is empty.
            if (length == 0)
            {
                return 0;
            }

            int startByteIndex = offset / 8;
            int startBitOffset = offset % 8;
            int endByteIndex = (offset + length - 1) / 8;
            int endBitOffset = (offset + length - 1) % 8;
            if (startBitOffset < 0)
            {
                return 0;
            }

            int count = 0;
            if (startByteIndex == endByteIndex)
            {
                // Range starts and ends within the same byte.
                var slice = data.Slice(startByteIndex, 1);
                for (int i = startBitOffset; i <= endBitOffset; i++)
                {
                    count += GetBit(slice, i) ? 1 : 0;
                }

                return count;
            }

            // If the starting index and ending index are not byte-aligned,
            // we'll need to count bits the slow way. If they are
            // byte-aligned, and for all other bytes in the 'middle', we
            // can use a faster byte-aligned count.
            int fullByteStartIndex = startBitOffset == 0 ? startByteIndex : startByteIndex + 1;
            int fullByteEndIndex = endBitOffset == 7 ? endByteIndex : endByteIndex - 1;

            if (startBitOffset != 0)
            {
                var slice = data.Slice(startByteIndex, 1);
                for (int i = startBitOffset; i <= 7; i++)
                {
                    count += GetBit(slice, i) ? 1 : 0;
                }
            }

            if (fullByteEndIndex >= fullByteStartIndex)
            {
                var slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
                count += CountBits(slice);
            }

            if (endBitOffset != 7)
            {
                var slice = data.Slice(endByteIndex, 1);
                for (int i = 0; i <= endBitOffset; i++)
                {
                    count += GetBit(slice, i) ? 1 : 0;
                }
            }

            return count;
        }

        /// <summary>
        /// Counts the number of set bits in a span of bytes.
        /// </summary>
        /// <param name="data">Span to count bits</param>
        /// <returns>Count of set (one) bits.</returns>
        public static int CountBits(ReadOnlySpan<byte> data)
        {
            int count = 0;
            foreach (byte t in data)
            {
                count += PopcountTable[t];
            }

            return count;
        }

        /// <summary>
        /// Rounds an integer up to the nearest multiple of 64.
        /// </summary>
        /// <param name="n">Integer to round.</param>
        /// <returns>Smallest multiple of 64 that is not less than <paramref name="n"/>.</returns>
        public static long RoundUpToMultipleOf64(long n) =>
            RoundUpToMultiplePowerOfTwo(n, 64);

        /// <summary>
        /// Rounds an integer up to the nearest multiple of 8.
        /// </summary>
        /// <param name="n">Integer to round.</param>
        /// <returns>Smallest multiple of 8 that is not less than <paramref name="n"/>.</returns>
        public static long RoundUpToMultipleOf8(long n) =>
            RoundUpToMultiplePowerOfTwo(n, 8);

        /// <summary>
        /// Rounds an integer up to the nearest multiple of factor, where
        /// factor must be a power of two.
        ///
        /// This function does not throw when the factor is not a power of two
        /// (it only asserts in debug builds); the result is then not a meaningful rounding.
        /// </summary>
        /// <param name="n">Integer to round up.</param>
        /// <param name="factor">Power of two factor to round up to.</param>
        /// <returns>Integer rounded up to the nearest multiple of <paramref name="factor"/>.</returns>
        public static long RoundUpToMultiplePowerOfTwo(long n, int factor)
        {
            // Assert that factor is a power of two.
            Debug.Assert(factor > 0 && (factor & (factor - 1)) == 0);

            // Adding factor - 1 and clearing the low bits rounds up to the next multiple.
            return (n + (factor - 1)) & ~(factor - 1);
        }

        internal static bool IsMultipleOf8(long n) => n % 8 == 0;

        /// <summary>
        /// Calculates the number of bytes required to store n bits.
        /// </summary>
        /// <param name="n">number of bits</param>
        /// <returns>number of bytes</returns>
        public static int ByteCount(int n)
        {
            Debug.Assert(n >= 0);
            return n / 8 + (n % 8 != 0 ? 1 : 0); // ceil(n / 8)
        }

        // Reads a 32-bit integer from the start of the memory without requiring alignment.
        internal static int ReadInt32(ReadOnlyMemory<byte> value)
        {
            Debug.Assert(value.Length >= sizeof(int));

            return Unsafe.ReadUnaligned<int>(ref MemoryMarshal.GetReference(value.Span));
        }
    }
}

// File: src/arrow/csharp/src/Apache.Arrow/ChunkedArray.cs
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
namespace Apache.Arrow
{
    /// <summary>
    /// A data structure to manage a list of primitive Array arrays logically as one large array
    /// </summary>
    public class ChunkedArray
    {
        private IList<Array> Arrays { get; }

        /// <summary>Data type of the first chunk passed to the constructor.</summary>
        public IArrowType DataType { get; }

        /// <summary>Sum of the lengths of all chunks.</summary>
        public long Length { get; }

        /// <summary>Sum of the null counts of all chunks.</summary>
        public long NullCount { get; }

        /// <summary>Number of chunks held by this instance.</summary>
        public int ArrayCount
        {
            get => Arrays.Count;
        }

        /// <summary>Returns the chunk at <paramref name="index"/>.</summary>
        public Array Array(int index) => Arrays[index];

        /// <summary>
        /// Creates a chunked array over <paramref name="arrays"/>.
        /// </summary>
        /// <param name="arrays">Chunks, in logical order; at least one is required.</param>
        /// <exception cref="ArgumentNullException"><paramref name="arrays"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="arrays"/> is empty.</exception>
        public ChunkedArray(IList<Array> arrays)
        {
            Arrays = arrays ?? throw new ArgumentNullException(nameof(arrays));
            if (arrays.Count < 1)
            {
                throw new ArgumentException($"Count must be at least 1. Got {arrays.Count} instead");
            }
            DataType = arrays[0].Data.DataType;
            foreach (Array array in arrays)
            {
                Length += array.Length;
                NullCount += array.NullCount;
            }
        }

        /// <summary>Creates a chunked array holding a single chunk.</summary>
        public ChunkedArray(Array array) : this(new[] { array }) { }

        /// <summary>
        /// Builds a new chunked array from the chunks covering the logical range that
        /// starts at <paramref name="offset"/> and spans up to <paramref name="length"/> elements.
        /// </summary>
        /// <param name="offset">Logical start index; must be less than <see cref="Length"/>.</param>
        /// <param name="length">Maximum number of elements to include.</param>
        /// <returns>A new <see cref="ChunkedArray"/> made of slices of the covered chunks.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="offset"/> is not less than <see cref="Length"/>, or the range
        /// selects no chunk (for example <paramref name="length"/> is zero), in which
        /// case the constructor rejects the empty chunk list.
        /// </exception>
        public ChunkedArray Slice(long offset, long length)
        {
            if (offset >= Length)
            {
                throw new ArgumentException($"Index {offset} cannot be greater than the Column's Length {Length}");
            }

            // Skip every chunk that lies entirely before the requested offset.
            // The comparison is '>=' so that an offset falling exactly on a chunk
            // boundary starts at index 0 of the next chunk, rather than slicing the
            // previous chunk at its end. The guard above (offset < Length)
            // guarantees the loop stops on an existing chunk.
            int curArrayIndex = 0;
            int numArrays = Arrays.Count;
            while (curArrayIndex < numArrays && offset >= Arrays[curArrayIndex].Length)
            {
                offset -= Arrays[curArrayIndex].Length;
                curArrayIndex++;
            }

            IList<Array> newArrays = new List<Array>();
            while (curArrayIndex < numArrays && length > 0)
            {
                newArrays.Add(Arrays[curArrayIndex].Slice((int)offset,
                    length > Arrays[curArrayIndex].Length ? Arrays[curArrayIndex].Length : (int)length));

                // Only the first selected chunk is entered at a non-zero offset.
                length -= Arrays[curArrayIndex].Length - offset;
                offset = 0;
                curArrayIndex++;
            }
            return new ChunkedArray(newArrays);
        }

        /// <summary>
        /// Slices from <paramref name="offset"/> to the end of the chunked array.
        /// </summary>
        public ChunkedArray Slice(long offset)
        {
            return Slice(offset, Length - offset);
        }

        // TODO: Flatten for Structs
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Pairs a <see cref="Field"/> with the chunked data that makes up one
    /// logical column of a dataset.
    /// </summary>
    public class Column
    {
        /// <summary>Field describing this column.</summary>
        public Field Field { get; }

        /// <summary>Chunked values of this column.</summary>
        public ChunkedArray Data { get; }

        /// <summary>
        /// Builds a column from a field and its chunks.
        /// </summary>
        /// <param name="field">Field describing the column.</param>
        /// <param name="arrays">Chunks holding the column values.</param>
        /// <exception cref="ArgumentException">
        /// A chunk's data type is not the field's data type.
        /// </exception>
        public Column(Field field, IList<Array> arrays)
        {
            Data = new ChunkedArray(arrays);
            Field = field;

            if (ValidateArrayDataTypes())
            {
                return;
            }

            throw new ArgumentException($"{Field.DataType} must match {Data.DataType}");
        }

        // Used by Slice: wraps already-built chunked data without re-validating it.
        private Column(Field field, ChunkedArray arrays)
        {
            Field = field;
            Data = arrays;
        }

        /// <summary>Total number of values, taken from <see cref="Data"/>.</summary>
        public long Length
        {
            get { return Data.Length; }
        }

        /// <summary>Total number of nulls, taken from <see cref="Data"/>.</summary>
        public long NullCount
        {
            get { return Data.NullCount; }
        }

        /// <summary>Name of the underlying field.</summary>
        public string Name
        {
            get { return Field.Name; }
        }

        /// <summary>Data type of the underlying field.</summary>
        public IArrowType Type
        {
            get { return Field.DataType; }
        }

        /// <summary>
        /// Returns a column over the same field whose data is
        /// <c>Data.Slice(offset, length)</c>.
        /// </summary>
        public Column Slice(int offset, int length) =>
            new Column(Field, Data.Slice(offset, length));

        /// <summary>
        /// Returns a column over the same field whose data is <c>Data.Slice(offset)</c>.
        /// </summary>
        public Column Slice(int offset) =>
            new Column(Field, Data.Slice(offset));

        // True when every chunk reports the same DataType instance as the field.
        private bool ValidateArrayDataTypes()
        {
            for (int chunk = 0; chunk != Data.ArrayCount; chunk++)
            {
                bool sameType = Data.Array(chunk).Data.DataType == Field.DataType;
                if (!sameType)
                {
                    return false;
                }
            }

            return true;
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// This is semi-optimised best attempt at converting to / from decimal and the buffers
    /// </summary>
    internal static class DecimalUtility
    {
        private static readonly BigInteger s_maxDecimal = new BigInteger(decimal.MaxValue);
        private static readonly BigInteger s_minDecimal = new BigInteger(decimal.MinValue);
        private static readonly ulong[] s_powersOfTen =
        {
            1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000,
            1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000,
            1000000000000000000, 10000000000000000000
        };

        // Index of the largest entry of s_powersOfTen (i.e. the largest exponent available).
        private static int PowersOfTenLength => s_powersOfTen.Length - 1;

        /// <summary>
        /// Reads the <paramref name="index"/>-th fixed-width value from
        /// <paramref name="valueBuffer"/> and converts it to a <see cref="decimal"/>.
        /// </summary>
        /// <param name="valueBuffer">Buffer holding the values back to back.</param>
        /// <param name="index">Zero-based index of the value to read.</param>
        /// <param name="scale">Number of decimal places to shift the stored integer by.</param>
        /// <param name="byteWidth">Width in bytes of each stored value.</param>
        /// <returns>The stored integer divided by ten to the power of <paramref name="scale"/>.</returns>
        /// <exception cref="OverflowException">
        /// The integral part of the value does not fit in a <see cref="decimal"/>.
        /// </exception>
        internal static decimal GetDecimal(in ArrowBuffer valueBuffer, int index, int scale, int byteWidth)
        {
            int startIndex = index * byteWidth;
            ReadOnlySpan<byte> value = valueBuffer.Span.Slice(startIndex, byteWidth);
            BigInteger integerValue;

#if NETCOREAPP
            integerValue = new BigInteger(value);
#else
            integerValue = new BigInteger(value.ToArray());
#endif

            if (integerValue > s_maxDecimal || integerValue < s_minDecimal)
            {
                // The unscaled integer is too large for decimal: split it into an
                // integral and a fractional part and convert the two separately.
                BigInteger scaleBy = BigInteger.Pow(10, scale);
                BigInteger integerPart = BigInteger.DivRem(integerValue, scaleBy, out BigInteger fractionalPart);
                if (integerPart > s_maxDecimal || integerPart < s_minDecimal) // decimal overflow, not much we can do here - C# needs a BigDecimal
                {
                    throw new OverflowException($"Value: {integerPart} too big or too small to be represented as a decimal");
                }
                return (decimal)integerPart + DivideByScale(fractionalPart, scale);
            }
            else
            {
                return DivideByScale(integerValue, scale);
            }
        }

        // Divides by 10^scale, in steps no larger than the biggest tabulated power of ten.
        private static decimal DivideByScale(BigInteger integerValue, int scale)
        {
            decimal result = (decimal)integerValue; // this cast is safe here
            int drop = scale;
            while (drop > PowersOfTenLength)
            {
                result /= s_powersOfTen[PowersOfTenLength];
                drop -= PowersOfTenLength;
            }

            result /= s_powersOfTen[drop];
            return result;
        }

        /// <summary>
        /// Writes <paramref name="value"/>, rescaled to <paramref name="scale"/> decimal
        /// places, into <paramref name="bytes"/> as a sign-extended integer of
        /// <paramref name="byteWidth"/> bytes.
        /// </summary>
        /// <param name="value">Decimal to encode.</param>
        /// <param name="precision">Maximum number of digits allowed for the unscaled value.</param>
        /// <param name="scale">Scale of the destination; must not be smaller than the scale of <paramref name="value"/>.</param>
        /// <param name="byteWidth">Required length of <paramref name="bytes"/>.</param>
        /// <param name="bytes">Destination; every byte of it is overwritten.</param>
        /// <exception cref="OverflowException">
        /// The value does not fit the requested scale, precision or byte width, or
        /// <paramref name="bytes"/> is not exactly <paramref name="byteWidth"/> long.
        /// </exception>
        internal static void GetBytes(decimal value, int precision, int scale, int byteWidth, Span<byte> bytes)
        {
            // decimal.GetBits returns { low, mid, high, flags }: a 96-bit unsigned
            // magnitude plus a flags word carrying the scale (bits 16-22) and sign.
            int[] decimalBits = decimal.GetBits(value);
            int decScale = (decimalBits[3] >> 16) & 0x7F;

            // Assemble the magnitude from unsigned 32-bit words. Feeding the raw
            // 12 bytes to BigInteger would treat the top bit as a sign bit and turn
            // magnitudes of 2^95 and above (e.g. decimal.MaxValue) into negatives.
            BigInteger bigInt = unchecked(
                ((BigInteger)(uint)decimalBits[2] << 64)
                | ((BigInteger)(uint)decimalBits[1] << 32)
                | (uint)decimalBits[0]);

            if (value < 0)
            {
                bigInt = -bigInt;
            }

            // validate precision and scale
            if (decScale > scale)
                throw new OverflowException($"Decimal scale cannot be greater than that in the Arrow vector: {decScale} != {scale}");

            // Compare the magnitude so that negative values are validated as well.
            if (BigInteger.Abs(bigInt) >= BigInteger.Pow(10, precision))
                throw new OverflowException($"Decimal precision cannot be greater than that in the Arrow vector: {value} has precision > {precision}");

            if (decScale < scale) // pad with trailing zeros
            {
                bigInt *= BigInteger.Pow(10, scale - decScale);
            }

            // extract bytes from BigInteger
            if (bytes.Length != byteWidth)
            {
                throw new OverflowException($"ValueBuffer size not equal to {byteWidth} byte width: {bytes.Length}");
            }

            int bytesWritten;
#if NETCOREAPP
            if (!bigInt.TryWriteBytes(bytes, out bytesWritten, false, !BitConverter.IsLittleEndian))
                throw new OverflowException("Could not extract bytes from integer value " + bigInt);
#else
            byte[] tempBytes = bigInt.ToByteArray();
            if (tempBytes.Length > byteWidth)
            {
                // Report the overflow ourselves instead of letting CopyTo fail
                // with an unrelated ArgumentException.
                throw new OverflowException($"Decimal size greater than {byteWidth} bytes: {tempBytes.Length}");
            }

            tempBytes.CopyTo(bytes);
            bytesWritten = tempBytes.Length;
#endif

            // Sign-extend into the bytes the integer did not reach. Non-negative
            // values are padded explicitly too, so the result does not depend on
            // what the caller's buffer held beforehand.
            byte padding = bigInt.Sign < 0 ? (byte)0xFF : (byte)0x00;
            bytes.Slice(bytesWritten).Fill(padding);
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Argument checks for <see cref="ArrayData"/> instances.
    /// </summary>
    internal static class ArrayDataExtensions
    {
        /// <summary>
        /// Throws unless <paramref name="data"/> holds exactly <paramref name="count"/> buffers.
        /// </summary>
        /// <param name="data">Array data to check.</param>
        /// <param name="count">Required number of buffers.</param>
        /// <exception cref="ArgumentException">The buffer count differs from <paramref name="count"/>.</exception>
        public static void EnsureBufferCount(this ArrayData data, int count)
        {
            if (data.Buffers.Length != count)
            {
                // TODO: Use localizable string resource
                // The message states what the check enforces (equality), and the
                // parameter name identifies the offending argument of this method.
                throw new ArgumentException(
                    $"Buffer count <{data.Buffers.Length}> must be exactly <{count}>",
                    nameof(data));
            }
        }

        /// <summary>
        /// Throws unless the type id of <paramref name="data"/> equals <paramref name="id"/>.
        /// </summary>
        /// <param name="data">Array data to check.</param>
        /// <param name="id">Required type id.</param>
        /// <exception cref="ArgumentException">The type id differs from <paramref name="id"/>.</exception>
        public static void EnsureDataType(this ArrayData data, ArrowTypeId id)
        {
            if (data.DataType.TypeId != id)
            {
                // TODO: Use localizable string resource
                throw new ArgumentException(
                    $"Specified array type <{data.DataType.TypeId}> does not match expected type(s) <{id}>",
                    nameof(data));
            }
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Helpers that rent a buffer from an <see cref="ArrayPool{T}"/>, hand it to a
    /// callback, and return it to the pool once the callback is done with it.
    /// </summary>
    internal static class ArrayPoolExtensions
    {
        /// <summary>
        /// Rents at least <paramref name="length"/> bytes, invokes
        /// <paramref name="action"/> on the first <paramref name="length"/> bytes and
        /// returns the array to the pool, even when the action throws.
        /// </summary>
        /// <param name="pool">Pool to rent from.</param>
        /// <param name="length">Number of bytes exposed to the action.</param>
        /// <param name="action">Callback that uses the buffer.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static void RentReturn(this ArrayPool<byte> pool, int length, Action<Memory<byte>> action)
        {
            byte[] array = pool.Rent(length);

            try
            {
                action(array.AsMemory(0, length));
            }
            finally
            {
                pool.Return(array);
            }
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="RentReturn"/>: the rented array is
        /// returned to the pool only after the task produced by
        /// <paramref name="action"/> has completed (successfully or not).
        /// </summary>
        /// <param name="pool">Pool to rent from.</param>
        /// <param name="length">Number of bytes exposed to the action.</param>
        /// <param name="action">Asynchronous callback that uses the buffer.</param>
        /// <returns>A task that completes once the action has finished and the buffer is back in the pool.</returns>
        public static async ValueTask RentReturnAsync(this ArrayPool<byte> pool, int length, Func<Memory<byte>, ValueTask> action)
        {
            byte[] array = pool.Rent(length);

            try
            {
                // The await is essential: returning the pending task from inside the
                // try block would run the finally immediately and hand the array back
                // to the pool while the action may still be reading or writing it.
                await action(array.AsMemory(0, length)).ConfigureAwait(false);
            }
            finally
            {
                pool.Return(array);
            }
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Classification helpers for <see cref="IArrowType"/> based on its type id.
    /// </summary>
    public static class ArrowTypeExtensions
    {
        /// <summary>
        /// Tells whether <paramref name="type"/> is one of the signed or unsigned
        /// 8, 16, 32 or 64-bit integer types.
        /// </summary>
        public static bool IsIntegral(this IArrowType type)
        {
            switch (type.TypeId)
            {
                case ArrowTypeId.Int8:
                case ArrowTypeId.Int16:
                case ArrowTypeId.Int32:
                case ArrowTypeId.Int64:
                case ArrowTypeId.UInt8:
                case ArrowTypeId.UInt16:
                case ArrowTypeId.UInt32:
                case ArrowTypeId.UInt64:
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>
        /// Tells whether <paramref name="type"/> is the half, single or double
        /// precision floating point type.
        /// </summary>
        public static bool IsFloatingPoint(this IArrowType type)
        {
            switch (type.TypeId)
            {
                case ArrowTypeId.HalfFloat:
                case ArrowTypeId.Float:
                case ArrowTypeId.Double:
                    return true;
                default:
                    return false;
            }
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Conversions and classification helpers between the Flatbuf-generated
    /// enums and the corresponding Arrow types.
    /// </summary>
    internal static class FlatbufExtensions
    {
        /// <summary>
        /// Returns <c>false</c> for the Utf8 and Binary types and <c>true</c> for every other type.
        /// </summary>
        public static bool IsFixedPrimitive(this Flatbuf.Type t)
        {
            return t != Flatbuf.Type.Utf8 && t != Flatbuf.Type.Binary;
        }

        /// <summary>
        /// Applies <see cref="IsFixedPrimitive(Types.ArrowTypeId)"/> to the type id of <paramref name="t"/>.
        /// </summary>
        public static bool IsFixedPrimitive(this Types.IArrowType t)
        {
            return t.TypeId.IsFixedPrimitive();
        }

        /// <summary>
        /// Returns <c>false</c> for the String and Binary type ids and <c>true</c> for every other id.
        /// </summary>
        public static bool IsFixedPrimitive(this Types.ArrowTypeId t)
        {
            return t != Types.ArrowTypeId.String && t != Types.ArrowTypeId.Binary;
        }

        /// <summary>Maps a Flatbuf interval unit to the Arrow interval unit.</summary>
        /// <exception cref="ArgumentException">The unit is not a known interval unit.</exception>
        public static Types.IntervalUnit ToArrow(this Flatbuf.IntervalUnit unit)
        {
            switch (unit)
            {
                case Flatbuf.IntervalUnit.DAY_TIME:
                    return Types.IntervalUnit.DayTime;
                case Flatbuf.IntervalUnit.YEAR_MONTH:
                    return Types.IntervalUnit.YearMonth;
                default:
                    throw new ArgumentException("Unexpected Flatbuf IntervalUnit", nameof(unit));
            }
        }

        /// <summary>Maps a Flatbuf date unit to the Arrow date unit.</summary>
        /// <exception cref="ArgumentException">The unit is not a known date unit.</exception>
        public static Types.DateUnit ToArrow(this Flatbuf.DateUnit unit)
        {
            switch (unit)
            {
                case Flatbuf.DateUnit.DAY:
                    return Types.DateUnit.Day;
                case Flatbuf.DateUnit.MILLISECOND:
                    return Types.DateUnit.Milliseconds;
                default:
                    // Name the enum that was actually rejected (the message used to
                    // say IntervalUnit, copied from the overload above).
                    throw new ArgumentException("Unexpected Flatbuf DateUnit", nameof(unit));
            }
        }

        /// <summary>Maps a Flatbuf time unit to the Arrow time unit.</summary>
        /// <exception cref="ArgumentException">The unit is not a known time unit.</exception>
        public static Types.TimeUnit ToArrow(this Flatbuf.TimeUnit unit)
        {
            switch (unit)
            {
                case Flatbuf.TimeUnit.MICROSECOND:
                    return Types.TimeUnit.Microsecond;
                case Flatbuf.TimeUnit.MILLISECOND:
                    return Types.TimeUnit.Millisecond;
                case Flatbuf.TimeUnit.NANOSECOND:
                    return Types.TimeUnit.Nanosecond;
                case Flatbuf.TimeUnit.SECOND:
                    return Types.TimeUnit.Second;
                default:
                    throw new ArgumentException("Unexpected Flatbuf TimeUnit", nameof(unit));
            }
        }
    }
}
namespace Apache.Arrow
{
    /// <summary>
    /// Reinterpreting casts from byte spans to spans of another value type.
    /// </summary>
    public static class SpanExtensions
    {
        /// <summary>
        /// Views the bytes of <paramref name="span"/> as a span of <typeparamref name="T"/>
        /// over the same memory (delegates to <c>MemoryMarshal.Cast</c>).
        /// </summary>
        /// <typeparam name="T">Target value type.</typeparam>
        /// <param name="span">Bytes to reinterpret.</param>
        /// <returns>A writable span of <typeparamref name="T"/>.</returns>
        public static Span<T> CastTo<T>(this Span<byte> span)
            where T : struct
        {
            Span<T> typed = MemoryMarshal.Cast<byte, T>(span);
            return typed;
        }

        /// <summary>
        /// Views the bytes of <paramref name="span"/> as a read-only span of
        /// <typeparamref name="T"/> over the same memory (delegates to <c>MemoryMarshal.Cast</c>).
        /// </summary>
        /// <typeparam name="T">Target value type.</typeparam>
        /// <param name="span">Bytes to reinterpret.</param>
        /// <returns>A read-only span of <typeparamref name="T"/>.</returns>
        public static ReadOnlySpan<T> CastTo<T>(this ReadOnlySpan<byte> span)
            where T : struct
        {
            ReadOnlySpan<T> typed = MemoryMarshal.Cast<byte, T>(span);
            return typed;
        }
    }
}
namespace Apache.Arrow
{
    internal static partial class StreamExtensions
    {
        /// <summary>
        /// Reads from <paramref name="stream"/> until <paramref name="buffer"/> is full
        /// or a read returns zero bytes. At least one read is always issued.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="buffer">Destination to fill.</param>
        /// <param name="cancellationToken">Token passed to every read.</param>
        /// <returns>
        /// Number of bytes placed in <paramref name="buffer"/>; less than its length
        /// when a read returned zero bytes first.
        /// </returns>
        public static async ValueTask<int> ReadFullBufferAsync(this Stream stream, Memory<byte> buffer, CancellationToken cancellationToken = default)
        {
            int filled = 0;
            while (true)
            {
                Memory<byte> remaining = buffer.Slice(filled);
                int chunk = await stream.ReadAsync(remaining, cancellationToken).ConfigureAwait(false);

                if (chunk == 0)
                {
                    // reached the end of the stream
                    return filled;
                }

                filled += chunk;
                if (filled >= buffer.Length)
                {
                    return filled;
                }
            }
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadFullBufferAsync"/>: reads until
        /// <paramref name="buffer"/> is full or a read returns zero bytes.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="buffer">Destination to fill.</param>
        /// <returns>Number of bytes placed in <paramref name="buffer"/>.</returns>
        public static int ReadFullBuffer(this Stream stream, Memory<byte> buffer)
        {
            int filled = 0;
            while (true)
            {
                Memory<byte> remaining = buffer.Slice(filled);
                int chunk = stream.Read(remaining);

                if (chunk == 0)
                {
                    // reached the end of the stream
                    return filled;
                }

                filled += chunk;
                if (filled >= buffer.Length)
                {
                    return filled;
                }
            }
        }
    }
}
namespace Apache.Arrow
{
    // Memory<byte>-based Read/Write helpers for netcoreapp, forwarding to the
    // span-based Stream overloads.
    internal static partial class StreamExtensions
    {
        /// <summary>
        /// Reads from <paramref name="stream"/> into the span of <paramref name="buffer"/>.
        /// </summary>
        /// <returns>The value returned by <c>stream.Read</c>.</returns>
        public static int Read(this Stream stream, Memory<byte> buffer) =>
            stream.Read(buffer.Span);

        /// <summary>
        /// Writes the span of <paramref name="buffer"/> to <paramref name="stream"/>.
        /// </summary>
        public static void Write(this Stream stream, ReadOnlyMemory<byte> buffer) =>
            stream.Write(buffer.Span);
    }
}
namespace Apache.Arrow
{
    // Memory<byte>-based Read/Write helpers for netstandard, built on the
    // array-based Stream methods. Array-backed memory is passed straight through;
    // any other memory goes through a buffer rented from the shared ArrayPool.
    internal static partial class StreamExtensions
    {
        /// <summary>
        /// Reads from <paramref name="stream"/> into <paramref name="buffer"/>.
        /// </summary>
        /// <returns>The number of bytes reported by the underlying read.</returns>
        public static int Read(this Stream stream, Memory<byte> buffer)
        {
            if (MemoryMarshal.TryGetArray(buffer, out ArraySegment<byte> segment))
            {
                return stream.Read(segment.Array, segment.Offset, segment.Count);
            }

            byte[] rented = ArrayPool<byte>.Shared.Rent(buffer.Length);
            try
            {
                int bytesRead = stream.Read(rented, 0, buffer.Length);
                rented.AsSpan(0, bytesRead).CopyTo(buffer.Span);
                return bytesRead;
            }
            finally
            {
                ArrayPool<byte>.Shared.Return(rented);
            }
        }

        /// <summary>
        /// Asynchronously reads from <paramref name="stream"/> into <paramref name="buffer"/>.
        /// </summary>
        /// <returns>The number of bytes reported by the underlying read.</returns>
        public static ValueTask<int> ReadAsync(this Stream stream, Memory<byte> buffer, CancellationToken cancellationToken = default)
        {
            if (MemoryMarshal.TryGetArray(buffer, out ArraySegment<byte> segment))
            {
                Task<int> direct = stream.ReadAsync(segment.Array, segment.Offset, segment.Count, cancellationToken);
                return new ValueTask<int>(direct);
            }

            byte[] rented = ArrayPool<byte>.Shared.Rent(buffer.Length);
            Task<int> pendingRead = stream.ReadAsync(rented, 0, buffer.Length, cancellationToken);
            return CopyBackWhenDone(pendingRead, rented, buffer);

            // Awaits the read, copies what was read into the caller's memory and
            // gives the rented array back to the pool whatever the outcome.
            async ValueTask<int> CopyBackWhenDone(Task<int> readTask, byte[] scratch, Memory<byte> destination)
            {
                try
                {
                    int bytesRead = await readTask.ConfigureAwait(false);
                    scratch.AsSpan(0, bytesRead).CopyTo(destination.Span);
                    return bytesRead;
                }
                finally
                {
                    ArrayPool<byte>.Shared.Return(scratch);
                }
            }
        }

        /// <summary>
        /// Writes <paramref name="buffer"/> to <paramref name="stream"/>.
        /// </summary>
        public static void Write(this Stream stream, ReadOnlyMemory<byte> buffer)
        {
            if (MemoryMarshal.TryGetArray(buffer, out ArraySegment<byte> segment))
            {
                stream.Write(segment.Array, segment.Offset, segment.Count);
                return;
            }

            byte[] rented = ArrayPool<byte>.Shared.Rent(buffer.Length);
            try
            {
                buffer.Span.CopyTo(rented);
                stream.Write(rented, 0, buffer.Length);
            }
            finally
            {
                ArrayPool<byte>.Shared.Return(rented);
            }
        }

        /// <summary>
        /// Asynchronously writes <paramref name="buffer"/> to <paramref name="stream"/>.
        /// </summary>
        public static ValueTask WriteAsync(this Stream stream, ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
        {
            if (MemoryMarshal.TryGetArray(buffer, out ArraySegment<byte> segment))
            {
                Task direct = stream.WriteAsync(segment.Array, segment.Offset, segment.Count, cancellationToken);
                return new ValueTask(direct);
            }

            byte[] rented = ArrayPool<byte>.Shared.Rent(buffer.Length);
            buffer.Span.CopyTo(rented);
            Task pendingWrite = stream.WriteAsync(rented, 0, buffer.Length, cancellationToken);
            return FinishWriteAsync(pendingWrite, rented);
        }

        // Awaits the write and gives the rented array back to the pool whatever the outcome.
        private static async ValueTask FinishWriteAsync(Task writeTask, byte[] localBuffer)
        {
            try
            {
                await writeTask.ConfigureAwait(false);
            }
            finally
            {
                ArrayPool<byte>.Shared.Return(localBuffer);
            }
        }
    }
}
using System;

namespace Apache.Arrow
{
    public static class TimeSpanExtensions
    {
        /// <summary>
        /// Formats a TimeSpan into an ISO 8601 compliant time offset string.
        /// </summary>
        /// <remarks>
        /// Only the <see cref="TimeSpan.Hours"/> and <see cref="TimeSpan.Minutes"/> components
        /// are emitted, as <c>+HH:mm</c> or <c>-HH:mm</c>.
        /// </remarks>
        /// <param name="timeSpan">timeSpan to format</param>
        /// <returns>ISO 8601 offset string</returns>
        public static string ToTimeZoneOffsetString(this TimeSpan timeSpan)
        {
            // All components of a TimeSpan share its sign, so a negative offset shorter
            // than one hour has Hours == 0 and a negative Minutes. Looking at Hours alone
            // would format -00:30 as "+00:30".
            bool isNegative = timeSpan.Hours < 0 || timeSpan.Minutes < 0;
            string sign = isNegative ? "-" : "+";
            int hours = Math.Abs(timeSpan.Hours);
            int minutes = Math.Abs(timeSpan.Minutes);
            return sign + hours.ToString("00") + ":" + minutes.ToString("00");
        }
    }
}
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    // Helpers to Deconstruct Tuples on netstandard
    internal static partial class TupleExtensions
    {
        /// <summary>
        /// Copies the two items of <paramref name="value"/> into the out parameters, which
        /// lets a <see cref="Tuple{T1, T2}"/> be used with deconstruction syntax.
        /// </summary>
        public static void Deconstruct<T1, T2>(this Tuple<T1, T2> value, out T1 item1, out T2 item2)
        {
            T1 first = value.Item1;
            T2 second = value.Item2;

            item1 = first;
            item2 = second;
        }
    }
}

namespace Apache.Arrow
{
    public partial class Field
    {
        /// <summary>
        /// Fluent builder that collects a name, data type, nullability flag and metadata
        /// and hands them to the <see cref="Field"/> constructor in <see cref="Build"/>.
        /// </summary>
        public class Builder
        {
            private Dictionary<string, string> _metadata;
            private string _name;
            private IArrowType _type;
            private bool _nullable;

            /// <summary>
            /// Starts with an empty name, <see cref="NullType.Default"/> and nullable = true.
            /// </summary>
            public Builder()
            {
                _nullable = true;
                _type = NullType.Default;
                _name = string.Empty;
            }

            /// <summary>Sets the field name.</summary>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="value"/> is null, empty or whitespace.
            /// </exception>
            public Builder Name(string value)
            {
                _name = !string.IsNullOrWhiteSpace(value)
                    ? value
                    : throw new ArgumentNullException(nameof(value));
                return this;
            }

            /// <summary>
            /// Sets the data type; a null <paramref name="type"/> selects <see cref="NullType.Default"/>.
            /// </summary>
            public Builder DataType(IArrowType type)
            {
                if (type is null)
                {
                    _type = NullType.Default;
                }
                else
                {
                    _type = type;
                }

                return this;
            }

            /// <summary>Sets whether the field is nullable.</summary>
            public Builder Nullable(bool value)
            {
                _nullable = value;
                return this;
            }

            /// <summary>
            /// Adds a metadata entry, overwriting any value already stored under <paramref name="key"/>.
            /// </summary>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="key"/> is null, empty or whitespace.
            /// </exception>
            public Builder Metadata(string key, string value)
            {
                bool keyIsBlank = string.IsNullOrWhiteSpace(key);
                if (keyIsBlank)
                {
                    throw new ArgumentNullException(nameof(key));
                }

                // The dictionary is created lazily on the first entry.
                if (_metadata is null)
                {
                    _metadata = new Dictionary<string, string>();
                }

                _metadata[key] = value;
                return this;
            }

            /// <summary>
            /// Adds every entry of <paramref name="dictionary"/> through
            /// <see cref="Metadata(string, string)"/>, so the same key validation applies.
            /// </summary>
            /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
            public Builder Metadata(IEnumerable<KeyValuePair<string, string>> dictionary)
            {
                if (dictionary is null)
                {
                    throw new ArgumentNullException(nameof(dictionary));
                }

                foreach (KeyValuePair<string, string> pair in dictionary)
                {
                    Metadata(pair.Key, pair.Value);
                }

                return this;
            }

            /// <summary>Creates a <see cref="Field"/> from the values collected so far.</summary>
            public Field Build() => new Field(_name, _type, _nullable, _metadata);
        }
    }
}
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public partial class Field + { + public IArrowType DataType { get; } + + public string Name { get; } + + public bool IsNullable { get; } + + public bool HasMetadata => Metadata?.Count > 0; + + public IReadOnlyDictionary<string, string> Metadata { get; } + + public Field(string name, IArrowType dataType, bool nullable, + IEnumerable<KeyValuePair<string, string>> metadata = default) + : this(name, dataType, nullable) + { + Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value); + + } + + internal Field(string name, IArrowType dataType, bool nullable, + IReadOnlyDictionary<string, string> metadata, bool copyCollections) + : this(name, dataType, nullable) + { + Debug.Assert(copyCollections == false, "This internal constructor is to not copy the collections."); + + Metadata = metadata; + } + + private Field(string name, IArrowType dataType, bool nullable) + { + if (string.IsNullOrWhiteSpace(name)) + { + throw new ArgumentNullException(nameof(name)); + } + + Name = name; + DataType = dataType ?? 
NullType.Default; + IsNullable = nullable; + } + } +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Block.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Block.cs new file mode 100644 index 000000000..89c065b20 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Block.cs @@ -0,0 +1,37 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Block : IFlatbufferObject +{ + private Struct __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Block __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// Index to the start of the RecordBlock (note this is past the Message header) + public long Offset { get { return __p.bb.GetLong(__p.bb_pos + 0); } } + /// Length of the metadata + public int MetaDataLength { get { return __p.bb.GetInt(__p.bb_pos + 8); } } + /// Length of the data (this is aligned so there can be a gap between this and + /// the metadata).
+ public long BodyLength { get { return __p.bb.GetLong(__p.bb_pos + 16); } } + + public static Offset<Block> CreateBlock(FlatBufferBuilder builder, long Offset, int MetaDataLength, long BodyLength) { + builder.Prep(8, 24); + builder.PutLong(BodyLength); + builder.Pad(4); + builder.PutInt(MetaDataLength); + builder.PutLong(Offset); + return new Offset<Block>(builder.Offset); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs new file mode 100644 index 000000000..dda0dd403 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs @@ -0,0 +1,47 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types +internal struct BodyCompression : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static BodyCompression GetRootAsBodyCompression(ByteBuffer _bb) { return GetRootAsBodyCompression(_bb, new BodyCompression()); } + public static BodyCompression GetRootAsBodyCompression(ByteBuffer _bb, BodyCompression obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// Compressor library + public CompressionType Codec { get { int o = __p.__offset(4); return o != 0 ? (CompressionType)__p.bb.GetSbyte(o + __p.bb_pos) : CompressionType.LZ4_FRAME; } } + /// Indicates the way the record batch body was compressed + public BodyCompressionMethod Method { get { int o = __p.__offset(6); return o != 0 ? 
(BodyCompressionMethod)__p.bb.GetSbyte(o + __p.bb_pos) : BodyCompressionMethod.BUFFER; } } + + public static Offset<BodyCompression> CreateBodyCompression(FlatBufferBuilder builder, + CompressionType codec = CompressionType.LZ4_FRAME, + BodyCompressionMethod method = BodyCompressionMethod.BUFFER) { + builder.StartObject(2); + BodyCompression.AddMethod(builder, method); + BodyCompression.AddCodec(builder, codec); + return BodyCompression.EndBodyCompression(builder); + } + + public static void StartBodyCompression(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddCodec(FlatBufferBuilder builder, CompressionType codec) { builder.AddSbyte(0, (sbyte)codec, 0); } + public static void AddMethod(FlatBufferBuilder builder, BodyCompressionMethod method) { builder.AddSbyte(1, (sbyte)method, 0); } + public static Offset<BodyCompression> EndBodyCompression(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<BodyCompression>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs new file mode 100644 index 000000000..7b2315cab --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs @@ -0,0 +1,36 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment +internal struct Buffer : IFlatbufferObject +{ + private Struct __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Buffer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + public 
long Offset { get { return __p.bb.GetLong(__p.bb_pos + 0); } } + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). + public long Length { get { return __p.bb.GetLong(__p.bb_pos + 8); } } + + public static Offset<Buffer> CreateBuffer(FlatBufferBuilder builder, long Offset, long Length) { + builder.Prep(8, 16); + builder.PutLong(Length); + builder.PutLong(Offset); + return new Offset<Buffer>(builder.Offset); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs new file mode 100644 index 000000000..e3afafdd5 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs @@ -0,0 +1,54 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. 
+/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag +internal struct DictionaryBatch : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static DictionaryBatch GetRootAsDictionaryBatch(ByteBuffer _bb) { return GetRootAsDictionaryBatch(_bb, new DictionaryBatch()); } + public static DictionaryBatch GetRootAsDictionaryBatch(ByteBuffer _bb, DictionaryBatch obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public DictionaryBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public long Id { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } + public RecordBatch? Data { get { int o = __p.__offset(6); return o != 0 ? (RecordBatch?)(new RecordBatch()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id + public bool IsDelta { get { int o = __p.__offset(8); return o != 0 ? 
0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } + + public static Offset<DictionaryBatch> CreateDictionaryBatch(FlatBufferBuilder builder, + long id = 0, + Offset<RecordBatch> dataOffset = default(Offset<RecordBatch>), + bool isDelta = false) { + builder.StartObject(3); + DictionaryBatch.AddId(builder, id); + DictionaryBatch.AddData(builder, dataOffset); + DictionaryBatch.AddIsDelta(builder, isDelta); + return DictionaryBatch.EndDictionaryBatch(builder); + } + + public static void StartDictionaryBatch(FlatBufferBuilder builder) { builder.StartObject(3); } + public static void AddId(FlatBufferBuilder builder, long id) { builder.AddLong(0, id, 0); } + public static void AddData(FlatBufferBuilder builder, Offset<RecordBatch> dataOffset) { builder.AddOffset(1, dataOffset.Value, 0); } + public static void AddIsDelta(FlatBufferBuilder builder, bool isDelta) { builder.AddBool(2, isDelta, false); } + public static Offset<DictionaryBatch> EndDictionaryBatch(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<DictionaryBatch>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs new file mode 100644 index 000000000..02a35fdd4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs @@ -0,0 +1,57 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +internal struct DictionaryEncoding : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static DictionaryEncoding GetRootAsDictionaryEncoding(ByteBuffer _bb) { return GetRootAsDictionaryEncoding(_bb, new DictionaryEncoding()); } + public static 
DictionaryEncoding GetRootAsDictionaryEncoding(ByteBuffer _bb, DictionaryEncoding obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public DictionaryEncoding __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// The known dictionary id in the application where this data is used. In + /// the file or streaming formats, the dictionary ids are found in the + /// DictionaryBatch messages + public long Id { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } + /// The dictionary indices are constrained to be positive integers. If this + /// field is null, the indices must be signed int32 + public Int? IndexType { get { int o = __p.__offset(6); return o != 0 ? (Int?)(new Int()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + /// By default, dictionaries are not ordered, or the order does not have + /// semantic meaning. In some statistical, applications, dictionary-encoding + /// is used to represent ordered categorical data, and we provide a way to + /// preserve that metadata here + public bool IsOrdered { get { int o = __p.__offset(8); return o != 0 ? 
0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } + + public static Offset<DictionaryEncoding> CreateDictionaryEncoding(FlatBufferBuilder builder, + long id = 0, + Offset<Int> indexTypeOffset = default(Offset<Int>), + bool isOrdered = false) { + builder.StartObject(3); + DictionaryEncoding.AddId(builder, id); + DictionaryEncoding.AddIndexType(builder, indexTypeOffset); + DictionaryEncoding.AddIsOrdered(builder, isOrdered); + return DictionaryEncoding.EndDictionaryEncoding(builder); + } + + public static void StartDictionaryEncoding(FlatBufferBuilder builder) { builder.StartObject(3); } + public static void AddId(FlatBufferBuilder builder, long id) { builder.AddLong(0, id, 0); } + public static void AddIndexType(FlatBufferBuilder builder, Offset<Int> indexTypeOffset) { builder.AddOffset(1, indexTypeOffset.Value, 0); } + public static void AddIsOrdered(FlatBufferBuilder builder, bool isOrdered) { builder.AddBool(2, isOrdered, false); } + public static Offset<DictionaryEncoding> EndDictionaryEncoding(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<DictionaryEncoding>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs new file mode 100644 index 000000000..e9f6b6e83 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs @@ -0,0 +1,24 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future +internal enum BodyCompressionMethod : sbyte +{ + /// Each constituent buffer is first compressed with the indicated + /// compressor, and then written with the uncompressed length in the 
first 8 + /// bytes as a 64-bit little-endian signed integer followed by the compressed + /// buffer bytes (and then padding as required by the protocol). The + /// uncompressed length may be set to -1 to indicate that the data that + /// follows is not compressed, which can be useful for cases where + /// compression does not yield appreciable savings. + BUFFER = 0, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/CompressionType.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/CompressionType.cs new file mode 100644 index 000000000..3d886c508 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/CompressionType.cs @@ -0,0 +1,15 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum CompressionType : sbyte +{ + LZ4_FRAME = 0, + ZSTD = 1, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/DateUnit.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/DateUnit.cs new file mode 100644 index 000000000..46fd0cc4c --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/DateUnit.cs @@ -0,0 +1,15 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum DateUnit : short +{ + DAY = 0, + MILLISECOND = 1, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Endianness.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Endianness.cs new file mode 100644 index 000000000..a0e64f4ff --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Endianness.cs @@ -0,0 +1,17 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data 
+internal enum Endianness : short +{ + Little = 0, + Big = 1, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Feature.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Feature.cs new file mode 100644 index 000000000..a05b6cf49 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Feature.cs @@ -0,0 +1,39 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. A means of negotiating between a client and server +/// what features a stream is allowed to use. The enum +/// values here are intended to represent higher level +/// features, additional details may be negotiated +/// with key-value pairs specific to the protocol. +/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. +internal enum Feature : long +{ + /// Needed to make flatbuffers happy. + UNUSED = 0, + /// The stream makes use of multiple full dictionaries with the + /// same ID and assumes clients implement dictionary replacement + /// correctly. + DICTIONARY_REPLACEMENT = 1, + /// The stream makes use of compressed bodies as described + /// in Message.fbs.
+ COMPRESSED_BODY = 2, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs new file mode 100644 index 000000000..d1363968d --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs @@ -0,0 +1,15 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum IntervalUnit : short +{ + YEAR_MONTH = 0, + DAY_TIME = 1, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs new file mode 100644 index 000000000..94d239bfa --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs @@ -0,0 +1,26 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +/// ---------------------------------------------------------------------- +/// The root Message type +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. 
For maximum compatibility, +/// it is best to send data using RecordBatch +internal enum MessageHeader : byte +{ + NONE = 0, + Schema = 1, + DictionaryBatch = 2, + RecordBatch = 3, + Tensor = 4, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs new file mode 100644 index 000000000..1e893e8cb --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs @@ -0,0 +1,29 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum MetadataVersion : short +{ + /// 0.1.0 (October 2016). + V1 = 0, + /// 0.2.0 (February 2017). Non-backwards compatible with V1. + V2 = 1, + /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + V3 = 2, + /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. + V4 = 3, + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 + /// metadata and IPC messages). Implementations are recommended to provide a + /// V4 compatibility mode with V5 format changes disabled. + /// + /// Incompatible changes between V4 and V5: + /// - Union buffer layout has changed. In V5, Unions don't have a validity + /// bitmap buffer.
+ V5 = 4, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Precision.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Precision.cs new file mode 100644 index 000000000..3f47a2c0b --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Precision.cs @@ -0,0 +1,16 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum Precision : short +{ + HALF = 0, + SINGLE = 1, + DOUBLE = 2, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/TimeUnit.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/TimeUnit.cs new file mode 100644 index 000000000..300b835d9 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/TimeUnit.cs @@ -0,0 +1,17 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum TimeUnit : short +{ + SECOND = 0, + MILLISECOND = 1, + MICROSECOND = 2, + NANOSECOND = 3, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs new file mode 100644 index 000000000..e8a7932a7 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs @@ -0,0 +1,38 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. 
We can +/// add new logical types to Type without breaking backwards compatibility +internal enum Type : byte +{ + NONE = 0, + Null = 1, + Int = 2, + FloatingPoint = 3, + Binary = 4, + Utf8 = 5, + Bool = 6, + Decimal = 7, + Date = 8, + Time = 9, + Timestamp = 10, + Interval = 11, + List = 12, + Struct_ = 13, + Union = 14, + FixedSizeBinary = 15, + FixedSizeList = 16, + Map = 17, + Duration = 18, + LargeBinary = 19, + LargeUtf8 = 20, + LargeList = 21, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/UnionMode.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/UnionMode.cs new file mode 100644 index 000000000..724ff4ac0 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Enums/UnionMode.cs @@ -0,0 +1,15 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +internal enum UnionMode : short +{ + Sparse = 0, + Dense = 1, +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Field.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Field.cs new file mode 100644 index 000000000..a4f9e3057 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Field.cs @@ -0,0 +1,83 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// A field represents a named column in a record / row batch or child of a +/// nested type. 
///
/// - children is only for nested Arrow arrays
/// - For primitive types, children will have length 0
/// - nullable should default to true in general
internal struct Field : IFlatbufferObject
{
  // Buffer (bb) and table position (bb_pos) that every accessor below reads through.
  private Table __p;

  /// The buffer this accessor is currently bound to.
  public ByteBuffer ByteBuffer
  {
    get { return __p.bb; }
  }

  /// Binds a new Field to the root table referenced at the buffer's current position.
  public static Field GetRootAsField(ByteBuffer _bb)
  {
    return GetRootAsField(_bb, new Field());
  }

  /// Binds obj to the root table: the int stored at the buffer's position is added
  /// to that position to obtain the table position.
  public static Field GetRootAsField(ByteBuffer _bb, Field obj)
  {
    int rootPosition = _bb.GetInt(_bb.Position) + _bb.Position;
    return obj.__assign(rootPosition, _bb);
  }

  /// Points this accessor at position _i of buffer _bb.
  public void __init(int _i, ByteBuffer _bb)
  {
    __p.bb_pos = _i;
    __p.bb = _bb;
  }

  /// Same as __init, but returns this value so the call can be chained.
  public Field __assign(int _i, ByteBuffer _bb)
  {
    __init(_i, _bb);
    return this;
  }

  /// Name of the field, or null when __offset(4) reports the slot as absent (0).
  public string Name
  {
    get
    {
      int slot = __p.__offset(4);
      if (slot == 0)
      {
        return null;
      }
      return __p.__string(slot + __p.bb_pos);
    }
  }

#if ENABLE_SPAN_T
  public Span<byte> GetNameBytes()
  {
    return __p.__vector_as_span(4);
  }
#else
  public ArraySegment<byte>? GetNameBytes()
  {
    return __p.__vector_as_arraysegment(4);
  }
#endif

  public byte[] GetNameArray()
  {
    return __p.__vector_as_array<byte>(4);
  }

  /// True when the stored byte is non-zero; false when the slot is absent.
  public bool Nullable
  {
    get
    {
      int slot = __p.__offset(6);
      if (slot == 0)
      {
        return false;
      }
      return __p.bb.Get(slot + __p.bb_pos) != 0;
    }
  }

  /// Discriminator of the Type union; Type.NONE when the slot is absent.
  public Type TypeType
  {
    get
    {
      int slot = __p.__offset(8);
      if (slot == 0)
      {
        return Flatbuf.Type.NONE;
      }
      return (Type)__p.bb.Get(slot + __p.bb_pos);
    }
  }

  /// Union payload read as TTable, or null when the slot is absent.
  public TTable? Type<TTable>() where TTable : struct, IFlatbufferObject
  {
    int slot = __p.__offset(10);
    if (slot == 0)
    {
      return null;
    }
    return (TTable?)__p.__union<TTable>(slot);
  }

  /// Dictionary encoding table, or null when the slot is absent.
  public DictionaryEncoding? Dictionary
  {
    get
    {
      int slot = __p.__offset(12);
      if (slot == 0)
      {
        return null;
      }
      int tablePosition = __p.__indirect(slot + __p.bb_pos);
      return (DictionaryEncoding?)(new DictionaryEncoding()).__assign(tablePosition, __p.bb);
    }
  }

  /// j-th child field (entries are 4 bytes apart), or null when the vector slot is absent.
  public Field? Children(int j)
  {
    int slot = __p.__offset(14);
    if (slot == 0)
    {
      return null;
    }
    int tablePosition = __p.__indirect(__p.__vector(slot) + j * 4);
    return (Field?)(new Field()).__assign(tablePosition, __p.bb);
  }

  public int ChildrenLength
  {
    get
    {
      int slot = __p.__offset(14);
      return slot == 0 ? 0 : __p.__vector_len(slot);
    }
  }

  /// j-th custom metadata entry (entries are 4 bytes apart), or null when the vector slot is absent.
  public KeyValue? CustomMetadata(int j)
  {
    int slot = __p.__offset(16);
    if (slot == 0)
    {
      return null;
    }
    int tablePosition = __p.__indirect(__p.__vector(slot) + j * 4);
    return (KeyValue?)(new KeyValue()).__assign(tablePosition, __p.bb);
  }

  public int CustomMetadataLength
  {
    get
    {
      int slot = __p.__offset(16);
      return slot == 0 ? 0 : __p.__vector_len(slot);
    }
  }

  /// Writes a complete Field table in one call. The slots are added in the same
  /// order as before (custom metadata first, nullable last).
  public static Offset<Field> CreateField(FlatBufferBuilder builder,
      StringOffset nameOffset = default(StringOffset),
      bool nullable = false,
      Type type_type = Flatbuf.Type.NONE,
      int typeOffset = 0,
      Offset<DictionaryEncoding> dictionaryOffset = default(Offset<DictionaryEncoding>),
      VectorOffset childrenOffset = default(VectorOffset),
      VectorOffset custom_metadataOffset = default(VectorOffset))
  {
    builder.StartObject(7);
    AddCustomMetadata(builder, custom_metadataOffset);
    AddChildren(builder, childrenOffset);
    AddDictionary(builder, dictionaryOffset);
    AddType(builder, typeOffset);
    AddName(builder, nameOffset);
    AddTypeType(builder, type_type);
    AddNullable(builder, nullable);
    return EndField(builder);
  }

  public static void StartField(FlatBufferBuilder builder)
  {
    builder.StartObject(7);
  }

  public static void AddName(FlatBufferBuilder builder, StringOffset nameOffset)
  {
    builder.AddOffset(0, nameOffset.Value, 0);
  }

  public static void AddNullable(FlatBufferBuilder builder, bool nullable)
  {
    builder.AddBool(1, nullable, false);
  }

  public static void AddTypeType(FlatBufferBuilder builder, Type typeType)
  {
    builder.AddByte(2, (byte)typeType, 0);
  }

  public static void AddType(FlatBufferBuilder builder, int typeOffset)
  {
    builder.AddOffset(3, typeOffset, 0);
  }

  public static void AddDictionary(FlatBufferBuilder builder, Offset<DictionaryEncoding> dictionaryOffset)
  {
    builder.AddOffset(4, dictionaryOffset.Value, 0);
  }

  public static void AddChildren(FlatBufferBuilder builder, VectorOffset childrenOffset)
  {
    builder.AddOffset(5, childrenOffset.Value, 0);
  }

  /// Builds the children vector, adding the offsets from the last element to the first.
  public static VectorOffset CreateChildrenVector(FlatBufferBuilder builder, Offset<Field>[] data)
  {
    builder.StartVector(4, data.Length, 4);
    for (int remaining = data.Length; remaining > 0; remaining--)
    {
      builder.AddOffset(data[remaining - 1].Value);
    }
    return builder.EndVector();
  }

  public static VectorOffset CreateChildrenVectorBlock(FlatBufferBuilder builder, Offset<Field>[] data)
  {
    builder.StartVector(4, data.Length, 4);
    builder.Add(data);
    return builder.EndVector();
  }

  public static void StartChildrenVector(FlatBufferBuilder builder, int numElems)
  {
    builder.StartVector(4, numElems, 4);
  }

  public static void AddCustomMetadata(FlatBufferBuilder builder, VectorOffset customMetadataOffset)
  {
    builder.AddOffset(6, customMetadataOffset.Value, 0);
  }

  /// Builds the custom metadata vector, adding the offsets from the last element to the first.
  public static VectorOffset CreateCustomMetadataVector(FlatBufferBuilder builder, Offset<KeyValue>[] data)
  {
    builder.StartVector(4, data.Length, 4);
    for (int remaining = data.Length; remaining > 0; remaining--)
    {
      builder.AddOffset(data[remaining - 1].Value);
    }
    return builder.EndVector();
  }

  public static VectorOffset CreateCustomMetadataVectorBlock(FlatBufferBuilder builder, Offset<KeyValue>[] data)
  {
    builder.StartVector(4, data.Length, 4);
    builder.Add(data);
    return builder.EndVector();
  }

  public static void StartCustomMetadataVector(FlatBufferBuilder builder, int numElems)
  {
    builder.StartVector(4, numElems, 4);
  }

  public static Offset<Field> EndField(FlatBufferBuilder builder)
  {
    return new Offset<Field>(builder.EndObject());
  }
};


}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FieldNode.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FieldNode.cs new file mode 100644 index 000000000..811e10ea4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FieldNode.cs @@ -0,0 +1,44 @@
// <auto-generated>
//  automatically generated by the FlatBuffers compiler, do not modify
// </auto-generated>

namespace Apache.Arrow.Flatbuf
{

using global::System;
using global::FlatBuffers;

/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a
/// collection of
/// equal-length Arrow arrays)
/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
internal struct FieldNode : IFlatbufferObject
{
  // Buffer (bb) and struct position (bb_pos) that the accessors below read through.
  private Struct __p;

  public ByteBuffer ByteBuffer
  {
    get { return __p.bb; }
  }

  /// Points this accessor at position _i of buffer _bb.
  public void __init(int _i, ByteBuffer _bb)
  {
    __p.bb_pos = _i;
    __p.bb = _bb;
  }

  /// Same as __init, but returns this value so the call can be chained.
  public FieldNode __assign(int _i, ByteBuffer _bb)
  {
    __init(_i, _bb);
    return this;
  }

  /// The number of value slots in the Arrow array at this level of a nested
  /// tree
  public long Length
  {
    get { return __p.bb.GetLong(__p.bb_pos); }
  }

  /// The number of observed nulls. Fields with null_count == 0 may choose not
  /// to write their physical validity bitmap out as a materialized buffer,
  /// instead setting the length of the bitmap buffer to 0.
  public long NullCount
  {
    get { return __p.bb.GetLong(__p.bb_pos + 8); }
  }

  /// Writes NullCount and then Length after an 8-byte-aligned Prep for 16 bytes,
  /// and returns the builder offset reached afterwards.
  public static Offset<FieldNode> CreateFieldNode(FlatBufferBuilder builder, long Length, long NullCount)
  {
    builder.Prep(8, 16);
    builder.PutLong(NullCount);
    builder.PutLong(Length);
    int structEnd = builder.Offset;
    return new Offset<FieldNode>(structEnd);
  }
};


}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeBinary.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeBinary.cs new file mode 100644 index 000000000..b6414a23b --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeBinary.cs @@ -0,0 +1,39 @@
// <auto-generated>
//  automatically generated by the FlatBuffers compiler, do not modify
// </auto-generated>

namespace Apache.Arrow.Flatbuf
{

using global::System;
using global::FlatBuffers;

internal struct FixedSizeBinary : IFlatbufferObject
{
  // Buffer (bb) and table position (bb_pos) that the accessors below read through.
  private Table __p;

  public ByteBuffer ByteBuffer
  {
    get { return __p.bb; }
  }

  /// Binds a new FixedSizeBinary to the root table referenced at the buffer's current position.
  public static FixedSizeBinary GetRootAsFixedSizeBinary(ByteBuffer _bb)
  {
    return GetRootAsFixedSizeBinary(_bb, new FixedSizeBinary());
  }

  public static FixedSizeBinary GetRootAsFixedSizeBinary(ByteBuffer _bb, FixedSizeBinary obj)
  {
    int rootPosition = _bb.GetInt(_bb.Position) + _bb.Position;
    return obj.__assign(rootPosition, _bb);
  }

  public void __init(int _i, ByteBuffer _bb)
  {
    __p.bb_pos = _i;
    __p.bb = _bb;
  }

  public FixedSizeBinary __assign(int _i, ByteBuffer _bb)
  {
    __init(_i, _bb);
    return this;
  }

  /// Number of bytes per value
  public int ByteWidth
  {
    get
    {
      int slot = __p.__offset(4);
      if (slot == 0)
      {
        return 0;
      }
      return __p.bb.GetInt(slot + __p.bb_pos);
    }
  }

  public static Offset<FixedSizeBinary> CreateFixedSizeBinary(FlatBufferBuilder builder,
      int byteWidth = 0)
  {
    builder.StartObject(1);
    AddByteWidth(builder, byteWidth);
    return EndFixedSizeBinary(builder);
  }

  public static void StartFixedSizeBinary(FlatBufferBuilder builder)
  {
    builder.StartObject(1);
  }

  public static void AddByteWidth(FlatBufferBuilder builder, int byteWidth)
  {
    builder.AddInt(0, byteWidth, 0);
  }

  public static Offset<FixedSizeBinary> EndFixedSizeBinary(FlatBufferBuilder builder)
  {
    return new Offset<FixedSizeBinary>(builder.EndObject());
  }
};


}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeList.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeList.cs new file mode 100644 index 000000000..0ca69b7a1 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FixedSizeList.cs @@ -0,0 +1,39 @@
// <auto-generated>
//  automatically generated by the FlatBuffers compiler, do not modify
// </auto-generated>

namespace Apache.Arrow.Flatbuf
{

using global::System;
using global::FlatBuffers;

internal struct FixedSizeList : IFlatbufferObject
{
  // Buffer (bb) and table position (bb_pos) that the accessors below read through.
  private Table __p;

  public ByteBuffer ByteBuffer
  {
    get { return __p.bb; }
  }

  /// Binds a new FixedSizeList to the root table referenced at the buffer's current position.
  public static FixedSizeList GetRootAsFixedSizeList(ByteBuffer _bb)
  {
    return GetRootAsFixedSizeList(_bb, new FixedSizeList());
  }

  public static FixedSizeList GetRootAsFixedSizeList(ByteBuffer _bb, FixedSizeList obj)
  {
    int rootPosition = _bb.GetInt(_bb.Position) + _bb.Position;
    return obj.__assign(rootPosition, _bb);
  }

  public void __init(int _i, ByteBuffer _bb)
  {
    __p.bb_pos = _i;
    __p.bb = _bb;
  }

  public FixedSizeList __assign(int _i, ByteBuffer _bb)
  {
    __init(_i, _bb);
    return this;
  }

  /// Number of list items per value
  public int ListSize
  {
    get
    {
      int slot = __p.__offset(4);
      if (slot == 0)
      {
        return 0;
      }
      return __p.bb.GetInt(slot + __p.bb_pos);
    }
  }

  public static Offset<FixedSizeList> CreateFixedSizeList(FlatBufferBuilder builder,
      int listSize = 0)
  {
    builder.StartObject(1);
    AddListSize(builder, listSize);
    return EndFixedSizeList(builder);
  }

  public static void StartFixedSizeList(FlatBufferBuilder builder)
  {
    builder.StartObject(1);
  }

  public static void AddListSize(FlatBufferBuilder builder, int listSize)
  {
    builder.AddInt(0, listSize, 0);
  }

  public static Offset<FixedSizeList> EndFixedSizeList(FlatBufferBuilder builder)
  {
    return new Offset<FixedSizeList>(builder.EndObject());
  }
};


}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs new file mode 100644 index 000000000..91cd5cccb --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs @@ -0,0 +1,891 @@
/*
 * Copyright 2014 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// There are 3 #defines that have an impact on performance / features of this ByteBuffer implementation
//
//      UNSAFE_BYTEBUFFER
//          This will use unsafe code to manipulate the underlying byte array. This
//          can yield a reasonable performance increase.
//
//      BYTEBUFFER_NO_BOUNDS_CHECK
//          This will disable the bounds check asserts to the byte array.
//          This can
//          yield a small performance gain in normal code.
//
//      ENABLE_SPAN_T
//          This will enable reading and writing blocks of memory with a Span<T> instead of just
//          T[].  You can also enable writing directly to shared memory or other types of memory
//          by providing a custom implementation of ByteBufferAllocator.
//          ENABLE_SPAN_T also requires UNSAFE_BYTEBUFFER to be defined
//
// Using UNSAFE_BYTEBUFFER and BYTEBUFFER_NO_BOUNDS_CHECK together can yield a
// performance gain of ~15% for some operations, however doing so is potentially
// dangerous. Do so at your own risk!
//

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;

#if ENABLE_SPAN_T
using System.Buffers.Binary;
#endif

#if ENABLE_SPAN_T && !UNSAFE_BYTEBUFFER
#error ENABLE_SPAN_T requires UNSAFE_BYTEBUFFER to also be defined
#endif

namespace FlatBuffers
{
    /// <summary>
    /// Storage behind a ByteBuffer: exposes the bytes (as spans/memory or as a
    /// byte[] depending on ENABLE_SPAN_T), their length, and a way to grow.
    /// </summary>
    internal abstract class ByteBufferAllocator
    {
#if ENABLE_SPAN_T
        public abstract Span<byte> Span { get; }
        public abstract ReadOnlySpan<byte> ReadOnlySpan { get; }
        public abstract Memory<byte> Memory { get; }
        public abstract ReadOnlyMemory<byte> ReadOnlyMemory { get; }

#else
        public byte[] Buffer
        {
            get;
            protected set;
        }
#endif

        public int Length
        {
            get;
            protected set;
        }

        public abstract void GrowFront(int newSize);
    }

    /// <summary>
    /// Allocator backed by a managed byte array.
    /// </summary>
    internal sealed class ByteArrayAllocator : ByteBufferAllocator
    {
        private byte[] _buffer;

        public ByteArrayAllocator(byte[] buffer)
        {
            _buffer = buffer;
            InitBuffer();
        }

        /// <summary>
        /// Replaces the array with one of <paramref name="newSize"/> bytes and copies
        /// the existing bytes to the end of it.
        /// </summary>
        public override void GrowFront(int newSize)
        {
            int oldSize = Length;

            // Refuse to grow once either of the two top bits of the current length is set.
            if ((oldSize & 0xC0000000) != 0)
            {
                throw new Exception("ByteBuffer: cannot grow buffer beyond 2 gigabytes.");
            }

            if (newSize < oldSize)
            {
                throw new Exception("ByteBuffer: cannot truncate buffer.");
            }

            byte[] grown = new byte[newSize];
            System.Buffer.BlockCopy(_buffer, 0, grown, newSize - oldSize, oldSize);
            _buffer = grown;
            InitBuffer();
        }

#if ENABLE_SPAN_T
        public override Span<byte> Span => _buffer;
        public override ReadOnlySpan<byte> ReadOnlySpan => _buffer;
        public override Memory<byte> Memory => _buffer;
        public override ReadOnlyMemory<byte> ReadOnlyMemory => _buffer;
#endif

        // Publishes the current array's length (and the array itself when spans are disabled).
        private void InitBuffer()
        {
            Length = _buffer.Length;
#if !ENABLE_SPAN_T
            Buffer = _buffer;
#endif
        }
    }

    /// <summary>
    /// Class to mimic Java's ByteBuffer which is used heavily in Flatbuffers.
    /// </summary>
    internal class ByteBuffer
    {
        private ByteBufferAllocator _buffer;
        private int _pos;  // Must track start of the buffer.

        public ByteBuffer(ByteBufferAllocator allocator, int position)
        {
            _buffer = allocator;
            _pos = position;
        }

        public ByteBuffer(int size) : this(new byte[size]) { }

        public ByteBuffer(byte[] buffer) : this(buffer, 0) { }

        public ByteBuffer(byte[] buffer, int pos)
            : this(new ByteArrayAllocator(buffer), pos)
        {
        }

        public int Position
        {
            get => _pos;
            set => _pos = value;
        }

        public int Length => _buffer.Length;

        public void Reset()
        {
            Position = 0;
        }

        // Create a new ByteBuffer on the same underlying data.
        // The new ByteBuffer's position will be same as this buffer's.
        public ByteBuffer Duplicate()
        {
            return new ByteBuffer(_buffer, _pos);
        }

        // Increases the size of the ByteBuffer, and copies the old data towards
        // the end of the new buffer.
        public void GrowFront(int newSize)
        {
            _buffer.GrowFront(newSize);
        }

        public byte[] ToArray(int pos, int len) => ToArray<byte>(pos, len);

        /// <summary>
        /// A lookup of type sizes. Used instead of Marshal.SizeOf() which has additional
        /// overhead, but also is compatible with generic functions for simplified code.
/// </summary>
private static readonly Dictionary<Type, int> genericSizes = new Dictionary<Type, int>()
{
    { typeof(bool),     sizeof(bool) },
    { typeof(float),    sizeof(float) },
    { typeof(double),   sizeof(double) },
    { typeof(sbyte),    sizeof(sbyte) },
    { typeof(byte),     sizeof(byte) },
    { typeof(short),    sizeof(short) },
    { typeof(ushort),   sizeof(ushort) },
    { typeof(int),      sizeof(int) },
    { typeof(uint),     sizeof(uint) },
    { typeof(ulong),    sizeof(ulong) },
    { typeof(long),     sizeof(long) },
};

/// <summary>
/// Get the wire-size (in bytes) of a type supported by flatbuffers.
/// </summary>
/// <typeparam name="T">The type to get the wire size of</typeparam>
/// <returns>The size in bytes registered for <typeparamref name="T"/></returns>
/// <exception cref="KeyNotFoundException">
/// <typeparamref name="T"/> is not in the size table; check with
/// IsSupportedType first when the type is not known to be supported.
/// </exception>
public static int SizeOf<T>()
{
    return genericSizes[typeof(T)];
}

/// <summary>
/// Checks if the Type provided is supported as scalar value
/// </summary>
/// <typeparam name="T">The Type to check</typeparam>
/// <returns>True if the type is a scalar type that is supported, false otherwise</returns>
public static bool IsSupportedType<T>()
{
    return genericSizes.ContainsKey(typeof(T));
}

/// <summary>
/// Get the wire-size (in bytes) of a typed array
/// </summary>
/// <typeparam name="T">The type of the array</typeparam>
/// <param name="x">The array to get the size of</param>
/// <returns>The number of bytes the array takes on wire</returns>
public static int ArraySize<T>(T[] x)
{
    return SizeOf<T>() * x.Length;
}

#if ENABLE_SPAN_T
/// <summary>
/// Get the wire-size (in bytes) of a typed span
/// </summary>
/// <typeparam name="T">The element type of the span</typeparam>
/// <param name="x">The span to get the size of</param>
/// <returns>The number of bytes the span takes on wire</returns>
public static int ArraySize<T>(Span<T> x)
{
    return SizeOf<T>() * x.Length;
}
#endif

// Get a portion of the buffer casted into an array of type T, given
// the buffer position and length.
#if ENABLE_SPAN_T
/// <summary>
/// Copies <paramref name="len"/> elements of <typeparamref name="T"/> starting at byte
/// position <paramref name="pos"/> into a new array.
/// </summary>
public T[] ToArray<T>(int pos, int len)
    where T : struct
{
    AssertOffsetAndLength(pos, len);
    // The element range itself is enforced by Slice(0, len) on the casted span.
    return MemoryMarshal.Cast<byte, T>(_buffer.ReadOnlySpan.Slice(pos)).Slice(0, len).ToArray();
}
#else
/// <summary>
/// Copies <paramref name="len"/> elements of <typeparamref name="T"/> starting at byte
/// position <paramref name="pos"/> into a new array.
/// </summary>
public T[] ToArray<T>(int pos, int len)
    where T : struct
{
    // `len` counts elements, so the range check has to cover len * sizeof(T) bytes;
    // checking `len` alone under-checks every element type wider than one byte.
    int numBytes = checked(len * SizeOf<T>());
    AssertOffsetAndLength(pos, numBytes);
    T[] arr = new T[len];
    Buffer.BlockCopy(_buffer.Buffer, pos, arr, 0, numBytes);
    return arr;
}
#endif

/// <summary>Copies the bytes from Position to the end of the buffer.</summary>
public byte[] ToSizedArray()
{
    return ToArray<byte>(Position, Length - Position);
}

/// <summary>Copies every byte of the buffer, starting at index 0.</summary>
public byte[] ToFullArray()
{
    return ToArray<byte>(0, Length);
}

#if ENABLE_SPAN_T
public ReadOnlyMemory<byte> ToReadOnlyMemory(int pos, int len)
{
    return _buffer.ReadOnlyMemory.Slice(pos, len);
}

public Memory<byte> ToMemory(int pos, int len)
{
    return _buffer.Memory.Slice(pos, len);
}

public Span<byte> ToSpan(int pos, int len)
{
    return _buffer.Span.Slice(pos, len);
}
#else
/// <summary>Wraps (does not copy) the given range of the backing array.</summary>
public ArraySegment<byte> ToArraySegment(int pos, int len)
{
    return new ArraySegment<byte>(_buffer.Buffer, pos, len);
}

/// <summary>Creates a stream over the given range of the backing array.</summary>
public MemoryStream ToMemoryStream(int pos, int len)
{
    return new MemoryStream(_buffer.Buffer, pos, len);
}
#endif

#if !UNSAFE_BYTEBUFFER
// Pre-allocated helper arrays for conversion.
private float[] floathelper = new[] { 0.0f };
private int[] inthelper = new[] { 0 };
private double[] doublehelper = new[] { 0.0 };
private ulong[] ulonghelper = new[] { 0UL };
#endif // !UNSAFE_BYTEBUFFER

// Helper functions for the unsafe version.
/// <summary>Returns <paramref name="input"/> with its two bytes swapped.</summary>
public static ushort ReverseBytes(ushort input)
{
    return (ushort)(((input & 0x00FFU) << 8) |
                    ((input & 0xFF00U) >> 8));
}

/// <summary>Returns <paramref name="input"/> with its four bytes in reverse order.</summary>
public static uint ReverseBytes(uint input)
{
    return ((input & 0x000000FFU) << 24) |
           ((input & 0x0000FF00U) <<  8) |
           ((input & 0x00FF0000U) >>  8) |
           ((input & 0xFF000000U) >> 24);
}

/// <summary>Returns <paramref name="input"/> with its eight bytes in reverse order.</summary>
public static ulong ReverseBytes(ulong input)
{
    return (((input & 0x00000000000000FFUL) << 56) |
            ((input & 0x000000000000FF00UL) << 40) |
            ((input & 0x0000000000FF0000UL) << 24) |
            ((input & 0x00000000FF000000UL) <<  8) |
            ((input & 0x000000FF00000000UL) >>  8) |
            ((input & 0x0000FF0000000000UL) >> 24) |
            ((input & 0x00FF000000000000UL) >> 40) |
            ((input & 0xFF00000000000000UL) >> 56));
}

#if !UNSAFE_BYTEBUFFER
// Helper functions for the safe (but slower) version.

/// <summary>
/// Writes the low <paramref name="count"/> bytes of <paramref name="data"/> at
/// <paramref name="offset"/>, least significant byte first.
/// </summary>
protected void WriteLittleEndian(int offset, int count, ulong data)
{
    // Shifting extracts bytes by numeric significance, independent of the host's
    // byte order, so this loop yields little-endian output on every platform.
    // The previous big-endian-host branch stored the most significant byte first,
    // which disagreed with the unsafe Put* variants (they byte-swap on such hosts).
    for (int i = 0; i < count; i++)
    {
        _buffer.Buffer[offset + i] = (byte)(data >> i * 8);
    }
}

/// <summary>
/// Reads <paramref name="count"/> bytes at <paramref name="offset"/> as a
/// little-endian unsigned value.
/// </summary>
protected ulong ReadLittleEndian(int offset, int count)
{
    AssertOffsetAndLength(offset, count);
    ulong r = 0;
    // Mirror of WriteLittleEndian: the byte at offset + i carries bits [8i, 8i + 8).
    for (int i = 0; i < count; i++)
    {
        r |= (ulong)_buffer.Buffer[offset + i] << i * 8;
    }
    return r;
}
#endif // !UNSAFE_BYTEBUFFER

/// <summary>
/// Throws ArgumentOutOfRangeException unless [offset, offset + length) lies inside
/// the buffer. Compiles to a no-op when BYTEBUFFER_NO_BOUNDS_CHECK is defined.
/// </summary>
private void AssertOffsetAndLength(int offset, int length)
{
#if !BYTEBUFFER_NO_BOUNDS_CHECK
    if (offset < 0 ||
        offset > _buffer.Length - length)
        throw new ArgumentOutOfRangeException();
#endif
}

#if ENABLE_SPAN_T

public void PutSbyte(int offset, sbyte value)
{
    AssertOffsetAndLength(offset, sizeof(sbyte));
    _buffer.Span[offset] = (byte)value;
}

public void PutByte(int offset, byte value)
{
    AssertOffsetAndLength(offset, sizeof(byte));
    _buffer.Span[offset] = value;
}

/// <summary>Fills <paramref name="count"/> bytes starting at <paramref name="offset"/> with <paramref name="value"/>.</summary>
public void PutByte(int offset, byte value, int count)
{
    AssertOffsetAndLength(offset, sizeof(byte) * count);
    Span<byte> span = _buffer.Span.Slice(offset, count);
    for (var i = 0; i < span.Length; ++i)
        span[i] = value;
}
#else
public void PutSbyte(int offset, sbyte value)
{
    AssertOffsetAndLength(offset, sizeof(sbyte));
    _buffer.Buffer[offset] = (byte)value;
}

public void PutByte(int offset, byte value)
{
    AssertOffsetAndLength(offset, sizeof(byte));
    _buffer.Buffer[offset] = value;
}

/// <summary>Fills <paramref name="count"/> bytes starting at <paramref name="offset"/> with <paramref name="value"/>.</summary>
public void PutByte(int offset, byte value, int count)
{
    AssertOffsetAndLength(offset, sizeof(byte) * count);
    for (var i = 0; i < count; ++i)
        _buffer.Buffer[offset + i] = value;
}
#endif

// this method exists in order to conform with Java ByteBuffer standards
public void Put(int offset, byte value)
{
    PutByte(offset, value);
}

#if ENABLE_SPAN_T
public unsafe void PutStringUTF8(int offset, string value)
{
    // value.Length is a char count, i.e. only a lower bound on the encoded size;
    // the encoder call below is additionally capped at Length - offset bytes.
    AssertOffsetAndLength(offset, value.Length);
    fixed (char* s = value)
    {
        fixed (byte* buffer = &MemoryMarshal.GetReference(_buffer.Span))
        {
            Encoding.UTF8.GetBytes(s, value.Length, buffer + offset, Length - offset);
        }
    }
}
#else
public void PutStringUTF8(int offset, string value)
{
    // value.Length is a char count, i.e. only a lower bound on the encoded size.
    AssertOffsetAndLength(offset, value.Length);
    Encoding.UTF8.GetBytes(value, 0, value.Length,
        _buffer.Buffer, offset);
}
#endif

#if UNSAFE_BYTEBUFFER
// Unsafe but more efficient versions of Put*.
public void PutShort(int offset, short value)
{
    PutUshort(offset, (ushort)value);
}

public unsafe void PutUshort(int offset, ushort value)
{
    AssertOffsetAndLength(offset, sizeof(ushort));
#if ENABLE_SPAN_T
    Span<byte> span = _buffer.Span.Slice(offset);
    BinaryPrimitives.WriteUInt16LittleEndian(span, value);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        // Byte-swap on big-endian hosts so the stored bytes are little-endian.
        *(ushort*)(ptr + offset) = BitConverter.IsLittleEndian
            ? value
            : ReverseBytes(value);
    }
#endif
}

public void PutInt(int offset, int value)
{
    PutUint(offset, (uint)value);
}

public unsafe void PutUint(int offset, uint value)
{
    AssertOffsetAndLength(offset, sizeof(uint));
#if ENABLE_SPAN_T
    Span<byte> span = _buffer.Span.Slice(offset);
    BinaryPrimitives.WriteUInt32LittleEndian(span, value);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        *(uint*)(ptr + offset) = BitConverter.IsLittleEndian
            ? value
            : ReverseBytes(value);
    }
#endif
}

public unsafe void PutLong(int offset, long value)
{
    PutUlong(offset, (ulong)value);
}

public unsafe void PutUlong(int offset, ulong value)
{
    AssertOffsetAndLength(offset, sizeof(ulong));
#if ENABLE_SPAN_T
    Span<byte> span = _buffer.Span.Slice(offset);
    BinaryPrimitives.WriteUInt64LittleEndian(span, value);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        *(ulong*)(ptr + offset) = BitConverter.IsLittleEndian
            ? value
            : ReverseBytes(value);
    }
#endif
}

public unsafe void PutFloat(int offset, float value)
{
    AssertOffsetAndLength(offset, sizeof(float));
#if ENABLE_SPAN_T
    fixed (byte* ptr = &MemoryMarshal.GetReference(_buffer.Span))
#else
    fixed (byte* ptr = _buffer.Buffer)
#endif
    {
        if (BitConverter.IsLittleEndian)
        {
            *(float*)(ptr + offset) = value;
        }
        else
        {
            // Reinterpret the float's bits as a uint and store them byte-swapped.
            *(uint*)(ptr + offset) = ReverseBytes(*(uint*)(&value));
        }
    }
}

public unsafe void PutDouble(int offset, double value)
{
    AssertOffsetAndLength(offset, sizeof(double));
#if ENABLE_SPAN_T
    fixed (byte* ptr = &MemoryMarshal.GetReference(_buffer.Span))
#else
    fixed (byte* ptr = _buffer.Buffer)
#endif
    {
        if (BitConverter.IsLittleEndian)
        {
            *(double*)(ptr + offset) = value;
        }
        else
        {
            // Reinterpret the double's bits as a ulong and store them byte-swapped.
            *(ulong*)(ptr + offset) = ReverseBytes(*(ulong*)(&value));
        }
    }
}
#else // !UNSAFE_BYTEBUFFER
// Slower versions of Put* for when unsafe code is not allowed.
public void PutShort(int offset, short value)
{
    AssertOffsetAndLength(offset, sizeof(short));
    WriteLittleEndian(offset, sizeof(short), (ulong)value);
}

public void PutUshort(int offset, ushort value)
{
    AssertOffsetAndLength(offset, sizeof(ushort));
    WriteLittleEndian(offset, sizeof(ushort), (ulong)value);
}

public void PutInt(int offset, int value)
{
    AssertOffsetAndLength(offset, sizeof(int));
    WriteLittleEndian(offset, sizeof(int), (ulong)value);
}

public void PutUint(int offset, uint value)
{
    AssertOffsetAndLength(offset, sizeof(uint));
    WriteLittleEndian(offset, sizeof(uint), (ulong)value);
}

public void PutLong(int offset, long value)
{
    AssertOffsetAndLength(offset, sizeof(long));
    WriteLittleEndian(offset, sizeof(long), (ulong)value);
}

public void PutUlong(int offset, ulong value)
{
    AssertOffsetAndLength(offset, sizeof(ulong));
    WriteLittleEndian(offset, sizeof(ulong), value);
}

public void PutFloat(int offset, float value)
{
    AssertOffsetAndLength(offset, sizeof(float));
    // Move the float's bytes into an int via the helper arrays, then write that int.
    floathelper[0] = value;
    Buffer.BlockCopy(floathelper, 0, inthelper, 0, sizeof(float));
    WriteLittleEndian(offset, sizeof(float), (ulong)inthelper[0]);
}

public void PutDouble(int offset, double value)
{
    AssertOffsetAndLength(offset, sizeof(double));
    // Move the double's bytes into a ulong via the helper arrays, then write that ulong.
    doublehelper[0] = value;
    Buffer.BlockCopy(doublehelper, 0, ulonghelper, 0, sizeof(double));
    WriteLittleEndian(offset, sizeof(double), ulonghelper[0]);
}

#endif // UNSAFE_BYTEBUFFER

#if ENABLE_SPAN_T
public sbyte GetSbyte(int index)
{
    AssertOffsetAndLength(index, sizeof(sbyte));
    return (sbyte)_buffer.ReadOnlySpan[index];
}

public byte Get(int index)
{
    AssertOffsetAndLength(index, sizeof(byte));
    return _buffer.ReadOnlySpan[index];
}
#else
public sbyte GetSbyte(int index)
{
    AssertOffsetAndLength(index, sizeof(sbyte));
    return (sbyte)_buffer.Buffer[index];
}

public byte Get(int index)
{
    AssertOffsetAndLength(index, sizeof(byte));
    return _buffer.Buffer[index];
}
#endif

#if ENABLE_SPAN_T
/// <summary>Decodes <paramref name="len"/> bytes starting at <paramref name="startPos"/> as UTF-8.</summary>
public unsafe string GetStringUTF8(int startPos, int len)
{
    fixed (byte* buffer = &MemoryMarshal.GetReference(_buffer.ReadOnlySpan.Slice(startPos)))
    {
        return Encoding.UTF8.GetString(buffer, len);
    }
}
#else
/// <summary>Decodes <paramref name="len"/> bytes starting at <paramref name="startPos"/> as UTF-8.</summary>
public string GetStringUTF8(int startPos, int len)
{
    return Encoding.UTF8.GetString(_buffer.Buffer, startPos, len);
}
#endif

#if UNSAFE_BYTEBUFFER
// Unsafe but more efficient versions of Get*.
public short GetShort(int offset)
{
    return (short)GetUshort(offset);
}

public unsafe ushort GetUshort(int offset)
{
    AssertOffsetAndLength(offset, sizeof(ushort));
#if ENABLE_SPAN_T
    ReadOnlySpan<byte> span = _buffer.ReadOnlySpan.Slice(offset);
    return BinaryPrimitives.ReadUInt16LittleEndian(span);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        // Byte-swap on big-endian hosts because the stored bytes are little-endian.
        return BitConverter.IsLittleEndian
            ? *(ushort*)(ptr + offset)
            : ReverseBytes(*(ushort*)(ptr + offset));
    }
#endif
}

public int GetInt(int offset)
{
    return (int)GetUint(offset);
}

public unsafe uint GetUint(int offset)
{
    AssertOffsetAndLength(offset, sizeof(uint));
#if ENABLE_SPAN_T
    ReadOnlySpan<byte> span = _buffer.ReadOnlySpan.Slice(offset);
    return BinaryPrimitives.ReadUInt32LittleEndian(span);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        return BitConverter.IsLittleEndian
            ? *(uint*)(ptr + offset)
            : ReverseBytes(*(uint*)(ptr + offset));
    }
#endif
}

public long GetLong(int offset)
{
    return (long)GetUlong(offset);
}

public unsafe ulong GetUlong(int offset)
{
    AssertOffsetAndLength(offset, sizeof(ulong));
#if ENABLE_SPAN_T
    ReadOnlySpan<byte> span = _buffer.ReadOnlySpan.Slice(offset);
    return BinaryPrimitives.ReadUInt64LittleEndian(span);
#else
    fixed (byte* ptr = _buffer.Buffer)
    {
        return BitConverter.IsLittleEndian
            ? *(ulong*)(ptr + offset)
            : ReverseBytes(*(ulong*)(ptr + offset));
    }
#endif
}

public unsafe float GetFloat(int offset)
{
    AssertOffsetAndLength(offset, sizeof(float));
#if ENABLE_SPAN_T
    fixed (byte* ptr = &MemoryMarshal.GetReference(_buffer.ReadOnlySpan))
#else
    fixed (byte* ptr = _buffer.Buffer)
#endif
    {
        if (BitConverter.IsLittleEndian)
        {
            return *(float*)(ptr + offset);
        }
        else
        {
            uint uvalue = ReverseBytes(*(uint*)(ptr + offset));
            return *(float*)(&uvalue);
        }
    }
}

public unsafe double GetDouble(int offset)
{
    AssertOffsetAndLength(offset, sizeof(double));
#if ENABLE_SPAN_T
    fixed (byte* ptr = &MemoryMarshal.GetReference(_buffer.ReadOnlySpan))
#else
    fixed (byte* ptr = _buffer.Buffer)
#endif
    {
        if (BitConverter.IsLittleEndian)
        {
            return *(double*)(ptr + offset);
        }
        else
        {
            ulong uvalue = ReverseBytes(*(ulong*)(ptr + offset));
            return *(double*)(&uvalue);
        }
    }
}
#else // !UNSAFE_BYTEBUFFER
// Slower versions of Get* for when unsafe code is not allowed.
public short GetShort(int index)
{
    return (short)ReadLittleEndian(index, sizeof(short));
}

public ushort GetUshort(int index)
{
    return (ushort)ReadLittleEndian(index, sizeof(ushort));
}

public int GetInt(int index)
{
    return (int)ReadLittleEndian(index, sizeof(int));
}

public uint GetUint(int index)
{
    return (uint)ReadLittleEndian(index, sizeof(uint));
}

public long GetLong(int index)
{
    return (long)ReadLittleEndian(index, sizeof(long));
}

public ulong GetUlong(int index)
{
    return ReadLittleEndian(index, sizeof(ulong));
}

public float GetFloat(int index)
{
    int i = (int)ReadLittleEndian(index, sizeof(float));
    inthelper[0] = i;
    Buffer.BlockCopy(inthelper, 0, floathelper, 0, sizeof(float));
    return floathelper[0];
}

public double GetDouble(int index)
{
    ulong i = ReadLittleEndian(index, sizeof(double));
    // There's Int64BitsToDouble but it uses unsafe code internally.
    ulonghelper[0] = i;
    Buffer.BlockCopy(ulonghelper, 0, doublehelper, 0, sizeof(double));
    return doublehelper[0];
}
#endif // UNSAFE_BYTEBUFFER

/// <summary>
/// Copies an array of type T into this buffer, ending at the given
/// offset into this buffer. The starting offset is calculated based on the length
/// of the array and is the value returned.
/// </summary>
/// <typeparam name="T">The type of the input data (must be a struct)</typeparam>
/// <param name="offset">The offset into this buffer where the copy will end</param>
/// <param name="x">The array to copy data from</param>
/// <returns>The 'start' location of this buffer now, after the copy completed</returns>
/// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="x"/> is empty, or T is not a supported scalar type.</exception>
/// <exception cref="NotImplementedException">The host is big-endian.</exception>
public int Put<T>(int offset, T[] x)
    where T : struct
{
    if (x == null)
    {
        // The single-string constructor treats its argument as the parameter name,
        // so pass the name and the message separately.
        throw new ArgumentNullException(nameof(x), "Cannot put a null array");
    }

    if (x.Length == 0)
    {
        throw new ArgumentException("Cannot put an empty array");
    }

    if (!IsSupportedType<T>())
    {
        throw new ArgumentException("Cannot put an array of type "
            + typeof(T) + " into this buffer");
    }

    if (!BitConverter.IsLittleEndian)
    {
        // todo: low priority, but need to genericize the Put<T>() functions so that
        // each element can be byte-swapped individually on big-endian hosts.
        throw new NotImplementedException("Big Endian Support not implemented yet " +
            "for putting typed arrays");
    }

    int numBytes = ByteBuffer.ArraySize(x);
    offset -= numBytes;
    AssertOffsetAndLength(offset, numBytes);
    // if we are LE, just do a block copy
#if ENABLE_SPAN_T
    MemoryMarshal.Cast<T, byte>(x).CopyTo(_buffer.Span.Slice(offset, numBytes));
#else
    Buffer.BlockCopy(x, 0, _buffer.Buffer, offset, numBytes);
#endif
    return offset;
}

#if ENABLE_SPAN_T
/// <summary>
/// Copies a span of type T into this buffer, ending at the given offset.
/// The starting offset of the copied data is returned.
/// </summary>
/// <typeparam name="T">The type of the input data (must be a struct)</typeparam>
/// <param name="offset">The offset into this buffer where the copy will end</param>
/// <param name="x">The span to copy data from</param>
/// <returns>The 'start' location of this buffer now, after the copy completed</returns>
public int Put<T>(int offset, Span<T> x)
    where T : struct
{
    if (x.Length == 0)
    {
        throw new ArgumentException("Cannot put an empty array");
    }

    if (!IsSupportedType<T>())
    {
        throw new ArgumentException("Cannot put an array of type "
            + typeof(T) + " into this buffer");
    }

    if (!BitConverter.IsLittleEndian)
    {
        // todo: low priority, but need to genericize the Put<T>() functions so that
        // each element can be byte-swapped individually on big-endian hosts.
        throw new NotImplementedException("Big Endian Support not implemented yet " +
            "for putting typed arrays");
    }

    int numBytes = ByteBuffer.ArraySize(x);
    offset -= numBytes;
    AssertOffsetAndLength(offset, numBytes);
    // if we are LE, just do a block copy
    MemoryMarshal.Cast<T, byte>(x).CopyTo(_buffer.Span.Slice(offset, numBytes));
    return offset;
}
#endif
}
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBufferUtil.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBufferUtil.cs new file mode 100644 index 000000000..cfba4305e --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBufferUtil.cs @@ -0,0 +1,39 @@
/*
 * Copyright 2017 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

namespace FlatBuffers
{
    /// <summary>
    /// Class that collects utility functions around `ByteBuffer`.
    /// </summary>
    internal class ByteBufferUtil
    {
        // Extract the size prefix from a `ByteBuffer`.
public static int GetSizePrefix(ByteBuffer bb)
{
    // The prefix is the int stored at the buffer's current position.
    int prefixPosition = bb.Position;
    return bb.GetInt(prefixPosition);
}

// Create a duplicate of a size-prefixed `ByteBuffer` that has its position
// advanced just past the size prefix.
public static ByteBuffer RemoveSizePrefix(ByteBuffer bb)
{
    ByteBuffer pastPrefix = bb.Duplicate();
    pastPrefix.Position = pastPrefix.Position + FlatBufferConstants.SizePrefixLength;
    return pastPrefix;
}
}
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferBuilder.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferBuilder.cs new file mode 100644 index 000000000..65873a634 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferBuilder.cs @@ -0,0 +1,812 @@
/*
 * Copyright 2014 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


using System;
using System.Text;

/// @file
/// @addtogroup flatbuffers_csharp_api
/// @{

namespace FlatBuffers
{
    /// <summary>
    /// Responsible for building up and accessing a FlatBuffer formatted byte
    /// array (via ByteBuffer).
    /// </summary>
    internal class FlatBufferBuilder
    {
        private int _space;
        private ByteBuffer _bb;
        private int _minAlign = 1;

        // The vtable for the current table (if _vtableSize >= 0)
        private int[] _vtable = new int[16];
        // The size of the vtable. -1 indicates no vtable
        private int _vtableSize = -1;
        // Starting offset of the current struct/table.
+ private int _objectStart; + // List of offsets of all vtables. + private int[] _vtables = new int[16]; + // Number of entries in `vtables` in use. + private int _numVtables = 0; + // For the current vector being built. + private int _vectorNumElems = 0; + + /// <summary> + /// Create a FlatBufferBuilder with a given initial size. + /// </summary> + /// <param name="initialSize"> + /// The initial size to use for the internal buffer. + /// </param> + public FlatBufferBuilder(int initialSize) + { + if (initialSize <= 0) + throw new ArgumentOutOfRangeException("initialSize", + initialSize, "Must be greater than zero"); + _space = initialSize; + _bb = new ByteBuffer(initialSize); + } + + /// <summary> + /// Create a FlatBufferBuilder backed by the passed in ByteBuffer + /// </summary> + /// <param name="buffer">The ByteBuffer to write to</param> + public FlatBufferBuilder(ByteBuffer buffer) + { + _bb = buffer; + _space = buffer.Length; + buffer.Reset(); + } + + /// <summary> + /// Reset the FlatBufferBuilder by purging all data that it holds. + /// </summary> + public void Clear() + { + _space = _bb.Length; + _bb.Reset(); + _minAlign = 1; + while (_vtableSize > 0) _vtable[--_vtableSize] = 0; + _vtableSize = -1; + _objectStart = 0; + _numVtables = 0; + _vectorNumElems = 0; + } + + /// <summary> + /// Gets and sets a Boolean to disable the optimization when serializing + /// default values to a Table. + /// + /// In order to save space, fields that are set to their default value + /// don't get serialized into the buffer. + /// </summary> + public bool ForceDefaults { get; set; } + + /// @cond FLATBUFFERS_INTERNAL + + public int Offset { get { return _bb.Length - _space; } } + + public void Pad(int size) + { + _bb.PutByte(_space -= size, 0, size); + } + + // Doubles the size of the ByteBuffer, and copies the old data towards + // the end of the new buffer (since we build the buffer backwards). 
+ void GrowBuffer() + { + _bb.GrowFront(_bb.Length << 1); + } + + // Prepare to write an element of `size` after `additional_bytes` + // have been written, e.g. if you write a string, you need to align + // such the int length field is aligned to SIZEOF_INT, and the string + // data follows it directly. + // If all you need to do is align, `additional_bytes` will be 0. + public void Prep(int size, int additionalBytes) + { + // Track the biggest thing we've ever aligned to. + if (size > _minAlign) + _minAlign = size; + // Find the amount of alignment needed such that `size` is properly + // aligned after `additional_bytes` + var alignSize = + ((~((int)_bb.Length - _space + additionalBytes)) + 1) & + (size - 1); + // Reallocate the buffer if needed. + while (_space < alignSize + size + additionalBytes) + { + var oldBufSize = (int)_bb.Length; + GrowBuffer(); + _space += (int)_bb.Length - oldBufSize; + + } + if (alignSize > 0) + Pad(alignSize); + } + + public void PutBool(bool x) + { + _bb.PutByte(_space -= sizeof(byte), (byte)(x ? 
1 : 0)); + } + + public void PutSbyte(sbyte x) + { + _bb.PutSbyte(_space -= sizeof(sbyte), x); + } + + public void PutByte(byte x) + { + _bb.PutByte(_space -= sizeof(byte), x); + } + + public void PutShort(short x) + { + _bb.PutShort(_space -= sizeof(short), x); + } + + public void PutUshort(ushort x) + { + _bb.PutUshort(_space -= sizeof(ushort), x); + } + + public void PutInt(int x) + { + _bb.PutInt(_space -= sizeof(int), x); + } + + public void PutUint(uint x) + { + _bb.PutUint(_space -= sizeof(uint), x); + } + + public void PutLong(long x) + { + _bb.PutLong(_space -= sizeof(long), x); + } + + public void PutUlong(ulong x) + { + _bb.PutUlong(_space -= sizeof(ulong), x); + } + + public void PutFloat(float x) + { + _bb.PutFloat(_space -= sizeof(float), x); + } + + /// <summary> + /// Puts an array of type T into this builder at the + /// current offset + /// </summary> + /// <typeparam name="T">The type of the input data </typeparam> + /// <param name="x">The array to copy data from</param> + public void Put<T>(T[] x) + where T : struct + { + _space = _bb.Put(_space, x); + } + +#if ENABLE_SPAN_T + /// <summary> + /// Puts a span of type T into this builder at the + /// current offset + /// </summary> + /// <typeparam name="T">The type of the input data </typeparam> + /// <param name="x">The span to copy data from</param> + public void Put<T>(Span<T> x) + where T : struct + { + _space = _bb.Put(_space, x); + } +#endif + + public void PutDouble(double x) + { + _bb.PutDouble(_space -= sizeof(double), x); + } + /// @endcond + + /// <summary> + /// Add a `bool` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `bool` to add to the buffer.</param> + public void AddBool(bool x) { Prep(sizeof(byte), 0); PutBool(x); } + + /// <summary> + /// Add a `sbyte` to the buffer (aligns the data and grows if necessary). 
+ /// </summary> + /// <param name="x">The `sbyte` to add to the buffer.</param> + public void AddSbyte(sbyte x) { Prep(sizeof(sbyte), 0); PutSbyte(x); } + + /// <summary> + /// Add a `byte` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `byte` to add to the buffer.</param> + public void AddByte(byte x) { Prep(sizeof(byte), 0); PutByte(x); } + + /// <summary> + /// Add a `short` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `short` to add to the buffer.</param> + public void AddShort(short x) { Prep(sizeof(short), 0); PutShort(x); } + + /// <summary> + /// Add an `ushort` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `ushort` to add to the buffer.</param> + public void AddUshort(ushort x) { Prep(sizeof(ushort), 0); PutUshort(x); } + + /// <summary> + /// Add an `int` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `int` to add to the buffer.</param> + public void AddInt(int x) { Prep(sizeof(int), 0); PutInt(x); } + + /// <summary> + /// Add an `uint` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `uint` to add to the buffer.</param> + public void AddUint(uint x) { Prep(sizeof(uint), 0); PutUint(x); } + + /// <summary> + /// Add a `long` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `long` to add to the buffer.</param> + public void AddLong(long x) { Prep(sizeof(long), 0); PutLong(x); } + + /// <summary> + /// Add an `ulong` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `ulong` to add to the buffer.</param> + public void AddUlong(ulong x) { Prep(sizeof(ulong), 0); PutUlong(x); } + + /// <summary> + /// Add a `float` to the buffer (aligns the data and grows if necessary). 
+ /// </summary> + /// <param name="x">The `float` to add to the buffer.</param> + public void AddFloat(float x) { Prep(sizeof(float), 0); PutFloat(x); } + + /// <summary> + /// Add an array of type T to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <typeparam name="T">The type of the input data</typeparam> + /// <param name="x">The array to copy data from</param> + public void Add<T>(T[] x) + where T : struct + { + if (x == null) + { + throw new ArgumentNullException("Cannot add a null array"); + } + + if( x.Length == 0) + { + // don't do anything if the array is empty + return; + } + + if(!ByteBuffer.IsSupportedType<T>()) + { + throw new ArgumentException("Cannot add this Type array to the builder"); + } + + int size = ByteBuffer.SizeOf<T>(); + // Need to prep on size (for data alignment) and then we pass the + // rest of the length (minus 1) as additional bytes + Prep(size, size * (x.Length - 1)); + Put(x); + } + +#if ENABLE_SPAN_T + /// <summary> + /// Add a span of type T to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <typeparam name="T">The type of the input data</typeparam> + /// <param name="x">The span to copy data from</param> + public void Add<T>(Span<T> x) + where T : struct + { + if (!ByteBuffer.IsSupportedType<T>()) + { + throw new ArgumentException("Cannot add this Type array to the builder"); + } + + int size = ByteBuffer.SizeOf<T>(); + // Need to prep on size (for data alignment) and then we pass the + // rest of the length (minus 1) as additional bytes + Prep(size, size * (x.Length - 1)); + Put(x); + } +#endif + + /// <summary> + /// Add a `double` to the buffer (aligns the data and grows if necessary). + /// </summary> + /// <param name="x">The `double` to add to the buffer.</param> + public void AddDouble(double x) { Prep(sizeof(double), 0); + PutDouble(x); } + + /// <summary> + /// Adds an offset, relative to where it will be written. 
+ /// </summary> + /// <param name="off">The offset to add to the buffer.</param> + public void AddOffset(int off) + { + Prep(sizeof(int), 0); // Ensure alignment is already done. + if (off > Offset) + throw new ArgumentException(); + + off = Offset - off + sizeof(int); + PutInt(off); + } + + /// @cond FLATBUFFERS_INTERNAL + public void StartVector(int elemSize, int count, int alignment) + { + NotNested(); + _vectorNumElems = count; + Prep(sizeof(int), elemSize * count); + Prep(alignment, elemSize * count); // Just in case alignment > int. + } + /// @endcond + + /// <summary> + /// Writes data necessary to finish a vector construction. + /// </summary> + public VectorOffset EndVector() + { + PutInt(_vectorNumElems); + return new VectorOffset(Offset); + } + + /// <summary> + /// Creates a vector of tables. + /// </summary> + /// <param name="offsets">Offsets of the tables.</param> + public VectorOffset CreateVectorOfTables<T>(Offset<T>[] offsets) where T : struct + { + NotNested(); + StartVector(sizeof(int), offsets.Length, sizeof(int)); + for (int i = offsets.Length - 1; i >= 0; i--) AddOffset(offsets[i].Value); + return EndVector(); + } + + /// @cond FLATBUFFERS_INTERNAL + public void Nested(int obj) + { + // Structs are always stored inline, so need to be created right + // where they are used. You'll get this assert if you created it + // elsewhere. 
+ if (obj != Offset) + throw new Exception( + "FlatBuffers: struct must be serialized inline."); + } + + public void NotNested() + { + // You should not be creating any other objects or strings/vectors + // while an object is being constructed + if (_vtableSize >= 0) + throw new Exception( + "FlatBuffers: object serialization must not be nested."); + } + + public void StartObject(int numfields) + { + if (numfields < 0) + throw new ArgumentOutOfRangeException("Flatbuffers: invalid numfields"); + + NotNested(); + + if (_vtable.Length < numfields) + _vtable = new int[numfields]; + + _vtableSize = numfields; + _objectStart = Offset; + } + + + // Set the current vtable at `voffset` to the current location in the + // buffer. + public void Slot(int voffset) + { + if (voffset >= _vtableSize) + throw new IndexOutOfRangeException("Flatbuffers: invalid voffset"); + + _vtable[voffset] = Offset; + } + + /// <summary> + /// Adds a Boolean to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddBool(int o, bool x, bool d) { if (ForceDefaults || x != d) { AddBool(x); Slot(o); } } + + /// <summary> + /// Adds a SByte to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. 
If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddSbyte(int o, sbyte x, sbyte d) { if (ForceDefaults || x != d) { AddSbyte(x); Slot(o); } } + + /// <summary> + /// Adds a Byte to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddByte(int o, byte x, byte d) { if (ForceDefaults || x != d) { AddByte(x); Slot(o); } } + + /// <summary> + /// Adds a Int16 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddShort(int o, short x, int d) { if (ForceDefaults || x != d) { AddShort(x); Slot(o); } } + + /// <summary> + /// Adds a UInt16 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. 
If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddUshort(int o, ushort x, ushort d) { if (ForceDefaults || x != d) { AddUshort(x); Slot(o); } } + + /// <summary> + /// Adds an Int32 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddInt(int o, int x, int d) { if (ForceDefaults || x != d) { AddInt(x); Slot(o); } } + + /// <summary> + /// Adds a UInt32 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddUint(int o, uint x, uint d) { if (ForceDefaults || x != d) { AddUint(x); Slot(o); } } + + /// <summary> + /// Adds an Int64 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. 
If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddLong(int o, long x, long d) { if (ForceDefaults || x != d) { AddLong(x); Slot(o); } } + + /// <summary> + /// Adds a UInt64 to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddUlong(int o, ulong x, ulong d) { if (ForceDefaults || x != d) { AddUlong(x); Slot(o); } } + + /// <summary> + /// Adds a Single to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddFloat(int o, float x, double d) { if (ForceDefaults || x != d) { AddFloat(x); Slot(o); } } + + /// <summary> + /// Adds a Double to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. 
If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddDouble(int o, double x, double d) { if (ForceDefaults || x != d) { AddDouble(x); Slot(o); } } + + /// <summary> + /// Adds a buffer offset to the Table at index `o` in its vtable using the value `x` and default `d` + /// </summary> + /// <param name="o">The index into the vtable</param> + /// <param name="x">The value to put into the buffer. If the value is equal to the default + /// and <see cref="ForceDefaults"/> is false, the value will be skipped.</param> + /// <param name="d">The default value to compare the value against</param> + public void AddOffset(int o, int x, int d) { if (ForceDefaults || x != d) { AddOffset(x); Slot(o); } } + /// @endcond + + /// <summary> + /// Encode the string `s` in the buffer using UTF-8. + /// </summary> + /// <param name="s">The string to encode.</param> + /// <returns> + /// The offset in the buffer where the encoded string starts. + /// </returns> + public StringOffset CreateString(string s) + { + NotNested(); + AddByte(0); + var utf8StringLen = Encoding.UTF8.GetByteCount(s); + StartVector(1, utf8StringLen, 1); + _bb.PutStringUTF8(_space -= utf8StringLen, s); + return new StringOffset(EndVector().Value); + } + + +#if ENABLE_SPAN_T + /// <summary> + /// Creates a string in the buffer from a Span containing + /// a UTF8 string. + /// </summary> + /// <param name="chars">the UTF8 string to add to the buffer</param> + /// <returns> + /// The offset in the buffer where the encoded string starts. 
+ /// </returns> + public StringOffset CreateUTF8String(Span<byte> chars) + { + NotNested(); + AddByte(0); + var utf8StringLen = chars.Length; + StartVector(1, utf8StringLen, 1); + _space = _bb.Put(_space, chars); + return new StringOffset(EndVector().Value); + } +#endif + + /// @cond FLATBUFFERS_INTERNAL + // Structs are stored inline, so nothing additional is being added. + // `d` is always 0. + public void AddStruct(int voffset, int x, int d) + { + if (x != d) + { + Nested(x); + Slot(voffset); + } + } + + public int EndObject() + { + if (_vtableSize < 0) + throw new InvalidOperationException( + "Flatbuffers: calling endObject without a startObject"); + + AddInt((int)0); + var vtableloc = Offset; + // Write out the current vtable. + int i = _vtableSize - 1; + // Trim trailing zeroes. + for (; i >= 0 && _vtable[i] == 0; i--) {} + int trimmedSize = i + 1; + for (; i >= 0 ; i--) { + // Offset relative to the start of the table. + short off = (short)(_vtable[i] != 0 + ? vtableloc - _vtable[i] + : 0); + AddShort(off); + + // clear out written entry + _vtable[i] = 0; + } + + const int standardFields = 2; // The fields below: + AddShort((short)(vtableloc - _objectStart)); + AddShort((short)((trimmedSize + standardFields) * + sizeof(short))); + + // Search for an existing vtable that matches the current one. + int existingVtable = 0; + for (i = 0; i < _numVtables; i++) { + int vt1 = _bb.Length - _vtables[i]; + int vt2 = _space; + short len = _bb.GetShort(vt1); + if (len == _bb.GetShort(vt2)) { + for (int j = sizeof(short); j < len; j += sizeof(short)) { + if (_bb.GetShort(vt1 + j) != _bb.GetShort(vt2 + j)) { + goto endLoop; + } + } + existingVtable = _vtables[i]; + break; + } + + endLoop: { } + } + + if (existingVtable != 0) { + // Found a match: + // Remove the current vtable. + _space = _bb.Length - vtableloc; + // Point table to existing vtable. 
+ _bb.PutInt(_space, existingVtable - vtableloc); + } else { + // No match: + // Add the location of the current vtable to the list of + // vtables. + if (_numVtables == _vtables.Length) + { + // Arrays.CopyOf(vtables num_vtables * 2); + var newvtables = new int[ _numVtables * 2]; + Array.Copy(_vtables, newvtables, _vtables.Length); + + _vtables = newvtables; + }; + _vtables[_numVtables++] = Offset; + // Point table to current vtable. + _bb.PutInt(_bb.Length - vtableloc, Offset - vtableloc); + } + + _vtableSize = -1; + return vtableloc; + } + + // This checks a required field has been set in a given table that has + // just been constructed. + public void Required(int table, int field) + { + int table_start = _bb.Length - table; + int vtable_start = table_start - _bb.GetInt(table_start); + bool ok = _bb.GetShort(vtable_start + field) != 0; + // If this fails, the caller will show what field needs to be set. + if (!ok) + throw new InvalidOperationException("FlatBuffers: field " + field + + " must be set"); + } + /// @endcond + + /// <summary> + /// Finalize a buffer, pointing to the given `root_table`. + /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + /// <param name="sizePrefix"> + /// Whether to prefix the size to the buffer. + /// </param> + protected void Finish(int rootTable, bool sizePrefix) + { + Prep(_minAlign, sizeof(int) + (sizePrefix ? sizeof(int) : 0)); + AddOffset(rootTable); + if (sizePrefix) { + AddInt(_bb.Length - _space); + } + _bb.Position = _space; + } + + /// <summary> + /// Finalize a buffer, pointing to the given `root_table`. + /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + public void Finish(int rootTable) + { + Finish(rootTable, false); + } + + /// <summary> + /// Finalize a buffer, pointing to the given `root_table`, with the size prefixed. 
+ /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + public void FinishSizePrefixed(int rootTable) + { + Finish(rootTable, true); + } + + /// <summary> + /// Get the ByteBuffer representing the FlatBuffer. + /// </summary> + /// <remarks> + /// This is typically only called after you call `Finish()`. + /// The actual data starts at the ByteBuffer's current position, + /// not necessarily at `0`. + /// </remarks> + /// <returns> + /// Returns the ByteBuffer for this FlatBuffer. + /// </returns> + public ByteBuffer DataBuffer { get { return _bb; } } + + /// <summary> + /// A utility function to copy and return the ByteBuffer data as a + /// `byte[]`. + /// </summary> + /// <returns> + /// A full copy of the FlatBuffer data. + /// </returns> + public byte[] SizedByteArray() + { + return _bb.ToSizedArray(); + } + + /// <summary> + /// Finalize a buffer, pointing to the given `rootTable`. + /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + /// <param name="fileIdentifier"> + /// A FlatBuffer file identifier to be added to the buffer before + /// `root_table`. + /// </param> + /// <param name="sizePrefix"> + /// Whether to prefix the size to the buffer. + /// </param> + protected void Finish(int rootTable, string fileIdentifier, bool sizePrefix) + { + Prep(_minAlign, sizeof(int) + (sizePrefix ? sizeof(int) : 0) + + FlatBufferConstants.FileIdentifierLength); + if (fileIdentifier.Length != + FlatBufferConstants.FileIdentifierLength) + throw new ArgumentException( + "FlatBuffers: file identifier must be length " + + FlatBufferConstants.FileIdentifierLength, + "fileIdentifier"); + for (int i = FlatBufferConstants.FileIdentifierLength - 1; i >= 0; + i--) + { + AddByte((byte)fileIdentifier[i]); + } + Finish(rootTable, sizePrefix); + } + + /// <summary> + /// Finalize a buffer, pointing to the given `rootTable`. 
+ /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + /// <param name="fileIdentifier"> + /// A FlatBuffer file identifier to be added to the buffer before + /// `root_table`. + /// </param> + public void Finish(int rootTable, string fileIdentifier) + { + Finish(rootTable, fileIdentifier, false); + } + + /// <summary> + /// Finalize a buffer, pointing to the given `rootTable`, with the size prefixed. + /// </summary> + /// <param name="rootTable"> + /// An offset to be added to the buffer. + /// </param> + /// <param name="fileIdentifier"> + /// A FlatBuffer file identifier to be added to the buffer before + /// `root_table`. + /// </param> + public void FinishSizePrefixed(int rootTable, string fileIdentifier) + { + Finish(rootTable, fileIdentifier, true); + } + } +} + +/// @} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferConstants.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferConstants.cs new file mode 100644 index 000000000..68a4e9cad --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferConstants.cs @@ -0,0 +1,29 @@ +/* + * Copyright 2014 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
namespace FlatBuffers
{
    /// <summary>
    /// Fixed lengths, in bytes, of the optional header fields of a FlatBuffer.
    /// </summary>
    internal static class FlatBufferConstants
    {
        /// <summary>Length of the size prefix that may precede a buffer.</summary>
        public const int SizePrefixLength = 4;

        /// <summary>Required length of a file identifier.</summary>
        public const int FileIdentifierLength = 4;
    }
}

namespace FlatBuffers
{
    /// <summary>
    /// This is the base for both structs and tables.
    /// </summary>
    internal interface IFlatbufferObject
    {
        /// <summary>The buffer this object reads from.</summary>
        ByteBuffer ByteBuffer { get; }

        /// <summary>
        /// Points this object at position <paramref name="_i"/> inside
        /// <paramref name="_bb"/>.
        /// </summary>
        void __init(int _i, ByteBuffer _bb);
    }
}
namespace FlatBuffers
{
    /// <summary>
    /// Offset of a table of type <typeparamref name="T"/>, wrapped in its own
    /// type so that offsets of different kinds cannot be mixed up.
    /// </summary>
    internal struct Offset<T> where T : struct
    {
        /// <summary>Wraps the raw offset <paramref name="value"/>.</summary>
        public Offset(int value)
        {
            Value = value;
        }

        /// <summary>The raw offset.</summary>
        public int Value;
    }

    /// <summary>
    /// Offset of a string, kept distinct from other offset kinds.
    /// </summary>
    internal struct StringOffset
    {
        /// <summary>Wraps the raw offset <paramref name="value"/>.</summary>
        public StringOffset(int value)
        {
            Value = value;
        }

        /// <summary>The raw offset.</summary>
        public int Value;
    }

    /// <summary>
    /// Offset of a vector, kept distinct from other offset kinds.
    /// </summary>
    internal struct VectorOffset
    {
        /// <summary>Wraps the raw offset <paramref name="value"/>.</summary>
        public VectorOffset(int value)
        {
            Value = value;
        }

        /// <summary>The raw offset.</summary>
        public int Value;
    }
}
namespace FlatBuffers
{
    /// <summary>
    /// All structs in the generated code derive from this class, and add their own accessors.
    /// </summary>
    internal struct Struct
    {
        // Position of the struct inside `bb`.
        public int bb_pos;
        // Buffer holding the struct's data.
        public ByteBuffer bb;
    }
}

namespace FlatBuffers
{
    /// <summary>
    /// All tables in the generated code derive from this struct, and add their own accessors.
    /// </summary>
    internal struct Table
    {
        // Position of the table inside `bb`.
        public int bb_pos;
        // Buffer holding the table's data.
        public ByteBuffer bb;

        public ByteBuffer ByteBuffer { get { return bb; } }

        // Looks a field up in this table's vtable. Returns the field's offset
        // within the table, or 0 when the vtable is too short to contain it.
        public int __offset(int vtableOffset)
        {
            int vtableStart = bb_pos - bb.GetInt(bb_pos);
            if (vtableOffset >= bb.GetShort(vtableStart))
            {
                return 0;
            }

            return bb.GetShort(vtableStart + vtableOffset);
        }

        // Static variant for a table given by `offset`, which is measured from
        // the end of `bb`. Returns the field's position inside the buffer.
        public static int __offset(int vtableOffset, int offset, ByteBuffer bb)
        {
            int tableStart = bb.Length - offset;
            int vtableStart = tableStart - bb.GetInt(tableStart);
            return bb.GetShort(vtableStart + vtableOffset) + tableStart;
        }

        // Follows the relative offset stored at "offset" and returns the
        // position it points to.
        public int __indirect(int offset)
        {
            int relative = bb.GetInt(offset);
            return offset + relative;
        }

        public static int __indirect(int offset, ByteBuffer bb)
        {
            int relative = bb.GetInt(offset);
            return offset + relative;
        }

        // Create a .NET String from UTF-8 data stored inside the flatbuffer.
        public string __string(int offset)
        {
            int stringStart = offset + bb.GetInt(offset);
            int byteCount = bb.GetInt(stringStart);
            // The bytes follow the int length field.
            return bb.GetStringUTF8(stringStart + sizeof(int), byteCount);
        }

        // Get the length of a vector whose offset is stored at "offset" in this object.
        public int __vector_len(int offset)
        {
            int fieldPos = offset + bb_pos;
            int vectorStart = fieldPos + bb.GetInt(fieldPos);
            return bb.GetInt(vectorStart);
        }

        // Get the start of data of a vector whose offset is stored at "offset" in this object.
        public int __vector(int offset)
        {
            int fieldPos = offset + bb_pos;
            // Skip the length field that precedes the data.
            return fieldPos + bb.GetInt(fieldPos) + sizeof(int);
        }

#if ENABLE_SPAN_T
        // Get the data of a vector whose offset is stored at "offset" in this object as a
        // Span<byte>. If the vector is not present in the ByteBuffer,
        // then an empty span will be returned.
        public Span<byte> __vector_as_span(int offset)
        {
            int field = __offset(offset);
            if (field == 0)
            {
                return new Span<byte>();
            }

            int dataStart = __vector(field);
            int count = __vector_len(field);
            return bb.ToSpan(dataStart, count);
        }
#else
        // Get the data of a vector whose offset is stored at "offset" in this object as an
        // ArraySegment<byte>. If the vector is not present in the ByteBuffer,
        // then a null value will be returned.
        public ArraySegment<byte>? __vector_as_arraysegment(int offset)
        {
            int field = __offset(offset);
            if (field == 0)
            {
                return null;
            }

            int dataStart = __vector(field);
            int count = __vector_len(field);
            return bb.ToArraySegment(dataStart, count);
        }
#endif

        // Get the data of a vector whose offset is stored at "offset" in this object as a
        // T[]. If the vector is not present in the ByteBuffer, then a null value will be
        // returned. Only available on little-endian systems.
        public T[] __vector_as_array<T>(int offset)
            where T : struct
        {
            if (BitConverter.IsLittleEndian == false)
            {
                throw new NotSupportedException("Getting typed arrays on a Big Endian " +
                    "system is not support");
            }

            int field = __offset(offset);
            if (field == 0)
            {
                return null;
            }

            int dataStart = __vector(field);
            int count = __vector_len(field);
            return bb.ToArray<T>(dataStart, count);
        }

        // Initialize any Table-derived type to point to the union at the given offset.
        public T __union<T>(int offset) where T : struct, IFlatbufferObject
        {
            int fieldPos = offset + bb_pos;
            T result = new T();
            result.__init(fieldPos + bb.GetInt(fieldPos), bb);
            return result;
        }

        // Checks whether the bytes that follow the first int at the buffer's
        // current position spell out `ident`.
        public static bool __has_identifier(ByteBuffer bb, string ident)
        {
            if (ident.Length != FlatBufferConstants.FileIdentifierLength)
            {
                throw new ArgumentException("FlatBuffers: file identifier must be length " + FlatBufferConstants.FileIdentifierLength, "ident");
            }

            int index = 0;
            while (index < FlatBufferConstants.FileIdentifierLength)
            {
                if (ident[index] != (char)bb.Get(bb.Position + sizeof(int) + index))
                {
                    return false;
                }
                index++;
            }

            return true;
        }

        // Compare strings in the ByteBuffer. Bytes are compared in order; if one
        // string is a prefix of the other, the lengths decide.
        public static int CompareStrings(int offset_1, int offset_2, ByteBuffer bb)
        {
            int firstStart = offset_1 + bb.GetInt(offset_1);
            int secondStart = offset_2 + bb.GetInt(offset_2);
            int firstLength = bb.GetInt(firstStart);
            int secondLength = bb.GetInt(secondStart);
            int firstData = firstStart + sizeof(int);
            int secondData = secondStart + sizeof(int);
            int shared = Math.Min(firstLength, secondLength);

            for (int i = 0; i < shared; i++)
            {
                int difference = bb.Get(i + firstData) - bb.Get(i + secondData);
                if (difference != 0)
                {
                    return difference;
                }
            }

            return firstLength - secondLength;
        }

        // Compare string from the ByteBuffer with the bytes in `key`, using the
        // same ordering as the overload above.
        public static int CompareStrings(int offset_1, byte[] key, ByteBuffer bb)
        {
            int storedStart = offset_1 + bb.GetInt(offset_1);
            int storedLength = bb.GetInt(storedStart);
            int keyLength = key.Length;
            int storedData = storedStart + sizeof(int);
            int shared = Math.Min(storedLength, keyLength);

            for (int i = 0; i < shared; i++)
            {
                int difference = bb.Get(i + storedData) - key[i];
                if (difference != 0)
                {
                    return difference;
                }
            }

            return storedLength - keyLength;
        }
    }
}
+ public Footer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public MetadataVersion Version { get { int o = __p.__offset(4); return o != 0 ? (MetadataVersion)__p.bb.GetShort(o + __p.bb_pos) : MetadataVersion.V1; } } + public Schema? Schema { get { int o = __p.__offset(6); return o != 0 ? (Schema?)(new Schema()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + public Block? Dictionaries(int j) { int o = __p.__offset(8); return o != 0 ? (Block?)(new Block()).__assign(__p.__vector(o) + j * 24, __p.bb) : null; } + public int DictionariesLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } + public Block? RecordBatches(int j) { int o = __p.__offset(10); return o != 0 ? (Block?)(new Block()).__assign(__p.__vector(o) + j * 24, __p.bb) : null; } + public int RecordBatchesLength { get { int o = __p.__offset(10); return o != 0 ? __p.__vector_len(o) : 0; } } + /// User-defined metadata + public KeyValue? CustomMetadata(int j) { int o = __p.__offset(12); return o != 0 ? (KeyValue?)(new KeyValue()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } + public int CustomMetadataLength { get { int o = __p.__offset(12); return o != 0 ? 
__p.__vector_len(o) : 0; } } + + public static Offset<Footer> CreateFooter(FlatBufferBuilder builder, + MetadataVersion version = MetadataVersion.V1, + Offset<Schema> schemaOffset = default(Offset<Schema>), + VectorOffset dictionariesOffset = default(VectorOffset), + VectorOffset recordBatchesOffset = default(VectorOffset), + VectorOffset custom_metadataOffset = default(VectorOffset)) { + builder.StartObject(5); + Footer.AddCustomMetadata(builder, custom_metadataOffset); + Footer.AddRecordBatches(builder, recordBatchesOffset); + Footer.AddDictionaries(builder, dictionariesOffset); + Footer.AddSchema(builder, schemaOffset); + Footer.AddVersion(builder, version); + return Footer.EndFooter(builder); + } + + public static void StartFooter(FlatBufferBuilder builder) { builder.StartObject(5); } + public static void AddVersion(FlatBufferBuilder builder, MetadataVersion version) { builder.AddShort(0, (short)version, 0); } + public static void AddSchema(FlatBufferBuilder builder, Offset<Schema> schemaOffset) { builder.AddOffset(1, schemaOffset.Value, 0); } + public static void AddDictionaries(FlatBufferBuilder builder, VectorOffset dictionariesOffset) { builder.AddOffset(2, dictionariesOffset.Value, 0); } + public static void StartDictionariesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(24, numElems, 8); } + public static void AddRecordBatches(FlatBufferBuilder builder, VectorOffset recordBatchesOffset) { builder.AddOffset(3, recordBatchesOffset.Value, 0); } + public static void StartRecordBatchesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(24, numElems, 8); } + public static void AddCustomMetadata(FlatBufferBuilder builder, VectorOffset customMetadataOffset) { builder.AddOffset(4, customMetadataOffset.Value, 0); } + public static VectorOffset CreateCustomMetadataVector(FlatBufferBuilder builder, Offset<KeyValue>[] data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) 
builder.AddOffset(data[i].Value); return builder.EndVector(); } + public static VectorOffset CreateCustomMetadataVectorBlock(FlatBufferBuilder builder, Offset<KeyValue>[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); } + public static void StartCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); } + public static Offset<Footer> EndFooter(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Footer>(o); + } + public static void FinishFooterBuffer(FlatBufferBuilder builder, Offset<Footer> offset) { builder.Finish(offset.Value); } + public static void FinishSizePrefixedFooterBuffer(FlatBufferBuilder builder, Offset<Footer> offset) { builder.FinishSizePrefixed(offset.Value); } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/KeyValue.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/KeyValue.cs new file mode 100644 index 000000000..5b8c2efca --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/KeyValue.cs @@ -0,0 +1,57 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// user defined key value pairs to add custom metadata to arrow +/// key namespacing is the responsibility of the user +internal struct KeyValue : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static KeyValue GetRootAsKeyValue(ByteBuffer _bb) { return GetRootAsKeyValue(_bb, new KeyValue()); } + public static KeyValue GetRootAsKeyValue(ByteBuffer _bb, KeyValue obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public KeyValue __assign(int _i, ByteBuffer _bb) { 
__init(_i, _bb); return this; } + + public string Key { get { int o = __p.__offset(4); return o != 0 ? __p.__string(o + __p.bb_pos) : null; } } +#if ENABLE_SPAN_T + public Span<byte> GetKeyBytes() { return __p.__vector_as_span(4); } +#else + public ArraySegment<byte>? GetKeyBytes() { return __p.__vector_as_arraysegment(4); } +#endif + public byte[] GetKeyArray() { return __p.__vector_as_array<byte>(4); } + public string Value { get { int o = __p.__offset(6); return o != 0 ? __p.__string(o + __p.bb_pos) : null; } } +#if ENABLE_SPAN_T + public Span<byte> GetValueBytes() { return __p.__vector_as_span(6); } +#else + public ArraySegment<byte>? GetValueBytes() { return __p.__vector_as_arraysegment(6); } +#endif + public byte[] GetValueArray() { return __p.__vector_as_array<byte>(6); } + + public static Offset<KeyValue> CreateKeyValue(FlatBufferBuilder builder, + StringOffset keyOffset = default(StringOffset), + StringOffset valueOffset = default(StringOffset)) { + builder.StartObject(2); + KeyValue.AddValue(builder, valueOffset); + KeyValue.AddKey(builder, keyOffset); + return KeyValue.EndKeyValue(builder); + } + + public static void StartKeyValue(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddKey(FlatBufferBuilder builder, StringOffset keyOffset) { builder.AddOffset(0, keyOffset.Value, 0); } + public static void AddValue(FlatBufferBuilder builder, StringOffset valueOffset) { builder.AddOffset(1, valueOffset.Value, 0); } + public static Offset<KeyValue> EndKeyValue(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<KeyValue>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Map.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Map.cs new file mode 100644 index 000000000..9141f4c93 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Map.cs @@ -0,0 +1,63 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace 
Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// A Map is a logical nested type that is represented as +/// +/// List<entry: Struct<key: K, value: V>> +/// +/// In this layout, the keys and values are each respectively contiguous. We do +/// not constrain the key and value types, so the application is responsible +/// for ensuring that the keys are hashable and unique. Whether the keys are sorted +/// may be set in the metadata for this field +/// +/// In a Field with Map type, the Field has a child Struct field, which then +/// has two children: key type and the second the value type. The names of the +/// child fields may be respectively "entry", "key", and "value", but this is +/// not enforced +/// +/// Map +/// - child[0] entry: Struct +/// - child[0] key: K +/// - child[1] value: V +/// +/// Neither the "entry" field nor the "key" field may be nullable. +/// +/// The metadata is structured so that Arrow systems without special handling +/// for Map can make Map an alias for List. The "layout" attribute for the Map +/// field must have the same contents as a List. +internal struct Map : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Map GetRootAsMap(ByteBuffer _bb) { return GetRootAsMap(_bb, new Map()); } + public static Map GetRootAsMap(ByteBuffer _bb, Map obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Map __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// Set to true if the keys within each value are sorted + public bool KeysSorted { get { int o = __p.__offset(4); return o != 0 ? 
0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } + + public static Offset<Map> CreateMap(FlatBufferBuilder builder, + bool keysSorted = false) { + builder.StartObject(1); + Map.AddKeysSorted(builder, keysSorted); + return Map.EndMap(builder); + } + + public static void StartMap(FlatBufferBuilder builder) { builder.StartObject(1); } + public static void AddKeysSorted(FlatBufferBuilder builder, bool keysSorted) { builder.AddBool(0, keysSorted, false); } + public static Offset<Map> EndMap(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Map>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Message.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Message.cs new file mode 100644 index 000000000..db54e6ab3 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Message.cs @@ -0,0 +1,60 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Message : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Message GetRootAsMessage(ByteBuffer _bb) { return GetRootAsMessage(_bb, new Message()); } + public static Message GetRootAsMessage(ByteBuffer _bb, Message obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Message __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public MetadataVersion Version { get { int o = __p.__offset(4); return o != 0 ? (MetadataVersion)__p.bb.GetShort(o + __p.bb_pos) : MetadataVersion.V1; } } + public MessageHeader HeaderType { get { int o = __p.__offset(6); return o != 0 ? (MessageHeader)__p.bb.Get(o + __p.bb_pos) : MessageHeader.NONE; } } + public TTable? 
Header<TTable>() where TTable : struct, IFlatbufferObject { int o = __p.__offset(8); return o != 0 ? (TTable?)__p.__union<TTable>(o) : null; } + public long BodyLength { get { int o = __p.__offset(10); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } + public KeyValue? CustomMetadata(int j) { int o = __p.__offset(12); return o != 0 ? (KeyValue?)(new KeyValue()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } + public int CustomMetadataLength { get { int o = __p.__offset(12); return o != 0 ? __p.__vector_len(o) : 0; } } + + public static Offset<Message> CreateMessage(FlatBufferBuilder builder, + MetadataVersion version = MetadataVersion.V1, + MessageHeader header_type = MessageHeader.NONE, + int headerOffset = 0, + long bodyLength = 0, + VectorOffset custom_metadataOffset = default(VectorOffset)) { + builder.StartObject(5); + Message.AddBodyLength(builder, bodyLength); + Message.AddCustomMetadata(builder, custom_metadataOffset); + Message.AddHeader(builder, headerOffset); + Message.AddVersion(builder, version); + Message.AddHeaderType(builder, header_type); + return Message.EndMessage(builder); + } + + public static void StartMessage(FlatBufferBuilder builder) { builder.StartObject(5); } + public static void AddVersion(FlatBufferBuilder builder, MetadataVersion version) { builder.AddShort(0, (short)version, 0); } + public static void AddHeaderType(FlatBufferBuilder builder, MessageHeader headerType) { builder.AddByte(1, (byte)headerType, 0); } + public static void AddHeader(FlatBufferBuilder builder, int headerOffset) { builder.AddOffset(2, headerOffset, 0); } + public static void AddBodyLength(FlatBufferBuilder builder, long bodyLength) { builder.AddLong(3, bodyLength, 0); } + public static void AddCustomMetadata(FlatBufferBuilder builder, VectorOffset customMetadataOffset) { builder.AddOffset(4, customMetadataOffset.Value, 0); } + public static VectorOffset CreateCustomMetadataVector(FlatBufferBuilder builder, Offset<KeyValue>[] 
data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) builder.AddOffset(data[i].Value); return builder.EndVector(); } + public static VectorOffset CreateCustomMetadataVectorBlock(FlatBufferBuilder builder, Offset<KeyValue>[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); } + public static void StartCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); } + public static Offset<Message> EndMessage(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Message>(o); + } + public static void FinishMessageBuffer(FlatBufferBuilder builder, Offset<Message> offset) { builder.Finish(offset.Value); } + public static void FinishSizePrefixedMessageBuffer(FlatBufferBuilder builder, Offset<Message> offset) { builder.FinishSizePrefixed(offset.Value); } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs new file mode 100644 index 000000000..4c95acde9 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs @@ -0,0 +1,67 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". 
+internal struct RecordBatch : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static RecordBatch GetRootAsRecordBatch(ByteBuffer _bb) { return GetRootAsRecordBatch(_bb, new RecordBatch()); } + public static RecordBatch GetRootAsRecordBatch(ByteBuffer _bb, RecordBatch obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public RecordBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// number of records / rows. The arrays in the batch should all have this + /// length + public long Length { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } + /// Nodes correspond to the pre-ordered flattened logical schema + public FieldNode? Nodes(int j) { int o = __p.__offset(6); return o != 0 ? (FieldNode?)(new FieldNode()).__assign(__p.__vector(o) + j * 16, __p.bb) : null; } + public int NodesLength { get { int o = __p.__offset(6); return o != 0 ? __p.__vector_len(o) : 0; } } + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + public Buffer? Buffers(int j) { int o = __p.__offset(8); return o != 0 ? (Buffer?)(new Buffer()).__assign(__p.__vector(o) + j * 16, __p.bb) : null; } + public int BuffersLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } + /// Optional compression of the message body + public BodyCompression? Compression { get { int o = __p.__offset(10); return o != 0 ? 
(BodyCompression?)(new BodyCompression()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + + public static Offset<RecordBatch> CreateRecordBatch(FlatBufferBuilder builder, + long length = 0, + VectorOffset nodesOffset = default(VectorOffset), + VectorOffset buffersOffset = default(VectorOffset), + Offset<BodyCompression> compressionOffset = default(Offset<BodyCompression>)) { + builder.StartObject(4); + RecordBatch.AddLength(builder, length); + RecordBatch.AddCompression(builder, compressionOffset); + RecordBatch.AddBuffers(builder, buffersOffset); + RecordBatch.AddNodes(builder, nodesOffset); + return RecordBatch.EndRecordBatch(builder); + } + + public static void StartRecordBatch(FlatBufferBuilder builder) { builder.StartObject(4); } + public static void AddLength(FlatBufferBuilder builder, long length) { builder.AddLong(0, length, 0); } + public static void AddNodes(FlatBufferBuilder builder, VectorOffset nodesOffset) { builder.AddOffset(1, nodesOffset.Value, 0); } + public static void StartNodesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } + public static void AddBuffers(FlatBufferBuilder builder, VectorOffset buffersOffset) { builder.AddOffset(2, buffersOffset.Value, 0); } + public static void StartBuffersVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } + public static void AddCompression(FlatBufferBuilder builder, Offset<BodyCompression> compressionOffset) { builder.AddOffset(3, compressionOffset.Value, 0); } + public static Offset<RecordBatch> EndRecordBatch(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<RecordBatch>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Schema.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Schema.cs new file mode 100644 index 000000000..3764e7ba3 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Schema.cs @@ -0,0 +1,76 @@ +// <auto-generated> +// automatically 
generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// A Schema describes the columns in a row batch +internal struct Schema : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Schema GetRootAsSchema(ByteBuffer _bb) { return GetRootAsSchema(_bb, new Schema()); } + public static Schema GetRootAsSchema(ByteBuffer _bb, Schema obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Schema __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// endianness of the buffer + /// it is Little Endian by default + /// if endianness doesn't match the underlying system then the vectors need to be converted + public Endianness Endianness { get { int o = __p.__offset(4); return o != 0 ? (Endianness)__p.bb.GetShort(o + __p.bb_pos) : Endianness.Little; } } + public Field? Fields(int j) { int o = __p.__offset(6); return o != 0 ? (Field?)(new Field()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } + public int FieldsLength { get { int o = __p.__offset(6); return o != 0 ? __p.__vector_len(o) : 0; } } + public KeyValue? CustomMetadata(int j) { int o = __p.__offset(8); return o != 0 ? (KeyValue?)(new KeyValue()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } + public int CustomMetadataLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } + /// Features used in the stream/file. + public Feature Features(int j) { int o = __p.__offset(10); return o != 0 ? (Feature)__p.bb.GetLong(__p.__vector(o) + j * 8) : (Feature)0; } + public int FeaturesLength { get { int o = __p.__offset(10); return o != 0 ? 
__p.__vector_len(o) : 0; } } +#if ENABLE_SPAN_T + public Span<byte> GetFeaturesBytes() { return __p.__vector_as_span(10); } +#else + public ArraySegment<byte>? GetFeaturesBytes() { return __p.__vector_as_arraysegment(10); } +#endif + public Feature[] GetFeaturesArray() { return __p.__vector_as_array<Feature>(10); } + + public static Offset<Schema> CreateSchema(FlatBufferBuilder builder, + Endianness endianness = Endianness.Little, + VectorOffset fieldsOffset = default(VectorOffset), + VectorOffset custom_metadataOffset = default(VectorOffset), + VectorOffset featuresOffset = default(VectorOffset)) { + builder.StartObject(4); + Schema.AddFeatures(builder, featuresOffset); + Schema.AddCustomMetadata(builder, custom_metadataOffset); + Schema.AddFields(builder, fieldsOffset); + Schema.AddEndianness(builder, endianness); + return Schema.EndSchema(builder); + } + + public static void StartSchema(FlatBufferBuilder builder) { builder.StartObject(4); } + public static void AddEndianness(FlatBufferBuilder builder, Endianness endianness) { builder.AddShort(0, (short)endianness, 0); } + public static void AddFields(FlatBufferBuilder builder, VectorOffset fieldsOffset) { builder.AddOffset(1, fieldsOffset.Value, 0); } + public static VectorOffset CreateFieldsVector(FlatBufferBuilder builder, Offset<Field>[] data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) builder.AddOffset(data[i].Value); return builder.EndVector(); } + public static VectorOffset CreateFieldsVectorBlock(FlatBufferBuilder builder, Offset<Field>[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); } + public static void StartFieldsVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); } + public static void AddCustomMetadata(FlatBufferBuilder builder, VectorOffset customMetadataOffset) { builder.AddOffset(2, customMetadataOffset.Value, 0); } + public static VectorOffset 
CreateCustomMetadataVector(FlatBufferBuilder builder, Offset<KeyValue>[] data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) builder.AddOffset(data[i].Value); return builder.EndVector(); } + public static VectorOffset CreateCustomMetadataVectorBlock(FlatBufferBuilder builder, Offset<KeyValue>[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); } + public static void StartCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); } + public static void AddFeatures(FlatBufferBuilder builder, VectorOffset featuresOffset) { builder.AddOffset(3, featuresOffset.Value, 0); } + public static VectorOffset CreateFeaturesVector(FlatBufferBuilder builder, Feature[] data) { builder.StartVector(8, data.Length, 8); for (int i = data.Length - 1; i >= 0; i--) builder.AddLong((long)data[i]); return builder.EndVector(); } + public static VectorOffset CreateFeaturesVectorBlock(FlatBufferBuilder builder, Feature[] data) { builder.StartVector(8, data.Length, 8); builder.Add(data); return builder.EndVector(); } + public static void StartFeaturesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(8, numElems, 8); } + public static Offset<Schema> EndSchema(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Schema>(o); + } + public static void FinishSchemaBuffer(FlatBufferBuilder builder, Offset<Schema> offset) { builder.Finish(offset.Value); } + public static void FinishSizePrefixedSchemaBuffer(FlatBufferBuilder builder, Offset<Schema> offset) { builder.FinishSizePrefixed(offset.Value); } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs new file mode 100644 index 000000000..918d7d6ea --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs @@ -0,0 +1,60 @@ +// <auto-generated> +// automatically generated by the FlatBuffers 
compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Tensor : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Tensor GetRootAsTensor(ByteBuffer _bb) { return GetRootAsTensor(_bb, new Tensor()); } + public static Tensor GetRootAsTensor(ByteBuffer _bb, Tensor obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Tensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public Type TypeType { get { int o = __p.__offset(4); return o != 0 ? (Type)__p.bb.Get(o + __p.bb_pos) : Flatbuf.Type.NONE; } } + /// The type of data contained in a value cell. Currently only fixed-width + /// value types are supported, no strings or nested types + public TTable? Type<TTable>() where TTable : struct, IFlatbufferObject { int o = __p.__offset(6); return o != 0 ? (TTable?)__p.__union<TTable>(o) : null; } + /// The dimensions of the tensor, optionally named + public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ? (TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } + public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } + /// Non-negative byte offsets to advance one value cell along each dimension + public long Strides(int j) { int o = __p.__offset(10); return o != 0 ? __p.bb.GetLong(__p.__vector(o) + j * 8) : (long)0; } + public int StridesLength { get { int o = __p.__offset(10); return o != 0 ? __p.__vector_len(o) : 0; } } +#if ENABLE_SPAN_T + public Span<byte> GetStridesBytes() { return __p.__vector_as_span(10); } +#else + public ArraySegment<byte>? 
GetStridesBytes() { return __p.__vector_as_arraysegment(10); } +#endif + public long[] GetStridesArray() { return __p.__vector_as_array<long>(10); } + /// The location and size of the tensor's data + public Buffer? Data { get { int o = __p.__offset(12); return o != 0 ? (Buffer?)(new Buffer()).__assign(o + __p.bb_pos, __p.bb) : null; } } + + public static void StartTensor(FlatBufferBuilder builder) { builder.StartObject(5); } + public static void AddTypeType(FlatBufferBuilder builder, Type typeType) { builder.AddByte(0, (byte)typeType, 0); } + public static void AddType(FlatBufferBuilder builder, int typeOffset) { builder.AddOffset(1, typeOffset, 0); } + public static void AddShape(FlatBufferBuilder builder, VectorOffset shapeOffset) { builder.AddOffset(2, shapeOffset.Value, 0); } + public static VectorOffset CreateShapeVector(FlatBufferBuilder builder, Offset<TensorDim>[] data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) builder.AddOffset(data[i].Value); return builder.EndVector(); } + public static VectorOffset CreateShapeVectorBlock(FlatBufferBuilder builder, Offset<TensorDim>[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); } + public static void StartShapeVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); } + public static void AddStrides(FlatBufferBuilder builder, VectorOffset stridesOffset) { builder.AddOffset(3, stridesOffset.Value, 0); } + public static VectorOffset CreateStridesVector(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, data.Length, 8); for (int i = data.Length - 1; i >= 0; i--) builder.AddLong(data[i]); return builder.EndVector(); } + public static VectorOffset CreateStridesVectorBlock(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, data.Length, 8); builder.Add(data); return builder.EndVector(); } + public static void StartStridesVector(FlatBufferBuilder builder, int numElems) { 
builder.StartVector(8, numElems, 8); } + public static void AddData(FlatBufferBuilder builder, Offset<Buffer> dataOffset) { builder.AddStruct(4, dataOffset.Value, 0); } + public static Offset<Tensor> EndTensor(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Tensor>(o); + } + public static void FinishTensorBuffer(FlatBufferBuilder builder, Offset<Tensor> offset) { builder.Finish(offset.Value); } + public static void FinishSizePrefixedTensorBuffer(FlatBufferBuilder builder, Offset<Tensor> offset) { builder.FinishSizePrefixed(offset.Value); } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/TensorDim.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/TensorDim.cs new file mode 100644 index 000000000..a4c51824b --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/TensorDim.cs @@ -0,0 +1,53 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// ---------------------------------------------------------------------- +/// Data structures for dense tensors +/// Shape data for a single axis in a tensor +internal struct TensorDim : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static TensorDim GetRootAsTensorDim(ByteBuffer _bb) { return GetRootAsTensorDim(_bb, new TensorDim()); } + public static TensorDim GetRootAsTensorDim(ByteBuffer _bb, TensorDim obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public TensorDim __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// Length of dimension + public long Size { get { int o = __p.__offset(4); return o != 0 ? 
__p.bb.GetLong(o + __p.bb_pos) : (long)0; } } + /// Name of the dimension, optional + public string Name { get { int o = __p.__offset(6); return o != 0 ? __p.__string(o + __p.bb_pos) : null; } } +#if ENABLE_SPAN_T + public Span<byte> GetNameBytes() { return __p.__vector_as_span(6); } +#else + public ArraySegment<byte>? GetNameBytes() { return __p.__vector_as_arraysegment(6); } +#endif + public byte[] GetNameArray() { return __p.__vector_as_array<byte>(6); } + + public static Offset<TensorDim> CreateTensorDim(FlatBufferBuilder builder, + long size = 0, + StringOffset nameOffset = default(StringOffset)) { + builder.StartObject(2); + TensorDim.AddSize(builder, size); + TensorDim.AddName(builder, nameOffset); + return TensorDim.EndTensorDim(builder); + } + + public static void StartTensorDim(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddSize(FlatBufferBuilder builder, long size) { builder.AddLong(0, size, 0); } + public static void AddName(FlatBufferBuilder builder, StringOffset nameOffset) { builder.AddOffset(1, nameOffset.Value, 0); } + public static Offset<TensorDim> EndTensorDim(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<TensorDim>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Binary.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Binary.cs new file mode 100644 index 000000000..0738d11e9 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Binary.cs @@ -0,0 +1,29 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Binary : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Binary GetRootAsBinary(ByteBuffer _bb) { return GetRootAsBinary(_bb, new Binary()); } + public static Binary GetRootAsBinary(ByteBuffer 
_bb, Binary obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Binary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartBinary(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<Binary> EndBinary(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Binary>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Bool.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Bool.cs new file mode 100644 index 000000000..da488abfe --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Bool.cs @@ -0,0 +1,29 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Bool : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Bool GetRootAsBool(ByteBuffer _bb) { return GetRootAsBool(_bb, new Bool()); } + public static Bool GetRootAsBool(ByteBuffer _bb, Bool obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Bool __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartBool(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<Bool> EndBool(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Bool>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Date.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Date.cs new file mode 100644 index 000000000..e9b7fb3cc --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Date.cs @@ -0,0 +1,44 @@ +// 
<auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX +/// epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch +internal struct Date : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Date GetRootAsDate(ByteBuffer _bb) { return GetRootAsDate(_bb, new Date()); } + public static Date GetRootAsDate(ByteBuffer _bb, Date obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public DateUnit Unit { get { int o = __p.__offset(4); return o != 0 ? 
(DateUnit)__p.bb.GetShort(o + __p.bb_pos) : DateUnit.MILLISECOND; } } + + public static Offset<Date> CreateDate(FlatBufferBuilder builder, + DateUnit unit = DateUnit.MILLISECOND) { + builder.StartObject(1); + Date.AddUnit(builder, unit); + return Date.EndDate(builder); + } + + public static void StartDate(FlatBufferBuilder builder) { builder.StartObject(1); } + public static void AddUnit(FlatBufferBuilder builder, DateUnit unit) { builder.AddShort(0, (short)unit, 1); } + public static Offset<Date> EndDate(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Date>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Decimal.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Decimal.cs new file mode 100644 index 000000000..97f62b676 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Decimal.cs @@ -0,0 +1,54 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Exact decimal value represented as an integer value in two's +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. +internal struct Decimal: IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Decimal GetRootAsDecimal(ByteBuffer _bb) { return GetRootAsDecimal(_bb, new Decimal()); } + public static Decimal GetRootAsDecimal(ByteBuffer _bb, Decimal obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Decimal __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + /// Total number of decimal digits + public int Precision { get { int o = __p.__offset(4); return o != 0 ? 
__p.bb.GetInt(o + __p.bb_pos) : (int)0; } } + /// Number of digits after the decimal point "." + public int Scale { get { int o = __p.__offset(6); return o != 0 ? __p.bb.GetInt(o + __p.bb_pos) : (int)0; } } + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. + public int BitWidth { get { int o = __p.__offset(8); return o != 0 ? __p.bb.GetInt(o + __p.bb_pos) : (int)128; } } + + public static Offset<Decimal> CreateDecimal(FlatBufferBuilder builder, + int precision = 0, + int scale = 0, + int bitWidth = 128) { + builder.StartObject(3); + Decimal.AddBitWidth(builder, bitWidth); + Decimal.AddScale(builder, scale); + Decimal.AddPrecision(builder, precision); + return Decimal.EndDecimal(builder); + } + + public static void StartDecimal(FlatBufferBuilder builder) { builder.StartObject(3); } + public static void AddPrecision(FlatBufferBuilder builder, int precision) { builder.AddInt(0, precision, 0); } + public static void AddScale(FlatBufferBuilder builder, int scale) { builder.AddInt(1, scale, 0); } + public static void AddBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.AddInt(2, bitWidth, 128); } + public static Offset<Decimal> EndDecimal(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Decimal>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Duration.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Duration.cs new file mode 100644 index 000000000..1f3a9e410 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Duration.cs @@ -0,0 +1,38 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Duration : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Duration 
GetRootAsDuration(ByteBuffer _bb) { return GetRootAsDuration(_bb, new Duration()); } + public static Duration GetRootAsDuration(ByteBuffer _bb, Duration obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Duration __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public TimeUnit Unit { get { int o = __p.__offset(4); return o != 0 ? (TimeUnit)__p.bb.GetShort(o + __p.bb_pos) : TimeUnit.MILLISECOND; } } + + public static Offset<Duration> CreateDuration(FlatBufferBuilder builder, + TimeUnit unit = TimeUnit.MILLISECOND) { + builder.StartObject(1); + Duration.AddUnit(builder, unit); + return Duration.EndDuration(builder); + } + + public static void StartDuration(FlatBufferBuilder builder) { builder.StartObject(1); } + public static void AddUnit(FlatBufferBuilder builder, TimeUnit unit) { builder.AddShort(0, (short)unit, 1); } + public static Offset<Duration> EndDuration(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Duration>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/FloatingPoint.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/FloatingPoint.cs new file mode 100644 index 000000000..0cb58ecf4 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/FloatingPoint.cs @@ -0,0 +1,38 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct FloatingPoint : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static FloatingPoint GetRootAsFloatingPoint(ByteBuffer _bb) { return GetRootAsFloatingPoint(_bb, new FloatingPoint()); } + public static FloatingPoint GetRootAsFloatingPoint(ByteBuffer _bb, FloatingPoint obj) { return 
(obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public FloatingPoint __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public Precision Precision { get { int o = __p.__offset(4); return o != 0 ? (Precision)__p.bb.GetShort(o + __p.bb_pos) : Precision.HALF; } } + + public static Offset<FloatingPoint> CreateFloatingPoint(FlatBufferBuilder builder, + Precision precision = Precision.HALF) { + builder.StartObject(1); + FloatingPoint.AddPrecision(builder, precision); + return FloatingPoint.EndFloatingPoint(builder); + } + + public static void StartFloatingPoint(FlatBufferBuilder builder) { builder.StartObject(1); } + public static void AddPrecision(FlatBufferBuilder builder, Precision precision) { builder.AddShort(0, (short)precision, 0); } + public static Offset<FloatingPoint> EndFloatingPoint(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<FloatingPoint>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Int.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Int.cs new file mode 100644 index 000000000..754080279 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Int.cs @@ -0,0 +1,42 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Int : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Int GetRootAsInt(ByteBuffer _bb) { return GetRootAsInt(_bb, new Int()); } + public static Int GetRootAsInt(ByteBuffer _bb, Int obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Int __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return 
this; } + + public int BitWidth { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetInt(o + __p.bb_pos) : (int)0; } } + public bool IsSigned { get { int o = __p.__offset(6); return o != 0 ? 0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } + + public static Offset<Int> CreateInt(FlatBufferBuilder builder, + int bitWidth = 0, + bool is_signed = false) { + builder.StartObject(2); + Int.AddBitWidth(builder, bitWidth); + Int.AddIsSigned(builder, is_signed); + return Int.EndInt(builder); + } + + public static void StartInt(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.AddInt(0, bitWidth, 0); } + public static void AddIsSigned(FlatBufferBuilder builder, bool isSigned) { builder.AddBool(1, isSigned, false); } + public static Offset<Int> EndInt(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Int>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Interval.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Interval.cs new file mode 100644 index 000000000..bb92448ac --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Interval.cs @@ -0,0 +1,38 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct Interval : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Interval GetRootAsInterval(ByteBuffer _bb) { return GetRootAsInterval(_bb, new Interval()); } + public static Interval GetRootAsInterval(ByteBuffer _bb, Interval obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Interval __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public 
IntervalUnit Unit { get { int o = __p.__offset(4); return o != 0 ? (IntervalUnit)__p.bb.GetShort(o + __p.bb_pos) : IntervalUnit.YEAR_MONTH; } } + + public static Offset<Interval> CreateInterval(FlatBufferBuilder builder, + IntervalUnit unit = IntervalUnit.YEAR_MONTH) { + builder.StartObject(1); + Interval.AddUnit(builder, unit); + return Interval.EndInterval(builder); + } + + public static void StartInterval(FlatBufferBuilder builder) { builder.StartObject(1); } + public static void AddUnit(FlatBufferBuilder builder, IntervalUnit unit) { builder.AddShort(0, (short)unit, 0); } + public static Offset<Interval> EndInterval(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Interval>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeBinary.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeBinary.cs new file mode 100644 index 000000000..8ac6aa2cd --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeBinary.cs @@ -0,0 +1,31 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Same as Binary, but with 64-bit offsets, allowing to represent +/// extremely large data values. 
+internal struct LargeBinary : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static LargeBinary GetRootAsLargeBinary(ByteBuffer _bb) { return GetRootAsLargeBinary(_bb, new LargeBinary()); } + public static LargeBinary GetRootAsLargeBinary(ByteBuffer _bb, LargeBinary obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public LargeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartLargeBinary(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<LargeBinary> EndLargeBinary(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<LargeBinary>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeList.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeList.cs new file mode 100644 index 000000000..49e69a87f --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeList.cs @@ -0,0 +1,31 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Same as List, but with 64-bit offsets, allowing to represent +/// extremely large data values. 
+internal struct LargeList : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static LargeList GetRootAsLargeList(ByteBuffer _bb) { return GetRootAsLargeList(_bb, new LargeList()); } + public static LargeList GetRootAsLargeList(ByteBuffer _bb, LargeList obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public LargeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartLargeList(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<LargeList> EndLargeList(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<LargeList>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeUtf8.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeUtf8.cs new file mode 100644 index 000000000..ebb64390f --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/LargeUtf8.cs @@ -0,0 +1,31 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Same as Utf8, but with 64-bit offsets, allowing to represent +/// extremely large data values. 
+internal struct LargeUtf8 : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static LargeUtf8 GetRootAsLargeUtf8(ByteBuffer _bb) { return GetRootAsLargeUtf8(_bb, new LargeUtf8()); } + public static LargeUtf8 GetRootAsLargeUtf8(ByteBuffer _bb, LargeUtf8 obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public LargeUtf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartLargeUtf8(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<LargeUtf8> EndLargeUtf8(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<LargeUtf8>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/List.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/List.cs new file mode 100644 index 000000000..8f4985bb5 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/List.cs @@ -0,0 +1,29 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +internal struct List : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static List GetRootAsList(ByteBuffer _bb) { return GetRootAsList(_bb, new List()); } + public static List GetRootAsList(ByteBuffer _bb, List obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public List __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartList(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<List> EndList(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new 
Offset<List>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Null.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Null.cs new file mode 100644 index 000000000..85fa5bb99 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Null.cs @@ -0,0 +1,30 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// These are stored in the flatbuffer in the Type union below +internal struct Null : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Null GetRootAsNull(ByteBuffer _bb) { return GetRootAsNull(_bb, new Null()); } + public static Null GetRootAsNull(ByteBuffer _bb, Null obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Null __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartNull(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<Null> EndNull(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Null>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Struct_.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Struct_.cs new file mode 100644 index 000000000..8f3d708cd --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Struct_.cs @@ -0,0 +1,32 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). 
We used Struct_ here as +/// Struct is a reserved word in Flatbuffers +internal struct Struct_ : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Struct_ GetRootAsStruct_(ByteBuffer _bb) { return GetRootAsStruct_(_bb, new Struct_()); } + public static Struct_ GetRootAsStruct_(ByteBuffer _bb, Struct_ obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Struct_ __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartStruct_(FlatBufferBuilder builder) { builder.StartObject(0); } + public static Offset<Struct_> EndStruct_(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Struct_>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Time.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Time.cs new file mode 100644 index 000000000..1d7c0881c --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Time.cs @@ -0,0 +1,45 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// </auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Time type. 
The physical storage type depends on the unit +/// - SECOND and MILLISECOND: 32 bits +/// - MICROSECOND and NANOSECOND: 64 bits +internal struct Time : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Time GetRootAsTime(ByteBuffer _bb) { return GetRootAsTime(_bb, new Time()); } + public static Time GetRootAsTime(ByteBuffer _bb, Time obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Time __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public TimeUnit Unit { get { int o = __p.__offset(4); return o != 0 ? (TimeUnit)__p.bb.GetShort(o + __p.bb_pos) : TimeUnit.MILLISECOND; } } + public int BitWidth { get { int o = __p.__offset(6); return o != 0 ? __p.bb.GetInt(o + __p.bb_pos) : (int)32; } } + + public static Offset<Time> CreateTime(FlatBufferBuilder builder, + TimeUnit unit = TimeUnit.MILLISECOND, + int bitWidth = 32) { + builder.StartObject(2); + Time.AddBitWidth(builder, bitWidth); + Time.AddUnit(builder, unit); + return Time.EndTime(builder); + } + + public static void StartTime(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddUnit(FlatBufferBuilder builder, TimeUnit unit) { builder.AddShort(0, (short)unit, 1); } + public static void AddBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.AddInt(1, bitWidth, 32); } + public static Offset<Time> EndTime(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Time>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Timestamp.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Timestamp.cs new file mode 100644 index 000000000..c0168b4cb --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Timestamp.cs @@ -0,0 +1,74 @@ +// <auto-generated> +// automatically generated by the FlatBuffers compiler, do not modify +// 
</auto-generated> + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::FlatBuffers; + +/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding +/// leap seconds, as a 64-bit integer. Note that UNIX time does not include +/// leap seconds. +/// +/// The Timestamp metadata supports both "time zone naive" and "time zone +/// aware" timestamps. Read about the timezone attribute for more detail +internal struct Timestamp : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static Timestamp GetRootAsTimestamp(ByteBuffer _bb) { return GetRootAsTimestamp(_bb, new Timestamp()); } + public static Timestamp GetRootAsTimestamp(ByteBuffer _bb, Timestamp obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public Timestamp __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + public TimeUnit Unit { get { int o = __p.__offset(4); return o != 0 ? (TimeUnit)__p.bb.GetShort(o + __p.bb_pos) : TimeUnit.SECOND; } } + /// The time zone is a string indicating the name of a time zone, one of: + /// + /// * As used in the Olson time zone database (the "tz database" or + /// "tzdata"), such as "America/New_York" + /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + /// + /// Whether a timezone string is present indicates different semantics about + /// the data: + /// + /// * If the time zone is null or equal to an empty string, the data is "time + /// zone naive" and shall be displayed *as is* to the user, not localized + /// to the locale of the user. 
This data can be though of as UTC but + /// without having "UTC" as the time zone, it is not considered to be + /// localized to any time zone + /// + /// * If the time zone is set to a valid value, values can be displayed as + /// "localized" to that time zone, even though the underlying 64-bit + /// integers are identical to the same data stored in UTC. Converting + /// between time zones is a metadata-only operation and does not change the + /// underlying values + public string Timezone { get { int o = __p.__offset(6); return o != 0 ? __p.__string(o + __p.bb_pos) : null; } } +#if ENABLE_SPAN_T + public Span<byte> GetTimezoneBytes() { return __p.__vector_as_span(6); } +#else + public ArraySegment<byte>? GetTimezoneBytes() { return __p.__vector_as_arraysegment(6); } +#endif + public byte[] GetTimezoneArray() { return __p.__vector_as_array<byte>(6); } + + public static Offset<Timestamp> CreateTimestamp(FlatBufferBuilder builder, + TimeUnit unit = TimeUnit.SECOND, + StringOffset timezoneOffset = default(StringOffset)) { + builder.StartObject(2); + Timestamp.AddTimezone(builder, timezoneOffset); + Timestamp.AddUnit(builder, unit); + return Timestamp.EndTimestamp(builder); + } + + public static void StartTimestamp(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void AddUnit(FlatBufferBuilder builder, TimeUnit unit) { builder.AddShort(0, (short)unit, 0); } + public static void AddTimezone(FlatBufferBuilder builder, StringOffset timezoneOffset) { builder.AddOffset(1, timezoneOffset.Value, 0); } + public static Offset<Timestamp> EndTimestamp(FlatBufferBuilder builder) { + int o = builder.EndObject(); + return new Offset<Timestamp>(o); + } +}; + + +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Union.cs b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Union.cs new file mode 100644 index 000000000..de2a85a17 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Flatbuf/Types/Union.cs @@ -0,0 +1,56 @@ +// <auto-generated> +// 
// automatically generated by the FlatBuffers compiler, do not modify
// </auto-generated>

namespace Apache.Arrow.Flatbuf
{

using global::System;
using global::FlatBuffers;

/// A union is a complex type with children in Field
/// By default ids in the type vector refer to the offsets in the children
/// optionally typeIds provides an indirection between the child offset and the type id
/// for each child typeIds[offset] is the id used in the type vector
internal struct Union : IFlatbufferObject
{
  // Holds the backing buffer (bb) and this object's position in it (bb_pos);
  // both are assigned by __init.
  private Table __p;
  public ByteBuffer ByteBuffer { get { return __p.bb; } }
  // Convenience overload: positions a fresh Union value on the buffer.
  public static Union GetRootAsUnion(ByteBuffer _bb) { return GetRootAsUnion(_bb, new Union()); }
  // Positions obj at (the int stored at _bb.Position) + _bb.Position.
  public static Union GetRootAsUnion(ByteBuffer _bb, Union obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); }
  public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; }
  public Union __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }

  // Looks up offset 4; when the lookup yields 0 the accessor returns UnionMode.Sparse,
  // otherwise it reads a short at (o + bb_pos) and casts it to UnionMode.
  public UnionMode Mode { get { int o = __p.__offset(4); return o != 0 ? (UnionMode)__p.bb.GetShort(o + __p.bb_pos) : UnionMode.Sparse; } }
  // Reads the j-th 4-byte int of the vector found through offset 6; returns 0 when the lookup yields 0.
  // No range check on j is performed here.
  public int TypeIds(int j) { int o = __p.__offset(6); return o != 0 ? __p.bb.GetInt(__p.__vector(o) + j * 4) : (int)0; }
  // Length of the vector found through offset 6, or 0 when the lookup yields 0.
  public int TypeIdsLength { get { int o = __p.__offset(6); return o != 0 ? __p.__vector_len(o) : 0; } }
  // The return type of GetTypeIdsBytes depends on the ENABLE_SPAN_T compilation symbol.
#if ENABLE_SPAN_T
  public Span<byte> GetTypeIdsBytes() { return __p.__vector_as_span(6); }
#else
  public ArraySegment<byte>? GetTypeIdsBytes() { return __p.__vector_as_arraysegment(6); }
#endif
  public int[] GetTypeIdsArray() { return __p.__vector_as_array<int>(6); }

  // Starts a two-field object, adds typeIds and then mode, and ends the object.
  public static Offset<Union> CreateUnion(FlatBufferBuilder builder,
      UnionMode mode = UnionMode.Sparse,
      VectorOffset typeIdsOffset = default(VectorOffset)) {
    builder.StartObject(2);
    Union.AddTypeIds(builder, typeIdsOffset);
    Union.AddMode(builder, mode);
    return Union.EndUnion(builder);
  }

  public static void StartUnion(FlatBufferBuilder builder) { builder.StartObject(2); }
  // Field index 0, written as a short with default value 0.
  public static void AddMode(FlatBufferBuilder builder, UnionMode mode) { builder.AddShort(0, (short)mode, 0); }
  // Field index 1, written as an offset with default value 0.
  public static void AddTypeIds(FlatBufferBuilder builder, VectorOffset typeIdsOffset) { builder.AddOffset(1, typeIdsOffset.Value, 0); }
  // Adds the elements one by one, from the last element down to the first.
  public static VectorOffset CreateTypeIdsVector(FlatBufferBuilder builder, int[] data) { builder.StartVector(4, data.Length, 4); for (int i = data.Length - 1; i >= 0; i--) builder.AddInt(data[i]); return builder.EndVector(); }
  // Adds the whole array with a single builder.Add call.
  public static VectorOffset CreateTypeIdsVectorBlock(FlatBufferBuilder builder, int[] data) { builder.StartVector(4, data.Length, 4); builder.Add(data); return builder.EndVector(); }
  public static void StartTypeIdsVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(4, numElems, 4); }
  public static Offset<Union> EndUnion(FlatBufferBuilder builder) {
    int o = builder.EndObject();
    return new Offset<Union>(o);
  }
};


}

// <auto-generated>
// automatically generated by the FlatBuffers compiler, do not modify
// </auto-generated>

namespace Apache.Arrow.Flatbuf
{

using global::System;
using global::FlatBuffers;

/// Unicode with UTF-8 encoding
internal struct Utf8 : IFlatbufferObject
{
  // Holds the backing buffer (bb) and this object's position in it (bb_pos);
  // both are assigned by __init.
  private Table __p;
  public ByteBuffer ByteBuffer { get { return __p.bb; } }
  // Convenience overload: positions a fresh Utf8 value on the buffer.
  public static Utf8 GetRootAsUtf8(ByteBuffer _bb) { return GetRootAsUtf8(_bb, new Utf8()); }
  // Positions obj at (the int stored at _bb.Position) + _bb.Position.
  public static Utf8 GetRootAsUtf8(ByteBuffer _bb, Utf8 obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); }
  public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; }
  public Utf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }


  // The object is started with zero fields; this type exposes no field accessors.
  public static void StartUtf8(FlatBufferBuilder builder) { builder.StartObject(0); }
  public static Offset<Utf8> EndUtf8(FlatBufferBuilder builder) {
    int o = builder.EndObject();
    return new Offset<Utf8>(o);
  }
};


}
using System;

namespace Apache.Arrow
{
    /// <summary>
    /// Contract shared by Arrow array types. Instances are disposable and can be
    /// handed to an <see cref="IArrowArrayVisitor"/>.
    /// </summary>
    public interface IArrowArray : IDisposable
    {
        /// <summary>Gets the <see cref="ArrayData"/> associated with this array.</summary>
        ArrayData Data { get; }

        /// <summary>Gets the length of the array.</summary>
        int Length { get; }

        /// <summary>Gets the offset of the array.</summary>
        int Offset { get; }

        /// <summary>Gets the null count of the array.</summary>
        int NullCount { get; }

        /// <summary>Validity query for the entry at <paramref name="index"/>.</summary>
        bool IsValid(int index);

        /// <summary>Null query for the entry at <paramref name="index"/>.</summary>
        bool IsNull(int index);

        /// <summary>Passes this array to <paramref name="visitor"/>.</summary>
        void Accept(IArrowArrayVisitor visitor);

        //IArrowArray Slice(int offset);

        //IArrowArray Slice(int offset, int length);
    }
}
using Apache.Arrow.Memory;
using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    /// <summary>
    /// Root of the array-builder interface hierarchy.
    /// </summary>
    public interface IArrowArrayBuilder
    {
        /// <summary>Gets the current length of the builder.</summary>
        int Length { get; }
    }

    /// <summary>
    /// A builder that produces a <typeparamref name="TArray"/>.
    /// </summary>
    /// <typeparam name="TArray">Array type returned by <see cref="Build"/>.</typeparam>
    public interface IArrowArrayBuilder<out TArray> : IArrowArrayBuilder
        where TArray : IArrowArray
    {
        /// <summary>Builds the array using <paramref name="allocator"/>.</summary>
        TArray Build(MemoryAllocator allocator);
    }

    /// <summary>
    /// Adds size-management operations; every operation returns
    /// <typeparamref name="TBuilder"/> so calls can be chained.
    /// </summary>
    /// <typeparam name="TArray">Array type produced by the builder.</typeparam>
    /// <typeparam name="TBuilder">Builder type returned by each operation.</typeparam>
    public interface IArrowArrayBuilder<out TArray, out TBuilder> : IArrowArrayBuilder<TArray>
        where TArray : IArrowArray
        where TBuilder : IArrowArrayBuilder<TArray>
    {
        TBuilder Clear();

        TBuilder Reserve(int capacity);

        TBuilder Resize(int length);
    }

    /// <summary>
    /// Adds value-level operations over elements of type <typeparamref name="T"/>;
    /// every operation returns <typeparamref name="TBuilder"/> so calls can be chained.
    /// </summary>
    /// <typeparam name="T">Element type accepted by the builder.</typeparam>
    /// <typeparam name="TArray">Array type produced by the builder.</typeparam>
    /// <typeparam name="TBuilder">Builder type returned by each operation.</typeparam>
    public interface IArrowArrayBuilder<T, out TArray, out TBuilder> : IArrowArrayBuilder<TArray, TBuilder>
        where TArray : IArrowArray
        where TBuilder : IArrowArrayBuilder<TArray>
    {
        // Appending
        TBuilder Append(T value);

        TBuilder Append(ReadOnlySpan<T> span);

        TBuilder AppendRange(IEnumerable<T> values);

        TBuilder AppendNull();

        // In-place modification
        TBuilder Set(int index, T value);

        TBuilder Swap(int i, int j);
    }
}
namespace Apache.Arrow
{
    // NOTE: Acyclic Visitor Pattern

    /// <summary>
    /// Non-generic visitor that accepts any <see cref="IArrowArray"/>.
    /// </summary>
    public interface IArrowArrayVisitor
    {
        /// <summary>Visits <paramref name="array"/>.</summary>
        void Visit(IArrowArray array);
    }

    /// <summary>
    /// Visitor for one specific array type, in addition to the non-generic
    /// <see cref="IArrowArrayVisitor.Visit(IArrowArray)"/> it inherits.
    /// </summary>
    /// <typeparam name="T">Array type this visitor handles.</typeparam>
    public interface IArrowArrayVisitor<T> : IArrowArrayVisitor
        where T : IArrowArray
    {
        /// <summary>Visits <paramref name="array"/>.</summary>
        void Visit(T array);
    }
}
using System.Text;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Constants used when reading and writing Arrow files.
    /// </summary>
    internal static class ArrowFileConstants
    {
        // Text form of the marker; kept as a constant so the bytes below have a single source.
        private const string MagicText = "ARROW1";

        /// <summary>
        /// UTF-8 encoding of the <c>ARROW1</c> marker.
        /// </summary>
        public static readonly byte[] Magic = Encoding.UTF8.GetBytes(MagicText);
    }
}
using Apache.Arrow.Memory;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Implements an <see cref="ArrowStreamReader"/> that can read Arrow files.
    /// </summary>
    public class ArrowFileReader : ArrowStreamReader
    {
        /// <summary>
        /// Creates a reader over <paramref name="stream"/> with no explicit allocator
        /// and <c>leaveOpen</c> set to <c>false</c>.
        /// </summary>
        public ArrowFileReader(Stream stream)
            : this(stream, allocator: null, leaveOpen: false)
        {
        }

        /// <summary>
        /// Creates a reader over <paramref name="stream"/> using <paramref name="allocator"/>
        /// and <c>leaveOpen</c> set to <c>false</c>.
        /// </summary>
        public ArrowFileReader(Stream stream, MemoryAllocator allocator)
            : this(stream, allocator, leaveOpen: false)
        {
        }

        /// <summary>
        /// Creates a reader over <paramref name="stream"/> with no explicit allocator.
        /// </summary>
        public ArrowFileReader(Stream stream, bool leaveOpen)
            : this(stream, allocator: null, leaveOpen: leaveOpen)
        {
        }

        /// <summary>
        /// Creates a reader backed by an <see cref="ArrowFileReaderImplementation"/>
        /// constructed from the given arguments.
        /// </summary>
        public ArrowFileReader(Stream stream, MemoryAllocator allocator, bool leaveOpen)
            : base(new ArrowFileReaderImplementation(stream, allocator, leaveOpen))
        {
        }

        /// <summary>
        /// Mirrors <see cref="ArrowFileReaderImplementation.IsFileValid"/>.
        /// </summary>
        public bool IsFileValid
        {
            get { return Implementation.IsFileValid; }
        }

        // The base class stores the implementation; for this reader it is always the file variant.
        private ArrowFileReaderImplementation Implementation
        {
            get { return (ArrowFileReaderImplementation)_implementation; }
        }

        /// <summary>
        /// Opens <paramref name="filename"/> for reading and wraps it in a reader.
        /// The reader is created with <c>leaveOpen</c> set to <c>false</c>.
        /// </summary>
        public static ArrowFileReader FromFile(string filename)
        {
            return new ArrowFileReader(
                new FileStream(filename, FileMode.Open, FileAccess.Read));
        }

        /// <summary>
        /// Forwards to the implementation's record batch count.
        /// </summary>
        public ValueTask<int> RecordBatchCountAsync() =>
            Implementation.RecordBatchCountAsync();

        /// <summary>
        /// Forwards to the implementation to read the record batch at <paramref name="index"/>.
        /// </summary>
        public ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken = default) =>
            Implementation.ReadRecordBatchAsync(index, cancellationToken);
    }
}
using Apache.Arrow.Memory;
using System;
using System.Buffers;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Reader for streams laid out as an Arrow file: the <c>ARROW1</c> magic at both ends,
    /// and a footer flatbuffer followed by its 4-byte length just before the trailing magic.
    /// The stream position is set directly, so the stream must support seeking.
    /// </summary>
    internal sealed class ArrowFileReaderImplementation : ArrowStreamReaderImplementation
    {
        // Size in bytes of the footer-length field that precedes the trailing magic.
        private const int FooterLengthSize = sizeof(int);

        /// <summary>
        /// True once the magic at both ends of the stream has been verified.
        /// </summary>
        public bool IsFileValid { get; private set; }

        /// <summary>
        /// When using GetNextRecordBatch this value
        /// is to remember what index is next
        /// </summary>
        private int _recordBatchIndex;

        private ArrowFooter _footer;

        public ArrowFileReaderImplementation(Stream stream, MemoryAllocator allocator, bool leaveOpen)
            : base(stream, allocator, leaveOpen)
        {
        }

        /// <summary>
        /// Returns the number of record batches listed in the footer, reading the footer first if needed.
        /// </summary>
        public async ValueTask<int> RecordBatchCountAsync()
        {
            if (!HasReadSchema)
            {
                await ReadSchemaAsync().ConfigureAwait(false);
            }

            return _footer.RecordBatchCount;
        }

        /// <summary>
        /// Validates the file, then reads the footer and takes the schema from it.
        /// Does nothing when the schema has already been read.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The magic is wrong, or the footer length is non-positive or does not fit in the file.
        /// </exception>
        protected override async ValueTask ReadSchemaAsync()
        {
            if (HasReadSchema)
            {
                return;
            }

            await ValidateFileAsync().ConfigureAwait(false);

            long footerLengthPosition = GetFooterLengthPosition();

            int footerLength = 0;
            await ArrayPool<byte>.Shared.RentReturnAsync(FooterLengthSize, async (buffer) =>
            {
                BaseStream.Position = footerLengthPosition;

                int bytesRead = await BaseStream.ReadFullBufferAsync(buffer).ConfigureAwait(false);
                EnsureFullRead(buffer, bytesRead);

                footerLength = ReadFooterLength(buffer);
            }).ConfigureAwait(false);

            // Validate before renting so a corrupt length cannot trigger a huge allocation.
            long footerStartPosition = GetFooterStartPosition(footerLengthPosition, footerLength);

            await ArrayPool<byte>.Shared.RentReturnAsync(footerLength, async (buffer) =>
            {
                BaseStream.Position = footerStartPosition;

                int bytesRead = await BaseStream.ReadFullBufferAsync(buffer).ConfigureAwait(false);
                EnsureFullRead(buffer, bytesRead);

                ReadSchema(buffer);
            }).ConfigureAwait(false);
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadSchemaAsync"/>.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The magic is wrong, or the footer length is non-positive or does not fit in the file.
        /// </exception>
        protected override void ReadSchema()
        {
            if (HasReadSchema)
            {
                return;
            }

            ValidateFile();

            long footerLengthPosition = GetFooterLengthPosition();

            int footerLength = 0;
            ArrayPool<byte>.Shared.RentReturn(FooterLengthSize, (buffer) =>
            {
                BaseStream.Position = footerLengthPosition;

                int bytesRead = BaseStream.ReadFullBuffer(buffer);
                EnsureFullRead(buffer, bytesRead);

                footerLength = ReadFooterLength(buffer);
            });

            // Validate before renting so a corrupt length cannot trigger a huge allocation.
            long footerStartPosition = GetFooterStartPosition(footerLengthPosition, footerLength);

            ArrayPool<byte>.Shared.RentReturn(footerLength, (buffer) =>
            {
                BaseStream.Position = footerStartPosition;

                int bytesRead = BaseStream.ReadFullBuffer(buffer);
                EnsureFullRead(buffer, bytesRead);

                ReadSchema(buffer);
            });
        }

        /// <summary>
        /// Position of the footer-length field: stream length minus the trailing magic and the field itself.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The stream is too short to hold the leading magic, a footer length and the trailing magic.
        /// </exception>
        private long GetFooterLengthPosition()
        {
            long position = BaseStream.Length - ArrowFileConstants.Magic.Length - FooterLengthSize;

            // Without this guard a tiny stream would produce a negative position and fail
            // with an ArgumentOutOfRangeException when the position is assigned.
            if (position < ArrowFileConstants.Magic.Length)
            {
                throw new InvalidDataException(
                    $"File is too small <{BaseStream.Length}> to contain a footer");
            }

            return position;
        }

        /// <summary>
        /// Computes where the footer starts and checks that it lies after the leading magic.
        /// </summary>
        /// <exception cref="InvalidDataException">The footer would start inside or before the leading magic.</exception>
        private static long GetFooterStartPosition(long footerLengthPosition, int footerLength)
        {
            long footerStartPosition = footerLengthPosition - footerLength;

            if (footerStartPosition < ArrowFileConstants.Magic.Length)
            {
                throw new InvalidDataException(
                    $"Footer length has invalid size <{footerLength}>");
            }

            return footerStartPosition;
        }

        /// <summary>
        /// Decodes the footer length from <paramref name="buffer"/> and rejects non-positive values.
        /// </summary>
        private static int ReadFooterLength(Memory<byte> buffer)
        {
            int footerLength = BitUtility.ReadInt32(buffer);

            if (footerLength <= 0)
            {
                throw new InvalidDataException(
                    $"Footer length has invalid size <{footerLength}>");
            }

            return footerLength;
        }

        private void ReadSchema(Memory<byte> buffer)
        {
            // Deserialize the footer from the footer flatbuffer
            _footer = new ArrowFooter(Flatbuf.Footer.GetRootAsFooter(CreateByteBuffer(buffer)), ref _dictionaryMemo);

            Schema = _footer.Schema;
        }

        /// <summary>
        /// Reads the record batch whose block is at <paramref name="index"/> in the footer.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than the record batch count.
        /// </exception>
        public async ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken)
        {
            await ReadSchemaAsync().ConfigureAwait(false);

            SeekToRecordBatch(index);

            return await ReadRecordBatchAsync(cancellationToken).ConfigureAwait(false);
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadRecordBatchAsync(int, CancellationToken)"/>.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not less than the record batch count.
        /// </exception>
        public RecordBatch ReadRecordBatch(int index)
        {
            ReadSchema();

            SeekToRecordBatch(index);

            return ReadRecordBatch();
        }

        // Validates the index against the footer and moves the stream to that block's offset.
        private void SeekToRecordBatch(int index)
        {
            if (index < 0 || index >= _footer.RecordBatchCount)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            Block block = _footer.GetRecordBatchBlock(index);

            BaseStream.Position = block.Offset;
        }

        /// <summary>
        /// Reads the next record batch in footer order, or returns null once all have been read.
        /// </summary>
        public override async ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken)
        {
            await ReadSchemaAsync().ConfigureAwait(false);

            if (_recordBatchIndex >= _footer.RecordBatchCount)
            {
                return null;
            }

            RecordBatch result = await ReadRecordBatchAsync(_recordBatchIndex, cancellationToken).ConfigureAwait(false);

            // Only advance after a successful read.
            _recordBatchIndex++;

            return result;
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadNextRecordBatchAsync"/>.
        /// </summary>
        public override RecordBatch ReadNextRecordBatch()
        {
            ReadSchema();

            if (_recordBatchIndex >= _footer.RecordBatchCount)
            {
                return null;
            }

            RecordBatch result = ReadRecordBatch(_recordBatchIndex);

            // Only advance after a successful read.
            _recordBatchIndex++;

            return result;
        }

        /// <summary>
        /// Check if file format is valid. If it's valid don't run the validation again.
        /// </summary>
        private async ValueTask ValidateFileAsync()
        {
            if (IsFileValid)
            {
                return;
            }

            await ValidateMagicAsync().ConfigureAwait(false);

            IsFileValid = true;
        }

        /// <summary>
        /// Check if file format is valid. If it's valid don't run the validation again.
        /// </summary>
        private void ValidateFile()
        {
            if (IsFileValid)
            {
                return;
            }

            ValidateMagic();

            IsFileValid = true;
        }

        /// <summary>
        /// Verifies the magic at the start and at the end of the stream, restoring the
        /// original stream position afterwards (also when verification fails).
        /// </summary>
        private async ValueTask ValidateMagicAsync()
        {
            long startingPosition = BaseStream.Position;
            int magicLength = ArrowFileConstants.Magic.Length;

            try
            {
                await ArrayPool<byte>.Shared.RentReturnAsync(magicLength, async (buffer) =>
                {
                    // Seek to the beginning of the stream
                    BaseStream.Position = 0;

                    // Read beginning of stream. A single Read call may return fewer bytes than
                    // requested, which would leave stale pooled bytes in the buffer, so read
                    // until the buffer is full and fail if the stream ends early.
                    int bytesRead = await BaseStream.ReadFullBufferAsync(buffer).ConfigureAwait(false);
                    EnsureFullRead(buffer, bytesRead);

                    VerifyMagic(buffer);

                    // Move stream position to magic-length bytes away from the end of the stream
                    BaseStream.Position = BaseStream.Length - magicLength;

                    // Read the end of the stream
                    bytesRead = await BaseStream.ReadFullBufferAsync(buffer).ConfigureAwait(false);
                    EnsureFullRead(buffer, bytesRead);

                    VerifyMagic(buffer);
                }).ConfigureAwait(false);
            }
            finally
            {
                BaseStream.Position = startingPosition;
            }
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ValidateMagicAsync"/>.
        /// </summary>
        private void ValidateMagic()
        {
            long startingPosition = BaseStream.Position;
            int magicLength = ArrowFileConstants.Magic.Length;

            try
            {
                ArrayPool<byte>.Shared.RentReturn(magicLength, buffer =>
                {
                    // Seek to the beginning of the stream
                    BaseStream.Position = 0;

                    // Read beginning of stream. A single Read call may return fewer bytes than
                    // requested, which would leave stale pooled bytes in the buffer, so read
                    // until the buffer is full and fail if the stream ends early.
                    int bytesRead = BaseStream.ReadFullBuffer(buffer);
                    EnsureFullRead(buffer, bytesRead);

                    VerifyMagic(buffer);

                    // Move stream position to magic-length bytes away from the end of the stream
                    BaseStream.Position = BaseStream.Length - magicLength;

                    // Read the end of the stream
                    bytesRead = BaseStream.ReadFullBuffer(buffer);
                    EnsureFullRead(buffer, bytesRead);

                    VerifyMagic(buffer);
                });
            }
            finally
            {
                BaseStream.Position = startingPosition;
            }
        }

        // Compares the buffer with the expected magic bytes. The reported offset is the
        // stream position at the time of the check, i.e. after the bytes were read.
        private void VerifyMagic(Memory<byte> buffer)
        {
            if (!ArrowFileConstants.Magic.AsSpan().SequenceEqual(buffer.Span))
            {
                throw new InvalidDataException(
                    $"Invalid magic at offset <{BaseStream.Position}>");
            }
        }
    }
}
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Writes record batches in the Arrow file layout: the magic plus padding at the start,
    /// the content written by the base stream writer, then a footer flatbuffer, the footer
    /// length and the magic again. The target stream must be writable and seekable.
    /// </summary>
    public class ArrowFileWriter: ArrowStreamWriter
    {
        // Size in bytes of the footer-length field written after the footer.
        private const int FooterLengthSize = sizeof(int);

        // Stream position at which the record batch currently being written started;
        // -1 while no record batch is in progress.
        private long _currentRecordBatchOffset = -1;

        // One entry per record batch written so far, in write order; serialized into the footer.
        private List<Block> RecordBatchBlocks { get; }

        public ArrowFileWriter(Stream stream, Schema schema)
            : this(stream, schema, leaveOpen: false)
        {
        }

        public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen)
            : this(stream, schema, leaveOpen, options: null)
        {
        }

        /// <summary>
        /// Creates a writer over <paramref name="stream"/>.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="stream"/> is not writable or not seekable.
        /// </exception>
        public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen, IpcOptions options)
            : base(stream, schema, leaveOpen, options)
        {
            if (!stream.CanWrite)
            {
                throw new ArgumentException("stream must be writable", nameof(stream));
            }

            // TODO: Remove seek requirement

            if (!stream.CanSeek)
            {
                throw new ArgumentException("stream must be seekable", nameof(stream));
            }

            RecordBatchBlocks = new List<Block>();
        }

        /// <summary>
        /// Writes the file start (if applicable) followed by <paramref name="recordBatch"/>.
        /// </summary>
        public override void WriteRecordBatch(RecordBatch recordBatch)
        {
            // TODO: Compare record batch schema

            WriteStart();

            WriteRecordBatchInternal(recordBatch);
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="WriteRecordBatch"/>.
        /// </summary>
        public override async Task WriteRecordBatchAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default)
        {
            // TODO: Compare record batch schema

            await WriteStartAsync(cancellationToken).ConfigureAwait(false);

            cancellationToken.ThrowIfCancellationRequested();

            await WriteRecordBatchInternalAsync(recordBatch, cancellationToken)
                .ConfigureAwait(false);
        }

        private protected override void StartingWritingRecordBatch()
        {
            _currentRecordBatchOffset = BaseStream.Position;
        }

        private protected override void FinishedWritingRecordBatch(long bodyLength, long metadataLength)
        {
            // Record batches only appear after a Schema is written, so the record batch offsets must
            // always be greater than 0.
            Debug.Assert(_currentRecordBatchOffset > 0, "_currentRecordBatchOffset must be positive.");

            // Throws OverflowException rather than truncating if the metadata does not fit in an int.
            int metadataLengthInt = checked((int)metadataLength);

            Debug.Assert(BitUtility.IsMultipleOf8(_currentRecordBatchOffset));
            Debug.Assert(BitUtility.IsMultipleOf8(metadataLengthInt));
            Debug.Assert(BitUtility.IsMultipleOf8(bodyLength));

            var block = new Block(
                offset: _currentRecordBatchOffset,
                length: bodyLength,
                metadataLength: metadataLengthInt);

            RecordBatchBlocks.Add(block);

            _currentRecordBatchOffset = -1;
        }

        private protected override void WriteEndInternal()
        {
            base.WriteEndInternal();

            WriteFooter(Schema);
        }

        private protected override async ValueTask WriteEndInternalAsync(CancellationToken cancellationToken)
        {
            // Library code: do not capture the caller's context, like every other await in this class.
            await base.WriteEndInternalAsync(cancellationToken).ConfigureAwait(false);

            await WriteFooterAsync(Schema, cancellationToken).ConfigureAwait(false);
        }

        private protected override void WriteStartInternal()
        {
            // Write magic number and empty padding up to the 8-byte boundary

            WriteMagic();
            WritePadding(CalculatePadding(ArrowFileConstants.Magic.Length));
        }

        private protected override async ValueTask WriteStartInternalAsync(CancellationToken cancellationToken)
        {
            // Write magic number and empty padding up to the 8-byte boundary

            await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
            await WritePaddingAsync(CalculatePadding(ArrowFileConstants.Magic.Length))
                .ConfigureAwait(false);
        }

        /// <summary>
        /// Builds the footer flatbuffer in <c>Builder</c>: the schema, one block per record
        /// batch written, and an empty dictionaries vector. Shared by the synchronous and
        /// asynchronous footer writers so both always emit the same footer.
        /// </summary>
        private void BuildFooter(Schema schema)
        {
            Builder.Clear();

            // Serialize the schema

            FlatBuffers.Offset<Flatbuf.Schema> schemaOffset = SerializeSchema(schema);

            // Serialize all record batches

            Flatbuf.Footer.StartRecordBatchesVector(Builder, RecordBatchBlocks.Count);

            // flatbuffer struct vectors have to be created in reverse order
            for (int i = RecordBatchBlocks.Count - 1; i >= 0; i--)
            {
                Block recordBatch = RecordBatchBlocks[i];
                Flatbuf.Block.CreateBlock(
                    Builder, recordBatch.Offset, recordBatch.MetadataLength, recordBatch.BodyLength);
            }

            FlatBuffers.VectorOffset recordBatchesVectorOffset = Builder.EndVector();

            // Serialize all dictionaries
            // NOTE: Currently unsupported.

            Flatbuf.Footer.StartDictionariesVector(Builder, 0);

            FlatBuffers.VectorOffset dictionaryBatchesOffset = Builder.EndVector();

            // Serialize the footer flatbuffer

            FlatBuffers.Offset<Flatbuf.Footer> footerOffset = Flatbuf.Footer.CreateFooter(Builder, CurrentMetadataVersion,
                schemaOffset, dictionaryBatchesOffset, recordBatchesVectorOffset);

            Builder.Finish(footerOffset.Value);
        }

        // Number of bytes written since footerStart; throws OverflowException if it does not fit in an int.
        private int GetFooterLength(long footerStart)
        {
            return checked((int)(BaseStream.Position - footerStart));
        }

        /// <summary>
        /// Writes the footer flatbuffer, its length as a little-endian int, and the trailing magic.
        /// </summary>
        private void WriteFooter(Schema schema)
        {
            long offset = BaseStream.Position;

            BuildFooter(schema);

            WriteFlatBuffer();

            // Write footer length

            Buffers.RentReturn(FooterLengthSize, (buffer) =>
            {
                int footerLength = GetFooterLength(offset);

                BinaryPrimitives.WriteInt32LittleEndian(buffer.Span, footerLength);

                BaseStream.Write(buffer);
            });

            // Write magic

            WriteMagic();
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="WriteFooter"/>; checks for cancellation
        /// before each write.
        /// </summary>
        private async Task WriteFooterAsync(Schema schema, CancellationToken cancellationToken)
        {
            long offset = BaseStream.Position;

            BuildFooter(schema);

            cancellationToken.ThrowIfCancellationRequested();

            await WriteFlatBufferAsync(cancellationToken).ConfigureAwait(false);

            // Write footer length

            cancellationToken.ThrowIfCancellationRequested();

            await Buffers.RentReturnAsync(FooterLengthSize, async (buffer) =>
            {
                int footerLength = GetFooterLength(offset);

                BinaryPrimitives.WriteInt32LittleEndian(buffer.Span, footerLength);

                await BaseStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
            }).ConfigureAwait(false);

            // Write magic

            await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
        }

        private void WriteMagic()
        {
            BaseStream.Write(ArrowFileConstants.Magic);
        }

        private ValueTask WriteMagicAsync(CancellationToken cancellationToken)
        {
            return BaseStream.WriteAsync(ArrowFileConstants.Magic, cancellationToken);
        }
    }
}
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// In-memory form of an Arrow file footer: the schema plus the dictionary and
    /// record batch blocks it lists.
    /// </summary>
    internal class ArrowFooter
    {
        private readonly List<Block> _dictionaries;
        private readonly List<Block> _recordBatches;

        /// <summary>
        /// Creates a footer from already-decoded parts. Both sequences are copied into lists.
        /// In DEBUG builds every block's offset, metadata length and body length are asserted
        /// to be multiples of 8.
        /// </summary>
        public ArrowFooter(Schema schema, IEnumerable<Block> dictionaries, IEnumerable<Block> recordBatches)
        {
            Schema = schema;

            _dictionaries = dictionaries.ToList();
            _recordBatches = recordBatches.ToList();

            AssertBlocksAligned(_dictionaries);
            AssertBlocksAligned(_recordBatches);
        }

        /// <summary>
        /// Creates a footer from its flatbuffer representation.
        /// </summary>
        public ArrowFooter(Flatbuf.Footer footer, ref DictionaryMemo dictionaryMemo)
            : this(Ipc.MessageSerializer.GetSchema(footer.Schema.GetValueOrDefault(), ref dictionaryMemo), GetDictionaries(footer),
                  GetRecordBatches(footer))
        { }

        public Schema Schema { get; }

        public IEnumerable<Block> Dictionaries
        {
            get { return _dictionaries; }
        }

        public IEnumerable<Block> RecordBatches
        {
            get { return _recordBatches; }
        }

        public int DictionaryCount
        {
            get { return _dictionaries.Count; }
        }

        public int RecordBatchCount
        {
            get { return _recordBatches.Count; }
        }

        public Block GetDictionaryBlock(int i)
        {
            return _dictionaries[i];
        }

        public Block GetRecordBatchBlock(int i)
        {
            return _recordBatches[i];
        }

        // Calls to this method are compiled only when DEBUG is defined.
        [Conditional("DEBUG")]
        private static void AssertBlocksAligned(List<Block> blocks)
        {
            foreach (Block candidate in blocks)
            {
                Debug.Assert(BitUtility.IsMultipleOf8(candidate.Offset));
                Debug.Assert(BitUtility.IsMultipleOf8(candidate.MetadataLength));
                Debug.Assert(BitUtility.IsMultipleOf8(candidate.BodyLength));
            }
        }

        // Lazily yields one Block per dictionary entry that has a value; entries without a value are skipped.
        private static IEnumerable<Block> GetDictionaries(Flatbuf.Footer footer)
        {
            for (int position = 0; position < footer.DictionariesLength; position++)
            {
                if (footer.Dictionaries(position) is Flatbuf.Block entry)
                {
                    yield return new Block(entry);
                }
            }
        }

        // Lazily yields one Block per record batch entry that has a value; entries without a value are skipped.
        private static IEnumerable<Block> GetRecordBatches(Flatbuf.Footer footer)
        {
            for (int position = 0; position < footer.RecordBatchesLength; position++)
            {
                if (footer.RecordBatches(position) is Flatbuf.Block entry)
                {
                    yield return new Block(entry);
                }
            }
        }
    }
}
using Apache.Arrow.Flatbuf;
using FlatBuffers;
using System;
using System.Buffers.Binary;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Reads IPC messages from an in-memory buffer, keeping a cursor into it.
    /// </summary>
    internal sealed class ArrowMemoryReaderImplementation : ArrowReaderImplementation
    {
        private readonly ReadOnlyMemory<byte> _buffer;

        // Index into _buffer of the next unread byte.
        private int _bufferPosition;

        public ArrowMemoryReaderImplementation(ReadOnlyMemory<byte> buffer) : base()
        {
            _buffer = buffer;
        }

        /// <summary>
        /// Completes synchronously with the result of <see cref="ReadNextRecordBatch"/>;
        /// cancellation is only observed before the read starts.
        /// </summary>
        public override ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();
            return new ValueTask<RecordBatch>(ReadNextRecordBatch());
        }

        /// <summary>
        /// Reads the next message and its body from the buffer.
        /// </summary>
        /// <returns>
        /// The decoded record batch, or null when the buffer is exhausted or a zero
        /// message length (end of stream) is read.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// A continuation token is not followed by a 4-byte length.
        /// </exception>
        /// <exception cref="OverflowException">The message's body length does not fit in an int.</exception>
        public override RecordBatch ReadNextRecordBatch()
        {
            ReadSchema();

            if (_buffer.Length <= _bufferPosition + sizeof(int))
            {
                // reached the end
                return null;
            }

            // Get Length of record batch for message header.
            int messageLength = ReadInt32AndAdvance();

            if (messageLength == 0)
            {
                //reached the end
                return null;
            }
            else if (messageLength == MessageSerializer.IpcContinuationToken)
            {
                // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length.
                // Exactly 4 remaining bytes are enough: an end-of-stream marker is the continuation
                // token followed by a zero length and may be the very last thing in the buffer,
                // so only fewer than 4 remaining bytes is an error.
                if (_buffer.Length - _bufferPosition < sizeof(int))
                {
                    throw new InvalidDataException("Corrupted IPC message. Received a continuation token at the end of the message.");
                }

                messageLength = ReadInt32AndAdvance();

                if (messageLength == 0)
                {
                    //reached the end
                    return null;
                }
            }

            Message message = Message.GetRootAsMessage(
                CreateByteBuffer(_buffer.Slice(_bufferPosition, messageLength)));
            _bufferPosition += messageLength;

            // BodyLength is a long; fail loudly instead of silently wrapping to a wrong int.
            int bodyLength = checked((int)message.BodyLength);
            ByteBuffer bodybb = CreateByteBuffer(_buffer.Slice(_bufferPosition, bodyLength));
            _bufferPosition += bodyLength;

            return CreateArrowObjectFromMessage(message, bodybb, memoryOwner: null);
        }

        /// <summary>
        /// Reads the schema message at the cursor and advances past it.
        /// Does nothing when the schema has already been read.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// A continuation token is not followed by a length and at least one more byte.
        /// </exception>
        private void ReadSchema()
        {
            if (HasReadSchema)
            {
                return;
            }

            // Figure out length of schema
            int schemaMessageLength = ReadInt32AndAdvance();

            if (schemaMessageLength == MessageSerializer.IpcContinuationToken)
            {
                // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length.
                // Unlike a record batch read, a schema message must follow the length,
                // so a buffer that ends right after the length is rejected here as well.
                if (_buffer.Length <= _bufferPosition + sizeof(int))
                {
                    throw new InvalidDataException("Corrupted IPC message. Received a continuation token at the end of the message.");
                }

                schemaMessageLength = ReadInt32AndAdvance();
            }

            ByteBuffer schemaBuffer = CreateByteBuffer(_buffer.Slice(_bufferPosition));
            Schema = MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemaBuffer), ref _dictionaryMemo);
            _bufferPosition += schemaMessageLength;
        }

        // Decodes a little-endian int at the cursor and moves the cursor past it.
        private int ReadInt32AndAdvance()
        {
            int value = BinaryPrimitives.ReadInt32LittleEndian(_buffer.Span.Slice(_bufferPosition));
            _bufferPosition += sizeof(int);
            return value;
        }
    }
}
using FlatBuffers;
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Types;
using Apache.Arrow.Memory;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Common base of the IPC readers. Holds the schema, the dictionary memo and the allocator,
    /// and converts a Flatbuf message plus its body bytes into Arrow arrays and record batches.
    /// </summary>
    internal abstract class ArrowReaderImplementation : IDisposable
    {
        public Schema Schema { get; protected set; }
        protected bool HasReadSchema => Schema != null;

        private protected DictionaryMemo _dictionaryMemo;
        private protected DictionaryMemo DictionaryMemo => _dictionaryMemo ??= new DictionaryMemo();
        private protected readonly MemoryAllocator _allocator;

        private protected ArrowReaderImplementation() : this(null)
        { }

        /// <param name="allocator">
        /// Allocator to use; when null, <c>MemoryAllocator.Default.Value</c> is used.
        /// </param>
        private protected ArrowReaderImplementation(MemoryAllocator allocator)
        {
            _allocator = allocator ?? MemoryAllocator.Default.Value;
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
        }

        public abstract ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken);
        public abstract RecordBatch ReadNextRecordBatch();

        /// <summary>
        /// Parses <paramref name="bb"/> as a Flatbuf message and returns its header as <typeparamref name="T"/>.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The message header is not of the requested type.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The message header type is NONE or an unexpected value.
        /// </exception>
        internal static T ReadMessage<T>(ByteBuffer bb)
            where T : struct, IFlatbufferObject
        {
            Type returnType = typeof(T);
            Flatbuf.Message msg = Flatbuf.Message.GetRootAsMessage(bb);

            if (!MatchEnum(msg.HeaderType, returnType))
            {
                // A header/type mismatch means the data is not what the caller expected;
                // InvalidDataException matches how the rest of this class reports corrupt input.
                throw new InvalidDataException($"Requested type '{returnType.Name}' " +
                    $"did not match type found at offset => '{msg.HeaderType}'");
            }

            return msg.Header<T>().Value;
        }

        /// <summary>
        /// Tells whether <paramref name="flatBuffType"/> is the Flatbuf type that corresponds to
        /// <paramref name="messageHeader"/>.
        /// </summary>
        private static bool MatchEnum(Flatbuf.MessageHeader messageHeader, Type flatBuffType)
        {
            switch (messageHeader)
            {
                case Flatbuf.MessageHeader.RecordBatch:
                    return flatBuffType == typeof(Flatbuf.RecordBatch);
                case Flatbuf.MessageHeader.DictionaryBatch:
                    return flatBuffType == typeof(Flatbuf.DictionaryBatch);
                case Flatbuf.MessageHeader.Schema:
                    return flatBuffType == typeof(Flatbuf.Schema);
                case Flatbuf.MessageHeader.Tensor:
                    return flatBuffType == typeof(Flatbuf.Tensor);
                case Flatbuf.MessageHeader.NONE:
                    throw new ArgumentException("MessageHeader NONE has no matching flatbuf types", nameof(messageHeader));
                default:
                    throw new ArgumentException("Unexpected MessageHeader value", nameof(messageHeader));
            }
        }

        /// <summary>
        /// Create a record batch or dictionary batch from Flatbuf.Message.
        /// </summary>
        /// <remarks>
        /// This method adds data to _dictionaryMemo and returns null when the message type is DictionaryBatch.
        /// </remarks>
        /// <returns>
        /// The record batch when the message type is RecordBatch.
        /// Null when the message type is not RecordBatch.
        /// </returns>
        protected RecordBatch CreateArrowObjectFromMessage(
            Flatbuf.Message message, ByteBuffer bodyByteBuffer, IMemoryOwner<byte> memoryOwner)
        {
            switch (message.HeaderType)
            {
                case Flatbuf.MessageHeader.Schema:
                    // TODO: Read schema and verify equality?
                    break;
                case Flatbuf.MessageHeader.DictionaryBatch:
                    Flatbuf.DictionaryBatch dictionaryBatch = message.Header<Flatbuf.DictionaryBatch>().Value;
                    ReadDictionaryBatch(dictionaryBatch, bodyByteBuffer, memoryOwner);
                    break;
                case Flatbuf.MessageHeader.RecordBatch:
                    Flatbuf.RecordBatch rb = message.Header<Flatbuf.RecordBatch>().Value;
                    List<IArrowArray> arrays = BuildArrays(Schema, bodyByteBuffer, rb);
                    // checked: a row count that does not fit in an int must not wrap silently.
                    return new RecordBatch(Schema, memoryOwner, arrays, checked((int)rb.Length));
                default:
                    // NOTE: Skip unsupported message type
                    Debug.WriteLine($"Skipping unsupported message type '{message.HeaderType}'");
                    break;
            }

            return null;
        }

        /// <summary>
        /// Wraps <paramref name="buffer"/> in a FlatBuffers <see cref="ByteBuffer"/> positioned at 0.
        /// </summary>
        internal static ByteBuffer CreateByteBuffer(ReadOnlyMemory<byte> buffer)
        {
            return new ByteBuffer(new ReadOnlyMemoryBufferAllocator(buffer), 0);
        }

        /// <summary>
        /// Decodes a dictionary batch and registers it in the dictionary memo, either as a
        /// delta or as a replacement depending on <c>IsDelta</c>.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The batch carries no record batch, or does not decode to exactly one array.
        /// </exception>
        private void ReadDictionaryBatch(Flatbuf.DictionaryBatch dictionaryBatch, ByteBuffer bodyByteBuffer, IMemoryOwner<byte> memoryOwner)
        {
            long id = dictionaryBatch.Id;
            IArrowType valueType = DictionaryMemo.GetDictionaryType(id);
            Flatbuf.RecordBatch? recordBatch = dictionaryBatch.Data;

            if (!recordBatch.HasValue)
            {
                throw new InvalidDataException("Dictionary must contain RecordBatch");
            }

            // The dictionary values are decoded through a throw-away single-field schema.
            Field valueField = new Field("dummy", valueType, true);
            var schema = new Schema(new[] { valueField }, default);
            IList<IArrowArray> arrays = BuildArrays(schema, bodyByteBuffer, recordBatch.Value);

            if (arrays.Count != 1)
            {
                throw new InvalidDataException("Dictionary record batch must contain only one field");
            }

            if (dictionaryBatch.IsDelta)
            {
                DictionaryMemo.AddDeltaDictionary(id, arrays[0], _allocator);
            }
            else
            {
                DictionaryMemo.AddOrReplaceDictionary(id, arrays[0]);
            }
        }

        /// <summary>
        /// Builds one array per top-level schema field by walking the record batch's
        /// field nodes and buffers in order.
        /// </summary>
        private List<IArrowArray> BuildArrays(
            Schema schema,
            ByteBuffer messageBuffer,
            Flatbuf.RecordBatch recordBatchMessage)
        {
            var arrays = new List<IArrowArray>(recordBatchMessage.NodesLength);

            if (recordBatchMessage.NodesLength == 0)
            {
                return arrays;
            }

            var recordBatchEnumerator = new RecordBatchEnumerator(in recordBatchMessage);
            int schemaFieldIndex = 0;
            do
            {
                Field field = schema.GetFieldByIndex(schemaFieldIndex++);
                Flatbuf.FieldNode fieldNode = recordBatchEnumerator.CurrentNode;

                ArrayData arrayData = field.DataType.IsFixedPrimitive()
                    ? LoadPrimitiveField(ref recordBatchEnumerator, field, in fieldNode, messageBuffer)
                    : LoadVariableField(ref recordBatchEnumerator, field, in fieldNode, messageBuffer);

                arrays.Add(ArrowArrayFactory.BuildArray(arrayData));
            } while (recordBatchEnumerator.MoveNextNode());

            return arrays;
        }

        /// <summary>
        /// Loads a field made of a validity buffer plus, unless it is a struct, one value buffer.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// No buffer follows the validity buffer, or the node's length / null count is out of range.
        /// </exception>
        private ArrayData LoadPrimitiveField(
            ref RecordBatchEnumerator recordBatchEnumerator,
            Field field,
            in Flatbuf.FieldNode fieldNode,
            ByteBuffer bodyData)
        {
            ArrowBuffer nullArrowBuffer = ReadBufferExpectingMore(ref recordBatchEnumerator, bodyData);

            int fieldLength = ToFieldLength(fieldNode.Length);
            int fieldNullCount = ToNullCount(fieldNode.NullCount);

            ArrowBuffer[] arrowBuff;
            if (field.DataType.TypeId == ArrowTypeId.Struct)
            {
                // A struct contributes only its validity buffer; its children bring their own.
                arrowBuff = new[] { nullArrowBuffer };
            }
            else
            {
                ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer);
                // This may be the last buffer of the batch, so the result is deliberately ignored.
                recordBatchEnumerator.MoveNextBuffer();

                arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer };
            }

            ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData);

            return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, GetDictionaryData(field));
        }

        /// <summary>
        /// Loads a field made of validity, offsets and value buffers.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// A required buffer is missing, or the node's length / null count is out of range.
        /// </exception>
        private ArrayData LoadVariableField(
            ref RecordBatchEnumerator recordBatchEnumerator,
            Field field,
            in Flatbuf.FieldNode fieldNode,
            ByteBuffer bodyData)
        {
            ArrowBuffer nullArrowBuffer = ReadBufferExpectingMore(ref recordBatchEnumerator, bodyData);
            ArrowBuffer offsetArrowBuffer = ReadBufferExpectingMore(ref recordBatchEnumerator, bodyData);
            ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer);
            // This may be the last buffer of the batch, so the result is deliberately ignored.
            recordBatchEnumerator.MoveNextBuffer();

            int fieldLength = ToFieldLength(fieldNode.Length);
            int fieldNullCount = ToNullCount(fieldNode.NullCount);

            ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer, offsetArrowBuffer, valueArrowBuffer };
            ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData);

            return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, GetDictionaryData(field));
        }

        /// <summary>
        /// Builds the current buffer and advances, failing when no further buffer exists.
        /// </summary>
        private ArrowBuffer ReadBufferExpectingMore(ref RecordBatchEnumerator recordBatchEnumerator, ByteBuffer bodyData)
        {
            ArrowBuffer buffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer);
            if (!recordBatchEnumerator.MoveNextBuffer())
            {
                throw new InvalidDataException("Unable to move to the next buffer.");
            }

            return buffer;
        }

        /// <summary>
        /// Validates a field node length before narrowing it, so oversized values cannot wrap.
        /// </summary>
        private static int ToFieldLength(long length)
        {
            if (length < 0)
            {
                throw new InvalidDataException("Field length must be >= 0"); // TODO: Localize exception message
            }

            if (length > int.MaxValue)
            {
                throw new InvalidDataException("Field length must be <= Int32.MaxValue"); // TODO: Localize exception message
            }

            return (int)length;
        }

        /// <summary>
        /// Validates a field node null count before narrowing it, so oversized values cannot wrap.
        /// </summary>
        private static int ToNullCount(long nullCount)
        {
            if (nullCount < 0)
            {
                throw new InvalidDataException("Null count length must be >= 0"); // TODO: Localize exception message
            }

            if (nullCount > int.MaxValue)
            {
                throw new InvalidDataException("Null count length must be <= Int32.MaxValue"); // TODO: Localize exception message
            }

            return (int)nullCount;
        }

        /// <summary>
        /// Returns the memoized dictionary data for a dictionary-typed field, or null for any other field.
        /// </summary>
        private ArrayData GetDictionaryData(Field field)
        {
            if (field.DataType.TypeId != ArrowTypeId.Dictionary)
            {
                return null;
            }

            long id = DictionaryMemo.GetId(field);
            IArrowArray dictionary = DictionaryMemo.GetDictionary(id);
            return dictionary?.Data;
        }

        /// <summary>
        /// Loads the child arrays of a nested field, consuming one field node per child.
        /// </summary>
        /// <returns>The children in declaration order, or null when the field is not nested.</returns>
        private ArrayData[] GetChildren(
            ref RecordBatchEnumerator recordBatchEnumerator,
            Field field,
            ByteBuffer bodyData)
        {
            if (!(field.DataType is NestedType type))
            {
                return null;
            }

            int childrenCount = type.Fields.Count;
            var children = new ArrayData[childrenCount];
            for (int index = 0; index < childrenCount; index++)
            {
                recordBatchEnumerator.MoveNextNode();
                Flatbuf.FieldNode childFieldNode = recordBatchEnumerator.CurrentNode;

                Field childField = type.Fields[index];
                ArrayData child = childField.DataType.IsFixedPrimitive()
                    ? LoadPrimitiveField(ref recordBatchEnumerator, childField, in childFieldNode, bodyData)
                    : LoadVariableField(ref recordBatchEnumerator, childField, in childFieldNode, bodyData);

                children[index] = child;
            }

            return children;
        }

        /// <summary>
        /// Maps a Flatbuf buffer descriptor onto the message body.
        /// </summary>
        /// <returns><c>ArrowBuffer.Empty</c> when the descriptor's length is not positive.</returns>
        /// <exception cref="OverflowException">The offset or length does not fit in an int.</exception>
        private ArrowBuffer BuildArrowBuffer(ByteBuffer bodyData, Flatbuf.Buffer buffer)
        {
            if (buffer.Length <= 0)
            {
                return ArrowBuffer.Empty;
            }

            // checked: offsets and lengths are 64-bit on the wire and must not wrap when narrowed.
            int offset = checked((int)buffer.Offset);
            int length = checked((int)buffer.Length);

            var data = bodyData.ToReadOnlyMemory(offset, length);
            return new ArrowBuffer(data);
        }
    }

    /// <summary>
    /// Cursor over the buffers and field nodes of a Flatbuf record batch; the two indices
    /// advance independently.
    /// </summary>
    internal struct RecordBatchEnumerator
    {
        private Flatbuf.RecordBatch RecordBatch { get; }
        internal int CurrentBufferIndex { get; private set; }
        internal int CurrentNodeIndex { get; private set; }

        internal Flatbuf.Buffer CurrentBuffer => RecordBatch.Buffers(CurrentBufferIndex).GetValueOrDefault();

        internal Flatbuf.FieldNode CurrentNode => RecordBatch.Nodes(CurrentNodeIndex).GetValueOrDefault();

        /// <summary>Advances to the next buffer; false when the index moved past the last one.</summary>
        internal bool MoveNextBuffer()
        {
            return ++CurrentBufferIndex < RecordBatch.BuffersLength;
        }

        /// <summary>Advances to the next field node; false when the index moved past the last one.</summary>
        internal bool MoveNextNode()
        {
            return ++CurrentNodeIndex < RecordBatch.NodesLength;
        }

        internal RecordBatchEnumerator(in Flatbuf.RecordBatch recordBatch)
        {
            RecordBatch = recordBatch;
            CurrentBufferIndex = 0;
            CurrentNodeIndex = 0;
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs new file mode 100644 index 000000000..5e7d7befb --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using Apache.Arrow.Memory;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Represents a reader that can read Arrow streams.
    /// </summary>
    /// <remarks>
    /// Every member forwards to an internal reader implementation chosen by the constructor:
    /// a stream-backed one for <see cref="Stream"/> input, a memory-backed one for
    /// <see cref="ReadOnlyMemory{T}"/> input.
    /// </remarks>
    public class ArrowStreamReader : IArrowReader, IDisposable
    {
        private protected readonly ArrowReaderImplementation _implementation;

        /// <summary>The schema exposed by the underlying implementation.</summary>
        public Schema Schema => _implementation.Schema;

        public ArrowStreamReader(Stream stream)
            : this(stream, allocator: null, leaveOpen: false)
        {
        }

        public ArrowStreamReader(Stream stream, MemoryAllocator allocator)
            : this(stream, allocator, leaveOpen: false)
        {
        }

        public ArrowStreamReader(Stream stream, bool leaveOpen)
            : this(stream, allocator: null, leaveOpen)
        {
        }

        /// <param name="stream">The stream to read from; must not be null.</param>
        /// <param name="allocator">Passed through to the stream-backed implementation.</param>
        /// <param name="leaveOpen">Passed through to the stream-backed implementation.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public ArrowStreamReader(Stream stream, MemoryAllocator allocator, bool leaveOpen)
        {
            _implementation = new ArrowStreamReaderImplementation(
                stream ?? throw new ArgumentNullException(nameof(stream)),
                allocator,
                leaveOpen);
        }

        public ArrowStreamReader(ReadOnlyMemory<byte> buffer)
        {
            _implementation = new ArrowMemoryReaderImplementation(buffer);
        }

        private protected ArrowStreamReader(ArrowReaderImplementation implementation)
        {
            _implementation = implementation;
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>Disposes the underlying implementation when called from <see cref="Dispose()"/>.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!disposing)
            {
                return;
            }

            _implementation.Dispose();
        }

        public ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
            => _implementation.ReadNextRecordBatchAsync(cancellationToken);

        public RecordBatch ReadNextRecordBatch()
            => _implementation.ReadNextRecordBatch();
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs new file mode 100644 index 000000000..ffbe95669 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using Apache.Arrow.Memory;
using System;
using System.Buffers;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Reads an Arrow IPC stream from a <see cref="Stream"/>, one length-prefixed message at a time.
    /// </summary>
    internal class ArrowStreamReaderImplementation : ArrowReaderImplementation
    {
        public Stream BaseStream { get; }
        private readonly bool _leaveOpen;

        public ArrowStreamReaderImplementation(Stream stream, MemoryAllocator allocator, bool leaveOpen) : base(allocator)
        {
            BaseStream = stream;
            _leaveOpen = leaveOpen;
        }

        /// <summary>Disposes <see cref="BaseStream"/> unless the reader was created with leaveOpen.</summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing && !_leaveOpen)
            {
                BaseStream.Dispose();
            }
        }

        public override async ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();
            return await ReadRecordBatchAsync(cancellationToken).ConfigureAwait(false);
        }

        public override RecordBatch ReadNextRecordBatch()
        {
            return ReadRecordBatch();
        }

        /// <summary>
        /// Reads messages until one yields a record batch, skipping messages that do not.
        /// </summary>
        /// <returns>The next record batch, or null when a message length of 0 is read.</returns>
        /// <exception cref="InvalidOperationException">The stream ended inside a message.</exception>
        /// <exception cref="OverflowException">The declared body length does not fit in an int.</exception>
        protected async ValueTask<RecordBatch> ReadRecordBatchAsync(CancellationToken cancellationToken = default)
        {
            await ReadSchemaAsync().ConfigureAwait(false);

            RecordBatch result = null;

            while (result == null)
            {
                int messageLength = await ReadMessageLengthAsync(throwOnFullRead: false, cancellationToken)
                    .ConfigureAwait(false);

                if (messageLength == 0)
                {
                    // reached end
                    return null;
                }

                await ArrayPool<byte>.Shared.RentReturnAsync(messageLength, async (messageBuff) =>
                {
                    int bytesRead = await BaseStream.ReadFullBufferAsync(messageBuff, cancellationToken)
                        .ConfigureAwait(false);
                    EnsureFullRead(messageBuff, bytesRead);

                    Flatbuf.Message message = Flatbuf.Message.GetRootAsMessage(CreateByteBuffer(messageBuff));

                    int bodyLength = checked((int)message.BodyLength);

                    IMemoryOwner<byte> bodyBuffOwner = _allocator.Allocate(bodyLength);
                    bool bodyRetained = false;
                    try
                    {
                        Memory<byte> bodyBuff = bodyBuffOwner.Memory.Slice(0, bodyLength);
                        bytesRead = await BaseStream.ReadFullBufferAsync(bodyBuff, cancellationToken)
                            .ConfigureAwait(false);
                        EnsureFullRead(bodyBuff, bytesRead);

                        FlatBuffers.ByteBuffer bodybb = CreateByteBuffer(bodyBuff);
                        result = CreateArrowObjectFromMessage(message, bodybb, bodyBuffOwner);
                        bodyRetained = IsBodyRetained(message, result);
                    }
                    finally
                    {
                        // Release the body when reading/decoding failed or the message was skipped.
                        if (!bodyRetained)
                        {
                            bodyBuffOwner.Dispose();
                        }
                    }
                }).ConfigureAwait(false);
            }

            return result;
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadRecordBatchAsync"/>.
        /// </summary>
        /// <returns>The next record batch, or null when a message length of 0 is read.</returns>
        /// <exception cref="InvalidOperationException">The stream ended inside a message.</exception>
        /// <exception cref="OverflowException">The declared body length does not fit in an int.</exception>
        protected RecordBatch ReadRecordBatch()
        {
            ReadSchema();

            RecordBatch result = null;

            while (result == null)
            {
                int messageLength = ReadMessageLength(throwOnFullRead: false);

                if (messageLength == 0)
                {
                    // reached end
                    return null;
                }

                ArrayPool<byte>.Shared.RentReturn(messageLength, messageBuff =>
                {
                    int bytesRead = BaseStream.ReadFullBuffer(messageBuff);
                    EnsureFullRead(messageBuff, bytesRead);

                    Flatbuf.Message message = Flatbuf.Message.GetRootAsMessage(CreateByteBuffer(messageBuff));

                    int bodyLength = checked((int)message.BodyLength);

                    IMemoryOwner<byte> bodyBuffOwner = _allocator.Allocate(bodyLength);
                    bool bodyRetained = false;
                    try
                    {
                        Memory<byte> bodyBuff = bodyBuffOwner.Memory.Slice(0, bodyLength);
                        bytesRead = BaseStream.ReadFullBuffer(bodyBuff);
                        EnsureFullRead(bodyBuff, bytesRead);

                        FlatBuffers.ByteBuffer bodybb = CreateByteBuffer(bodyBuff);
                        result = CreateArrowObjectFromMessage(message, bodybb, bodyBuffOwner);
                        bodyRetained = IsBodyRetained(message, result);
                    }
                    finally
                    {
                        // Release the body when reading/decoding failed or the message was skipped.
                        if (!bodyRetained)
                        {
                            bodyBuffOwner.Dispose();
                        }
                    }
                });
            }

            return result;
        }

        /// <summary>
        /// Tells whether the body buffer of a decoded message is still referenced afterwards.
        /// </summary>
        /// <remarks>
        /// A produced record batch receives the memory owner. A dictionary batch produces no
        /// record batch, but its decoded array is stored in the dictionary memo and is built over
        /// the body bytes, so the body has to stay alive. Any other message leaves nothing behind.
        /// </remarks>
        private static bool IsBodyRetained(Flatbuf.Message message, RecordBatch result)
        {
            return result != null || message.HeaderType == Flatbuf.MessageHeader.DictionaryBatch;
        }

        /// <summary>
        /// Reads the schema message from the stream, once; later calls are no-ops.
        /// </summary>
        protected virtual async ValueTask ReadSchemaAsync()
        {
            if (HasReadSchema)
            {
                return;
            }

            // Figure out length of schema
            int schemaMessageLength = await ReadMessageLengthAsync(throwOnFullRead: true)
                .ConfigureAwait(false);

            await ArrayPool<byte>.Shared.RentReturnAsync(schemaMessageLength, async (buff) =>
            {
                // Read in schema
                int bytesRead = await BaseStream.ReadFullBufferAsync(buff).ConfigureAwait(false);
                EnsureFullRead(buff, bytesRead);

                FlatBuffers.ByteBuffer schemabb = CreateByteBuffer(buff);
                Schema = MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref _dictionaryMemo);
            }).ConfigureAwait(false);
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadSchemaAsync"/>.
        /// </summary>
        protected virtual void ReadSchema()
        {
            if (HasReadSchema)
            {
                return;
            }

            // Figure out length of schema
            int schemaMessageLength = ReadMessageLength(throwOnFullRead: true);

            ArrayPool<byte>.Shared.RentReturn(schemaMessageLength, buff =>
            {
                int bytesRead = BaseStream.ReadFullBuffer(buff);
                EnsureFullRead(buff, bytesRead);

                FlatBuffers.ByteBuffer schemabb = CreateByteBuffer(buff);
                Schema = MessageSerializer.GetSchema(ReadMessage<Flatbuf.Schema>(schemabb), ref _dictionaryMemo);
            });
        }

        /// <summary>
        /// Reads the 4-byte length prefix of the next message, honouring the optional continuation token.
        /// </summary>
        /// <param name="throwOnFullRead">
        /// When true, a short read throws; when false, a short read yields 0.
        /// </param>
        /// <param name="cancellationToken">Forwarded to the stream reads.</param>
        /// <returns>The message length, or 0 when nothing (more) could be read.</returns>
        private async ValueTask<int> ReadMessageLengthAsync(bool throwOnFullRead, CancellationToken cancellationToken = default)
        {
            int messageLength = 0;
            await ArrayPool<byte>.Shared.RentReturnAsync(4, async (lengthBuffer) =>
            {
                int bytesRead = await BaseStream.ReadFullBufferAsync(lengthBuffer, cancellationToken)
                    .ConfigureAwait(false);
                if (throwOnFullRead)
                {
                    EnsureFullRead(lengthBuffer, bytesRead);
                }
                else if (bytesRead != 4)
                {
                    return;
                }

                messageLength = BitUtility.ReadInt32(lengthBuffer);

                // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length
                if (messageLength == MessageSerializer.IpcContinuationToken)
                {
                    bytesRead = await BaseStream.ReadFullBufferAsync(lengthBuffer, cancellationToken)
                        .ConfigureAwait(false);
                    if (throwOnFullRead)
                    {
                        EnsureFullRead(lengthBuffer, bytesRead);
                    }
                    else if (bytesRead != 4)
                    {
                        // Do not report the continuation token itself as a length.
                        messageLength = 0;
                        return;
                    }

                    messageLength = BitUtility.ReadInt32(lengthBuffer);
                }
            }).ConfigureAwait(false);

            return messageLength;
        }

        /// <summary>
        /// Synchronous counterpart of <see cref="ReadMessageLengthAsync"/>.
        /// </summary>
        /// <param name="throwOnFullRead">
        /// When true, a short read throws; when false, a short read yields 0.
        /// </param>
        /// <returns>The message length, or 0 when nothing (more) could be read.</returns>
        private int ReadMessageLength(bool throwOnFullRead)
        {
            int messageLength = 0;
            ArrayPool<byte>.Shared.RentReturn(4, lengthBuffer =>
            {
                int bytesRead = BaseStream.ReadFullBuffer(lengthBuffer);
                if (throwOnFullRead)
                {
                    EnsureFullRead(lengthBuffer, bytesRead);
                }
                else if (bytesRead != 4)
                {
                    return;
                }

                messageLength = BitUtility.ReadInt32(lengthBuffer);

                // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length
                if (messageLength == MessageSerializer.IpcContinuationToken)
                {
                    bytesRead = BaseStream.ReadFullBuffer(lengthBuffer);
                    if (throwOnFullRead)
                    {
                        EnsureFullRead(lengthBuffer, bytesRead);
                    }
                    else if (bytesRead != 4)
                    {
                        // Do not report the continuation token itself as a length.
                        messageLength = 0;
                        return;
                    }

                    messageLength = BitUtility.ReadInt32(lengthBuffer);
                }
            });

            return messageLength;
        }

        /// <summary>
        /// Ensures the number of bytes read matches the buffer length
        /// and throws an exception if it doesn't. This ensures we have read
        /// a full buffer from the stream.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="bytesRead"/> differs from the length of <paramref name="buffer"/>.
        /// </exception>
        internal static void EnsureFullRead(Memory<byte> buffer, int bytesRead)
        {
            if (bytesRead != buffer.Length)
            {
                throw new InvalidOperationException("Unexpectedly reached the end of the stream before a full buffer was read.");
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs new file mode 100644 index 000000000..ec1ad31f3 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -0,0 +1,982 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Apache.Arrow.Arrays; +using Apache.Arrow.Types; +using FlatBuffers; + +namespace Apache.Arrow.Ipc +{ + public class ArrowStreamWriter : IDisposable + { + internal class ArrowRecordBatchFlatBufferBuilder : + IArrowArrayVisitor<Int8Array>, + IArrowArrayVisitor<Int16Array>, + IArrowArrayVisitor<Int32Array>, + IArrowArrayVisitor<Int64Array>, + IArrowArrayVisitor<UInt8Array>, + IArrowArrayVisitor<UInt16Array>, + IArrowArrayVisitor<UInt32Array>, + IArrowArrayVisitor<UInt64Array>, + IArrowArrayVisitor<FloatArray>, + IArrowArrayVisitor<DoubleArray>, + IArrowArrayVisitor<BooleanArray>, + IArrowArrayVisitor<TimestampArray>, + IArrowArrayVisitor<Date32Array>, + IArrowArrayVisitor<Date64Array>, + IArrowArrayVisitor<ListArray>, + IArrowArrayVisitor<StringArray>, + IArrowArrayVisitor<BinaryArray>, + IArrowArrayVisitor<FixedSizeBinaryArray>, + IArrowArrayVisitor<StructArray>, + IArrowArrayVisitor<Decimal128Array>, + IArrowArrayVisitor<Decimal256Array>, + IArrowArrayVisitor<DictionaryArray> + { + public readonly struct Buffer + { + public readonly ArrowBuffer DataBuffer; + public readonly int Offset; + + public Buffer(ArrowBuffer buffer, int offset) + { + DataBuffer = buffer; + Offset = offset; + } + } + + private readonly List<Buffer> _buffers; + + public IReadOnlyList<Buffer> Buffers => _buffers; + + public int TotalLength { get; private set; } + + public ArrowRecordBatchFlatBufferBuilder() + { + _buffers = new List<Buffer>(); + TotalLength = 0; + } + + public void Visit(Int8Array array) => CreateBuffers(array); + public void Visit(Int16Array array) => CreateBuffers(array); + public void Visit(Int32Array array) => CreateBuffers(array); + public void Visit(Int64Array array) => CreateBuffers(array); + public void Visit(UInt8Array array) => CreateBuffers(array); + public 
void Visit(UInt16Array array) => CreateBuffers(array); + public void Visit(UInt32Array array) => CreateBuffers(array); + public void Visit(UInt64Array array) => CreateBuffers(array); + public void Visit(FloatArray array) => CreateBuffers(array); + public void Visit(DoubleArray array) => CreateBuffers(array); + public void Visit(TimestampArray array) => CreateBuffers(array); + public void Visit(BooleanArray array) => CreateBuffers(array); + public void Visit(Date32Array array) => CreateBuffers(array); + public void Visit(Date64Array array) => CreateBuffers(array); + + public void Visit(ListArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + + array.Values.Accept(this); + } + + public void Visit(StringArray array) => Visit(array as BinaryArray); + + public void Visit(BinaryArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + public void Visit(FixedSizeBinaryArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + public void Visit(Decimal128Array array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + public void Visit(Decimal256Array array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + public void Visit(StructArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + + for (int i = 0; i < array.Fields.Count; i++) + { + array.Fields[i].Accept(this); + } + } + + public void Visit(DictionaryArray array) + { + // Dictionary is serialized separately in Dictionary serialization. + // We are only interested in indices at this context. 
+ + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.IndicesBuffer)); + } + + private void CreateBuffers(BooleanArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + private void CreateBuffers<T>(PrimitiveArray<T> array) + where T : struct + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueBuffer)); + } + + private Buffer CreateBuffer(ArrowBuffer buffer) + { + int offset = TotalLength; + + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + TotalLength += paddedLength; + + return new Buffer(buffer, offset); + } + + public void Visit(IArrowArray array) + { + throw new NotImplementedException(); + } + } + + protected Stream BaseStream { get; } + + protected ArrayPool<byte> Buffers { get; } + + private protected FlatBufferBuilder Builder { get; } + + protected bool HasWrittenSchema { get; set; } + + private bool HasWrittenDictionaryBatch { get; set; } + + private bool HasWrittenStart { get; set; } + + private bool HasWrittenEnd { get; set; } + + protected Schema Schema { get; } + + private readonly bool _leaveOpen; + private readonly IpcOptions _options; + + private protected const Flatbuf.MetadataVersion CurrentMetadataVersion = Flatbuf.MetadataVersion.V4; + + private static readonly byte[] s_padding = new byte[64]; + + private readonly ArrowTypeFlatbufferBuilder _fieldTypeBuilder; + + private DictionaryMemo _dictionaryMemo; + private DictionaryMemo DictionaryMemo => _dictionaryMemo ??= new DictionaryMemo(); + + public ArrowStreamWriter(Stream baseStream, Schema schema) + : this(baseStream, schema, leaveOpen: false) + { + } + + public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen) + : this(baseStream, schema, leaveOpen, options: null) + { + } + + public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOptions options) + { + BaseStream = 
baseStream ?? throw new ArgumentNullException(nameof(baseStream)); + Schema = schema ?? throw new ArgumentNullException(nameof(schema)); + _leaveOpen = leaveOpen; + + Buffers = ArrayPool<byte>.Create(); + Builder = new FlatBufferBuilder(1024); + HasWrittenSchema = false; + + _fieldTypeBuilder = new ArrowTypeFlatbufferBuilder(Builder); + _options = options ?? IpcOptions.Default; + } + + + private void CreateSelfAndChildrenFieldNodes(ArrayData data) + { + if (data.DataType is NestedType) + { + // flatbuffer struct vectors have to be created in reverse order + for (int i = data.Children.Length - 1; i >= 0; i--) + { + CreateSelfAndChildrenFieldNodes(data.Children[i]); + } + } + Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.NullCount); + } + + private static int CountAllNodes(IReadOnlyDictionary<string, Field> fields) + { + int count = 0; + foreach (Field arrowArray in fields.Values) + { + CountSelfAndChildrenNodes(arrowArray.DataType, ref count); + } + return count; + } + + private static void CountSelfAndChildrenNodes(IArrowType type, ref int count) + { + if (type is NestedType nestedType) + { + foreach (Field childField in nestedType.Fields) + { + CountSelfAndChildrenNodes(childField.DataType, ref count); + } + } + count++; + } + + private protected void WriteRecordBatchInternal(RecordBatch recordBatch) + { + // TODO: Truncate buffers with extraneous padding / unused capacity + + if (!HasWrittenSchema) + { + WriteSchema(Schema); + HasWrittenSchema = true; + } + + if (!HasWrittenDictionaryBatch) + { + DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo); + WriteDictionaries(recordBatch); + HasWrittenDictionaryBatch = true; + } + + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + PreparingWritingRecordBatch(recordBatch); + + VectorOffset buffersVectorOffset = Builder.EndVector(); + + // Serialize record batch + + StartingWritingRecordBatch(); + + Offset<Flatbuf.RecordBatch> recordBatchOffset = 
Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, + fieldNodesVectorOffset, + buffersVectorOffset); + + long metadataLength = WriteMessage(Flatbuf.MessageHeader.RecordBatch, + recordBatchOffset, recordBatchBuilder.TotalLength); + + long bufferLength = WriteBufferData(recordBatchBuilder.Buffers); + + FinishedWritingRecordBatch(bufferLength, metadataLength); + } + + private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch, + CancellationToken cancellationToken = default) + { + // TODO: Truncate buffers with extraneous padding / unused capacity + + if (!HasWrittenSchema) + { + await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false); + HasWrittenSchema = true; + } + + if (!HasWrittenDictionaryBatch) + { + DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo); + await WriteDictionariesAsync(recordBatch, cancellationToken).ConfigureAwait(false); + HasWrittenDictionaryBatch = true; + } + + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + PreparingWritingRecordBatch(recordBatch); + + VectorOffset buffersVectorOffset = Builder.EndVector(); + + // Serialize record batch + + StartingWritingRecordBatch(); + + Offset<Flatbuf.RecordBatch> recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, + fieldNodesVectorOffset, + buffersVectorOffset); + + long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, + recordBatchOffset, recordBatchBuilder.TotalLength, + cancellationToken).ConfigureAwait(false); + + long bufferLength = await WriteBufferDataAsync(recordBatchBuilder.Buffers, cancellationToken).ConfigureAwait(false); + + FinishedWritingRecordBatch(bufferLength, metadataLength); + } + + private long WriteBufferData(IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> buffers) + { + long bodyLength = 0; + + for (int i = 0; i < buffers.Count; i++) + { + ArrowBuffer buffer = buffers[i].DataBuffer; + if 
(buffer.IsEmpty) + continue; + + WriteBuffer(buffer); + + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + int padding = paddedLength - buffer.Length; + if (padding > 0) + { + WritePadding(padding); + } + + bodyLength += paddedLength; + } + + // Write padding so the record batch message body length is a multiple of 8 bytes + + int bodyPaddingLength = CalculatePadding(bodyLength); + + WritePadding(bodyPaddingLength); + + return bodyLength + bodyPaddingLength; + } + + private async ValueTask<long> WriteBufferDataAsync(IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> buffers, CancellationToken cancellationToken = default) + { + long bodyLength = 0; + + for (int i = 0; i < buffers.Count; i++) + { + ArrowBuffer buffer = buffers[i].DataBuffer; + if (buffer.IsEmpty) + continue; + + await WriteBufferAsync(buffer, cancellationToken).ConfigureAwait(false); + + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + int padding = paddedLength - buffer.Length; + if (padding > 0) + { + await WritePaddingAsync(padding).ConfigureAwait(false); + } + + bodyLength += paddedLength; + } + + // Write padding so the record batch message body length is a multiple of 8 bytes + + int bodyPaddingLength = CalculatePadding(bodyLength); + + await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false); + + return bodyLength + bodyPaddingLength; + } + + private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset> PreparingWritingRecordBatch(RecordBatch recordBatch) + { + return PreparingWritingRecordBatch(recordBatch.Schema.Fields, recordBatch.ArrayList); + } + + private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset> PreparingWritingRecordBatch(IReadOnlyDictionary<string, Field> fields, IReadOnlyList<IArrowArray> arrays) + { + Builder.Clear(); + + // Serialize field nodes + + int fieldCount = fields.Count; + + Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes(fields)); + + // flatbuffer struct vectors 
have to be created in reverse order + for (int i = fieldCount - 1; i >= 0; i--) + { + CreateSelfAndChildrenFieldNodes(arrays[i].Data); + } + + VectorOffset fieldNodesVectorOffset = Builder.EndVector(); + + // Serialize buffers + + var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(); + for (int i = 0; i < fieldCount; i++) + { + IArrowArray fieldArray = arrays[i]; + fieldArray.Accept(recordBatchBuilder); + } + + IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> buffers = recordBatchBuilder.Buffers; + + Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count); + + // flatbuffer struct vectors have to be created in reverse order + for (int i = buffers.Count - 1; i >= 0; i--) + { + Flatbuf.Buffer.CreateBuffer(Builder, + buffers[i].Offset, buffers[i].DataBuffer.Length); + } + + return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset); + } + + + private protected void WriteDictionaries(RecordBatch recordBatch) + { + foreach (Field field in recordBatch.Schema.Fields.Values) + { + WriteDictionary(field); + } + } + + private protected void WriteDictionary(Field field) + { + if (field.DataType.TypeId != ArrowTypeId.Dictionary) + { + if (field.DataType is NestedType nestedType) + { + foreach (Field child in nestedType.Fields) + { + WriteDictionary(child); + } + } + return; + } + + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, Offset<Flatbuf.DictionaryBatch> dictionaryBatchOffset) = + CreateDictionaryBatchOffset(field); + + WriteMessage(Flatbuf.MessageHeader.DictionaryBatch, + dictionaryBatchOffset, recordBatchBuilder.TotalLength); + + WriteBufferData(recordBatchBuilder.Buffers); + } + + private protected async Task WriteDictionariesAsync(RecordBatch recordBatch, CancellationToken cancellationToken) + { + foreach (Field field in recordBatch.Schema.Fields.Values) + { + await WriteDictionaryAsync(field, cancellationToken).ConfigureAwait(false); + } + } + + private protected async Task WriteDictionaryAsync(Field field, CancellationToken 
cancellationToken) + { + if (field.DataType.TypeId != ArrowTypeId.Dictionary) + { + if (field.DataType is NestedType nestedType) + { + foreach (Field child in nestedType.Fields) + { + await WriteDictionaryAsync(child, cancellationToken).ConfigureAwait(false); + } + } + return; + } + + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, Offset<Flatbuf.DictionaryBatch> dictionaryBatchOffset) = + CreateDictionaryBatchOffset(field); + + await WriteMessageAsync(Flatbuf.MessageHeader.DictionaryBatch, + dictionaryBatchOffset, recordBatchBuilder.TotalLength, cancellationToken).ConfigureAwait(false); + + await WriteBufferDataAsync(recordBatchBuilder.Buffers, cancellationToken).ConfigureAwait(false); + } + + private Tuple<ArrowRecordBatchFlatBufferBuilder, Offset<Flatbuf.DictionaryBatch>> CreateDictionaryBatchOffset(Field field) + { + Field dictionaryField = new Field("dummy", ((DictionaryType)field.DataType).ValueType, false); + long id = DictionaryMemo.GetId(field); + IArrowArray dictionary = DictionaryMemo.GetDictionary(id); + + var fieldsDictionary = new Dictionary<string, Field> { + { dictionaryField.Name, dictionaryField } }; + + var arrays = new List<IArrowArray> { dictionary }; + + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + PreparingWritingRecordBatch(fieldsDictionary, arrays); + + VectorOffset buffersVectorOffset = Builder.EndVector(); + + // Serialize record batch + Offset<Flatbuf.RecordBatch> recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, dictionary.Length, + fieldNodesVectorOffset, + buffersVectorOffset); + + // TODO: Support delta. 
+ Offset<Flatbuf.DictionaryBatch> dictionaryBatchOffset = Flatbuf.DictionaryBatch.CreateDictionaryBatch(Builder, id, recordBatchOffset, false); + return Tuple.Create(recordBatchBuilder, dictionaryBatchOffset); + } + + private protected virtual void WriteStartInternal() + { + if (!HasWrittenSchema) + { + WriteSchema(Schema); + HasWrittenSchema = true; + } + } + + private protected async virtual ValueTask WriteStartInternalAsync(CancellationToken cancellationToken) + { + if (!HasWrittenSchema) + { + await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false); + HasWrittenSchema = true; + } + } + + private protected virtual void WriteEndInternal() + { + WriteIpcMessageLength(length: 0); + } + + private protected virtual ValueTask WriteEndInternalAsync(CancellationToken cancellationToken) + { + return WriteIpcMessageLengthAsync(length: 0, cancellationToken); + } + + private protected virtual void StartingWritingRecordBatch() + { + } + + private protected virtual void FinishedWritingRecordBatch(long bodyLength, long metadataLength) + { + } + + public virtual void WriteRecordBatch(RecordBatch recordBatch) + { + WriteRecordBatchInternal(recordBatch); + } + + public virtual Task WriteRecordBatchAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default) + { + return WriteRecordBatchInternalAsync(recordBatch, cancellationToken); + } + + public void WriteStart() + { + if (!HasWrittenStart) + { + WriteStartInternal(); + HasWrittenStart = true; + } + } + + public async Task WriteStartAsync(CancellationToken cancellationToken = default) + { + if (!HasWrittenStart) + { + await WriteStartInternalAsync(cancellationToken); + HasWrittenStart = true; + } + } + + public void WriteEnd() + { + if (!HasWrittenEnd) + { + WriteEndInternal(); + HasWrittenEnd = true; + } + } + + public async Task WriteEndAsync(CancellationToken cancellationToken = default) + { + if (!HasWrittenEnd) + { + await WriteEndInternalAsync(cancellationToken); + HasWrittenEnd = true; + } 
+ } + + private void WriteBuffer(ArrowBuffer arrowBuffer) + { + BaseStream.Write(arrowBuffer.Memory); + } + + private ValueTask WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default) + { + return BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken); + } + + private protected Offset<Flatbuf.Schema> SerializeSchema(Schema schema) + { + // Build metadata + VectorOffset metadataVectorOffset = default; + if (schema.HasMetadata) + { + Offset<Flatbuf.KeyValue>[] metadataOffsets = GetMetadataOffsets(schema.Metadata); + metadataVectorOffset = Flatbuf.Schema.CreateCustomMetadataVector(Builder, metadataOffsets); + } + + // Build fields + var fieldOffsets = new Offset<Flatbuf.Field>[schema.Fields.Count]; + for (int i = 0; i < fieldOffsets.Length; i++) + { + Field field = schema.GetFieldByIndex(i); + StringOffset fieldNameOffset = Builder.CreateString(field.Name); + ArrowTypeFlatbufferBuilder.FieldType fieldType = _fieldTypeBuilder.BuildFieldType(field); + + VectorOffset fieldChildrenVectorOffset = GetChildrenFieldOffset(field); + VectorOffset fieldMetadataVectorOffset = GetFieldMetadataOffset(field); + Offset<Flatbuf.DictionaryEncoding> dictionaryOffset = GetDictionaryOffset(field); + + fieldOffsets[i] = Flatbuf.Field.CreateField(Builder, + fieldNameOffset, field.IsNullable, fieldType.Type, fieldType.Offset, + dictionaryOffset, fieldChildrenVectorOffset, fieldMetadataVectorOffset); + } + + VectorOffset fieldsVectorOffset = Flatbuf.Schema.CreateFieldsVector(Builder, fieldOffsets); + + // Build schema + + Flatbuf.Endianness endianness = BitConverter.IsLittleEndian ? Flatbuf.Endianness.Little : Flatbuf.Endianness.Big; + + return Flatbuf.Schema.CreateSchema( + Builder, endianness, fieldsVectorOffset, metadataVectorOffset); + } + + private VectorOffset GetChildrenFieldOffset(Field field) + { + IArrowType targetDataType = field.DataType is DictionaryType dictionaryType ? 
+ dictionaryType.ValueType : + field.DataType; + + if (!(targetDataType is NestedType type)) + { + return default; + } + + int childrenCount = type.Fields.Count; + var children = new Offset<Flatbuf.Field>[childrenCount]; + + for (int i = 0; i < childrenCount; i++) + { + Field childField = type.Fields[i]; + StringOffset childFieldNameOffset = Builder.CreateString(childField.Name); + ArrowTypeFlatbufferBuilder.FieldType childFieldType = _fieldTypeBuilder.BuildFieldType(childField); + + VectorOffset childFieldChildrenVectorOffset = GetChildrenFieldOffset(childField); + VectorOffset childFieldMetadataVectorOffset = GetFieldMetadataOffset(childField); + Offset<Flatbuf.DictionaryEncoding> dictionaryOffset = GetDictionaryOffset(childField); + + children[i] = Flatbuf.Field.CreateField(Builder, + childFieldNameOffset, childField.IsNullable, childFieldType.Type, childFieldType.Offset, + dictionaryOffset, childFieldChildrenVectorOffset, childFieldMetadataVectorOffset); + } + + return Builder.CreateVectorOfTables(children); + } + + private VectorOffset GetFieldMetadataOffset(Field field) + { + if (!field.HasMetadata) + { + return default; + } + + Offset<Flatbuf.KeyValue>[] metadataOffsets = GetMetadataOffsets(field.Metadata); + return Flatbuf.Field.CreateCustomMetadataVector(Builder, metadataOffsets); + } + + private Offset<Flatbuf.DictionaryEncoding> GetDictionaryOffset(Field field) + { + if (field.DataType.TypeId != ArrowTypeId.Dictionary) + { + return default; + } + + long id = DictionaryMemo.GetOrAssignId(field); + var dicType = field.DataType as DictionaryType; + var indexType = dicType.IndexType as NumberType; + + Offset<Flatbuf.Int> indexOffset = Flatbuf.Int.CreateInt(Builder, indexType.BitWidth, indexType.IsSigned); + return Flatbuf.DictionaryEncoding.CreateDictionaryEncoding(Builder, id, indexOffset, dicType.Ordered); + } + + private Offset<Flatbuf.KeyValue>[] GetMetadataOffsets(IReadOnlyDictionary<string, string> metadata) + { + Debug.Assert(metadata != null); + 
Debug.Assert(metadata.Count > 0); + + Offset<Flatbuf.KeyValue>[] metadataOffsets = new Offset<Flatbuf.KeyValue>[metadata.Count]; + int index = 0; + foreach (KeyValuePair<string, string> metadatum in metadata) + { + StringOffset keyOffset = Builder.CreateString(metadatum.Key); + StringOffset valueOffset = Builder.CreateString(metadatum.Value); + + metadataOffsets[index++] = Flatbuf.KeyValue.CreateKeyValue(Builder, keyOffset, valueOffset); + } + + return metadataOffsets; + } + + private Offset<Flatbuf.Schema> WriteSchema(Schema schema) + { + Builder.Clear(); + + // Build schema + + Offset<Flatbuf.Schema> schemaOffset = SerializeSchema(schema); + + // Build message + + WriteMessage(Flatbuf.MessageHeader.Schema, schemaOffset, 0); + + return schemaOffset; + } + + private async ValueTask<Offset<Flatbuf.Schema>> WriteSchemaAsync(Schema schema, CancellationToken cancellationToken) + { + Builder.Clear(); + + // Build schema + + Offset<Flatbuf.Schema> schemaOffset = SerializeSchema(schema); + + // Build message + + await WriteMessageAsync(Flatbuf.MessageHeader.Schema, schemaOffset, 0, cancellationToken) + .ConfigureAwait(false); + + return schemaOffset; + } + + /// <summary> + /// Writes the message to the <see cref="BaseStream"/>. + /// </summary> + /// <returns> + /// The number of bytes written to the stream. 
+ /// </returns> + private protected long WriteMessage<T>( + Flatbuf.MessageHeader headerType, Offset<T> headerOffset, int bodyLength) + where T : struct + { + Offset<Flatbuf.Message> messageOffset = Flatbuf.Message.CreateMessage( + Builder, CurrentMetadataVersion, headerType, headerOffset.Value, + bodyLength); + + Builder.Finish(messageOffset.Value); + + ReadOnlyMemory<byte> messageData = Builder.DataBuffer.ToReadOnlyMemory(Builder.DataBuffer.Position, Builder.Offset); + int messagePaddingLength = CalculatePadding(_options.SizeOfIpcLength + messageData.Length); + + WriteIpcMessageLength(messageData.Length + messagePaddingLength); + + BaseStream.Write(messageData); + WritePadding(messagePaddingLength); + + checked + { + return _options.SizeOfIpcLength + messageData.Length + messagePaddingLength; + } + } + + /// <summary> + /// Writes the message to the <see cref="BaseStream"/>. + /// </summary> + /// <returns> + /// The number of bytes written to the stream. + /// </returns> + private protected virtual async ValueTask<long> WriteMessageAsync<T>( + Flatbuf.MessageHeader headerType, Offset<T> headerOffset, int bodyLength, + CancellationToken cancellationToken) + where T : struct + { + Offset<Flatbuf.Message> messageOffset = Flatbuf.Message.CreateMessage( + Builder, CurrentMetadataVersion, headerType, headerOffset.Value, + bodyLength); + + Builder.Finish(messageOffset.Value); + + ReadOnlyMemory<byte> messageData = Builder.DataBuffer.ToReadOnlyMemory(Builder.DataBuffer.Position, Builder.Offset); + int messagePaddingLength = CalculatePadding(_options.SizeOfIpcLength + messageData.Length); + + await WriteIpcMessageLengthAsync(messageData.Length + messagePaddingLength, cancellationToken) + .ConfigureAwait(false); + + await BaseStream.WriteAsync(messageData, cancellationToken).ConfigureAwait(false); + await WritePaddingAsync(messagePaddingLength).ConfigureAwait(false); + + checked + { + return _options.SizeOfIpcLength + messageData.Length + messagePaddingLength; + } + } + 
+ private protected void WriteFlatBuffer() + { + ReadOnlyMemory<byte> segment = Builder.DataBuffer.ToReadOnlyMemory(Builder.DataBuffer.Position, Builder.Offset); + + BaseStream.Write(segment); + } + + private protected async ValueTask WriteFlatBufferAsync(CancellationToken cancellationToken = default) + { + ReadOnlyMemory<byte> segment = Builder.DataBuffer.ToReadOnlyMemory(Builder.DataBuffer.Position, Builder.Offset); + + await BaseStream.WriteAsync(segment, cancellationToken).ConfigureAwait(false); + } + + private void WriteIpcMessageLength(int length) + { + Buffers.RentReturn(_options.SizeOfIpcLength, (buffer) => + { + Memory<byte> currentBufferPosition = buffer; + if (!_options.WriteLegacyIpcFormat) + { + BinaryPrimitives.WriteInt32LittleEndian( + currentBufferPosition.Span, MessageSerializer.IpcContinuationToken); + currentBufferPosition = currentBufferPosition.Slice(sizeof(int)); + } + + BinaryPrimitives.WriteInt32LittleEndian(currentBufferPosition.Span, length); + BaseStream.Write(buffer); + }); + } + + private async ValueTask WriteIpcMessageLengthAsync(int length, CancellationToken cancellationToken) + { + await Buffers.RentReturnAsync(_options.SizeOfIpcLength, async (buffer) => + { + Memory<byte> currentBufferPosition = buffer; + if (!_options.WriteLegacyIpcFormat) + { + BinaryPrimitives.WriteInt32LittleEndian( + currentBufferPosition.Span, MessageSerializer.IpcContinuationToken); + currentBufferPosition = currentBufferPosition.Slice(sizeof(int)); + } + + BinaryPrimitives.WriteInt32LittleEndian(currentBufferPosition.Span, length); + await BaseStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + }).ConfigureAwait(false); + } + + protected int CalculatePadding(long offset, int alignment = 8) + { + long result = BitUtility.RoundUpToMultiplePowerOfTwo(offset, alignment) - offset; + checked + { + return (int)result; + } + } + + private protected void WritePadding(int length) + { + if (length > 0) + { + BaseStream.Write(s_padding.AsMemory(0, 
Math.Min(s_padding.Length, length))); + } + } + + private protected ValueTask WritePaddingAsync(int length) + { + if (length > 0) + { + return BaseStream.WriteAsync(s_padding.AsMemory(0, Math.Min(s_padding.Length, length))); + } + + return default; + } + + public virtual void Dispose() + { + if (!_leaveOpen) + { + BaseStream.Dispose(); + } + } + } + + internal static class DictionaryCollector + { + internal static void Collect(RecordBatch recordBatch, ref DictionaryMemo dictionaryMemo) + { + Schema schema = recordBatch.Schema; + for (int i = 0; i < schema.Fields.Count; i++) + { + Field field = schema.GetFieldByIndex(i); + IArrowArray array = recordBatch.Column(i); + + CollectDictionary(field, array.Data, ref dictionaryMemo); + } + } + + private static void CollectDictionary(Field field, ArrayData arrayData, ref DictionaryMemo dictionaryMemo) + { + if (field.DataType is DictionaryType dictionaryType) + { + if (arrayData.Dictionary == null) + { + throw new ArgumentException($"{nameof(arrayData.Dictionary)} must not be null"); + } + arrayData.Dictionary.EnsureDataType(dictionaryType.ValueType.TypeId); + + IArrowArray dictionary = ArrowArrayFactory.BuildArray(arrayData.Dictionary); + + dictionaryMemo ??= new DictionaryMemo(); + long id = dictionaryMemo.GetOrAssignId(field); + + dictionaryMemo.AddOrReplaceDictionary(id, dictionary); + WalkChildren(dictionary.Data, ref dictionaryMemo); + } + else + { + WalkChildren(arrayData, ref dictionaryMemo); + } + } + + private static void WalkChildren(ArrayData arrayData, ref DictionaryMemo dictionaryMemo) + { + ArrayData[] children = arrayData.Children; + + if (children == null) + { + return; + } + + if (arrayData.DataType is NestedType nestedType) + { + for (int i = 0; i < nestedType.Fields.Count; i++) + { + Field childField = nestedType.Fields[i]; + ArrayData child = children[i]; + + CollectDictionary(childField, child, ref dictionaryMemo); + } + } + } + } +} diff --git 
a/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs new file mode 100644 index 000000000..ee119ae5d --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -0,0 +1,266 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using System;
using Apache.Arrow.Flatbuf;
using Apache.Arrow.Types;
using FlatBuffers;
using DateUnit = Apache.Arrow.Flatbuf.DateUnit;
using TimeUnit = Apache.Arrow.Types.TimeUnit;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Serializes the type of a <see cref="Field"/> into a flatbuffer and reports
    /// the resulting type tag and table offset.
    /// </summary>
    internal class ArrowTypeFlatbufferBuilder
    {
        /// <summary>
        /// The flatbuffer type tag of a serialized type together with the offset
        /// of its table in the builder.
        /// </summary>
        public struct FieldType
        {
            public readonly Flatbuf.Type Type;
            public readonly int Offset;

            /// <summary>
            /// Creates a <see cref="FieldType"/> from a typed flatbuffer offset.
            /// </summary>
            public static FieldType Build<T>(Flatbuf.Type type, Offset<T> offset)
                where T : struct =>
                new FieldType(type, offset.Value);

            public FieldType(Flatbuf.Type type, int offset)
            {
                Type = type;
                Offset = offset;
            }
        }

        /// <summary>
        /// Visitor that writes the flatbuffer table for each supported Arrow type
        /// and stores the outcome in <see cref="Result"/>.
        /// </summary>
        class TypeVisitor :
            IArrowTypeVisitor<BooleanType>,
            IArrowTypeVisitor<Int8Type>,
            IArrowTypeVisitor<Int16Type>,
            IArrowTypeVisitor<Int32Type>,
            IArrowTypeVisitor<Int64Type>,
            IArrowTypeVisitor<UInt8Type>,
            IArrowTypeVisitor<UInt16Type>,
            IArrowTypeVisitor<UInt32Type>,
            IArrowTypeVisitor<UInt64Type>,
            IArrowTypeVisitor<FloatType>,
            IArrowTypeVisitor<DoubleType>,
            IArrowTypeVisitor<StringType>,
            IArrowTypeVisitor<Date32Type>,
            IArrowTypeVisitor<Date64Type>,
            IArrowTypeVisitor<Time32Type>,
            IArrowTypeVisitor<Time64Type>,
            IArrowTypeVisitor<BinaryType>,
            IArrowTypeVisitor<TimestampType>,
            IArrowTypeVisitor<ListType>,
            IArrowTypeVisitor<UnionType>,
            IArrowTypeVisitor<StructType>,
            IArrowTypeVisitor<Decimal128Type>,
            IArrowTypeVisitor<Decimal256Type>,
            IArrowTypeVisitor<DictionaryType>,
            IArrowTypeVisitor<FixedSizeBinaryType>
        {
            private FlatBufferBuilder Builder { get; }

            /// <summary>
            /// The type tag and offset produced by the most recent <c>Visit</c> call.
            /// </summary>
            public FieldType Result { get; private set; }

            public TypeVisitor(FlatBufferBuilder builder)
            {
                Builder = builder;
            }

            // All fixed-width integer types share one flatbuffer representation.
            public void Visit(Int8Type type) => CreateIntType(type);
            public void Visit(Int16Type type) => CreateIntType(type);
            public void Visit(Int32Type type) => CreateIntType(type);
            public void Visit(Int64Type type) => CreateIntType(type);
            public void Visit(UInt8Type type) => CreateIntType(type);
            public void Visit(UInt16Type type) => CreateIntType(type);
            public void Visit(UInt32Type type) => CreateIntType(type);
            public void Visit(UInt64Type type) => CreateIntType(type);

            public void Visit(BooleanType type)
            {
                Flatbuf.Bool.StartBool(Builder);
                Result = FieldType.Build(
                    Flatbuf.Type.Bool,
                    Flatbuf.Bool.EndBool(Builder));
            }

            public void Visit(BinaryType type)
            {
                Flatbuf.Binary.StartBinary(Builder);
                Result = FieldType.Build(
                    Flatbuf.Type.Binary,
                    Flatbuf.Binary.EndBinary(Builder));
            }

            public void Visit(ListType type)
            {
                Flatbuf.List.StartList(Builder);
                Result = FieldType.Build(
                    Flatbuf.Type.List,
                    Flatbuf.List.EndList(Builder));
            }

            /// <summary>Union types are not supported by this builder.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public void Visit(UnionType type)
            {
                throw new NotImplementedException();
            }

            public void Visit(StringType type)
            {
                Flatbuf.Utf8.StartUtf8(Builder);
                Offset<Utf8> offset = Flatbuf.Utf8.EndUtf8(Builder);
                Result = FieldType.Build(
                    Flatbuf.Type.Utf8, offset);
            }

            public void Visit(TimestampType type)
            {
                // A blank timezone is written as the default (absent) string offset.
                StringOffset timezoneStringOffset = default;

                if (!string.IsNullOrWhiteSpace(type.Timezone))
                    timezoneStringOffset = Builder.CreateString(type.Timezone);

                Result = FieldType.Build(
                    Flatbuf.Type.Timestamp,
                    Flatbuf.Timestamp.CreateTimestamp(Builder, ToFlatBuffer(type.Unit), timezoneStringOffset));
            }

            public void Visit(Date32Type type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.Date,
                    Flatbuf.Date.CreateDate(Builder, DateUnit.DAY));
            }

            public void Visit(Date64Type type)
            {
                // Relies on CreateDate's default unit argument.
                Result = FieldType.Build(
                    Flatbuf.Type.Date,
                    Flatbuf.Date.CreateDate(Builder));
            }

            public void Visit(Time32Type type)
            {
                // Relies on CreateTime's default bit width argument.
                Result = FieldType.Build(
                    Flatbuf.Type.Time,
                    Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit)));
            }

            public void Visit(FloatType type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.FloatingPoint,
                    Flatbuf.FloatingPoint.CreateFloatingPoint(Builder, Precision.SINGLE));
            }

            public void Visit(DoubleType type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.FloatingPoint,
                    Flatbuf.FloatingPoint.CreateFloatingPoint(Builder, Precision.DOUBLE));
            }

            public void Visit(Time64Type type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.Time,
                    Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit), 64));
            }

            public void Visit(StructType type)
            {
                Flatbuf.Struct_.StartStruct_(Builder);
                Result = FieldType.Build(Flatbuf.Type.Struct_, Flatbuf.Struct_.EndStruct_(Builder));
            }

            public void Visit(Decimal128Type type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.Decimal,
                    Flatbuf.Decimal.CreateDecimal(Builder, type.Precision, type.Scale, type.BitWidth));
            }

            public void Visit(Decimal256Type type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.Decimal,
                    Flatbuf.Decimal.CreateDecimal(Builder, type.Precision, type.Scale, type.BitWidth));
            }

            private void CreateIntType(NumberType type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.Int,
                    Flatbuf.Int.CreateInt(Builder, type.BitWidth, type.IsSigned));
            }

            public void Visit(DictionaryType type)
            {
                // In this library, the dictionary "type" is a logical construct. Here we
                // pass through to the value type, as we've already captured the index
                // type in the DictionaryEncoding metadata in the parent field
                type.ValueType.Accept(this);
            }

            public void Visit(FixedSizeBinaryType type)
            {
                Result = FieldType.Build(
                    Flatbuf.Type.FixedSizeBinary,
                    Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth));
            }

            /// <summary>Fallback for types without a dedicated overload.</summary>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            public void Visit(IArrowType type)
            {
                throw new NotImplementedException();
            }
        }

        private readonly TypeVisitor _visitor;

        /// <summary>
        /// Creates a builder that writes type tables into <paramref name="builder"/>.
        /// </summary>
        public ArrowTypeFlatbufferBuilder(FlatBufferBuilder builder)
        {
            _visitor = new TypeVisitor(builder);
        }

        /// <summary>
        /// Serializes the data type of <paramref name="field"/> and returns its
        /// flatbuffer type tag and table offset.
        /// </summary>
        public FieldType BuildFieldType(Field field)
        {
            field.DataType.Accept(_visitor);
            return _visitor.Result;
        }

        /// <summary>
        /// Maps an Arrow <see cref="TimeUnit"/> to its flatbuffer counterpart.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="unit"/> is not a known unit.</exception>
        private static Flatbuf.TimeUnit ToFlatBuffer(TimeUnit unit)
        {
            switch (unit)
            {
                case TimeUnit.Microsecond:
                    return Flatbuf.TimeUnit.MICROSECOND;
                case TimeUnit.Millisecond:
                    return Flatbuf.TimeUnit.MILLISECOND;
                case TimeUnit.Nanosecond:
                    return Flatbuf.TimeUnit.NANOSECOND;
                case TimeUnit.Second:
                    return Flatbuf.TimeUnit.SECOND;
                default:
                    // ArgumentException takes (message, paramName); the original call
                    // passed them in the opposite order.
                    throw new ArgumentException(
                        $"unsupported timestamp unit <{unit}>", nameof(unit));
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/Block.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/Block.cs new file mode 100644 index 000000000..4aaa3b4d3 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/Block.cs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Immutable record of a block's offset, body length and metadata length.
    /// </summary>
    internal readonly struct Block
    {
        public readonly long Offset;
        public readonly long BodyLength;
        public readonly int MetadataLength;

        /// <summary>
        /// Creates a block from explicit values.
        /// </summary>
        /// <param name="offset">Value stored in <see cref="Offset"/>.</param>
        /// <param name="length">Value stored in <see cref="BodyLength"/>.</param>
        /// <param name="metadataLength">Value stored in <see cref="MetadataLength"/>.</param>
        public Block(long offset, long length, int metadataLength)
        {
            MetadataLength = metadataLength;
            BodyLength = length;
            Offset = offset;
        }

        /// <summary>
        /// Creates a block by copying the fields of a flatbuffer block.
        /// </summary>
        /// <param name="block">The flatbuffer block to copy from.</param>
        public Block(Flatbuf.Block block)
            : this(block.Offset, block.BodyLength, block.MetaDataLength)
        {
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/DictionaryMemo.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/DictionaryMemo.cs new file mode 100644 index 000000000..24f25a142 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/DictionaryMemo.cs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Keeps track of dictionary ids: which field maps to which id, and which
    /// value type and dictionary array belong to each id.
    /// </summary>
    class DictionaryMemo
    {
        private readonly Dictionary<long, IArrowArray> _idToDictionary;
        private readonly Dictionary<long, IArrowType> _idToValueType;
        private readonly Dictionary<Field, long> _fieldToId;

        public DictionaryMemo()
        {
            _idToDictionary = new Dictionary<long, IArrowArray>();
            _idToValueType = new Dictionary<long, IArrowType>();
            _fieldToId = new Dictionary<Field, long>();
        }

        /// <summary>
        /// Returns the value type registered for <paramref name="id"/>.
        /// </summary>
        /// <exception cref="ArgumentException">No value type is registered for the id.</exception>
        public IArrowType GetDictionaryType(long id)
        {
            if (!_idToValueType.TryGetValue(id, out IArrowType type))
            {
                throw new ArgumentException($"Dictionary with id {id} not found");
            }
            return type;
        }

        /// <summary>
        /// Returns the dictionary array registered for <paramref name="id"/>.
        /// </summary>
        /// <exception cref="ArgumentException">No dictionary is registered for the id.</exception>
        public IArrowArray GetDictionary(long id)
        {
            if (!_idToDictionary.TryGetValue(id, out IArrowArray dictionary))
            {
                throw new ArgumentException($"Dictionary with id {id} not found");
            }
            return dictionary;
        }

        /// <summary>
        /// Associates a dictionary-typed <paramref name="field"/> with <paramref name="id"/>
        /// and records the field's value type for that id.
        /// </summary>
        /// <remarks>
        /// Several fields may share one id provided their value types are the same
        /// instance. All validation happens before any state is modified, so a
        /// failed call leaves the memo unchanged.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The field is already registered, is not dictionary-typed, or its value
        /// type differs from the one already recorded for <paramref name="id"/>.
        /// </exception>
        public void AddField(long id, Field field)
        {
            if (_fieldToId.ContainsKey(field))
            {
                throw new ArgumentException($"Field {field.Name} is already in Memo");
            }

            if (field.DataType.TypeId != ArrowTypeId.Dictionary)
            {
                throw new ArgumentException($"Field type is not DictionaryType: Name={field.Name}, {field.DataType.Name}");
            }

            IArrowType valueType = ((DictionaryType)field.DataType).ValueType;

            bool idAlreadyKnown = _idToValueType.TryGetValue(id, out IArrowType valueTypeInDic);
            if (idAlreadyKnown && valueType != valueTypeInDic)
            {
                throw new ArgumentException($"Field type {field.DataType.Name} does not match the existing type {valueTypeInDic})");
            }

            _fieldToId.Add(field, id);
            if (!idAlreadyKnown)
            {
                // Only record the value type the first time an id is seen; calling
                // Add again for a known id would throw on the duplicate key.
                _idToValueType.Add(id, valueType);
            }
        }

        /// <summary>
        /// Returns the id registered for <paramref name="field"/>.
        /// </summary>
        /// <exception cref="ArgumentException">The field has not been registered.</exception>
        public long GetId(Field field)
        {
            if (!_fieldToId.TryGetValue(field, out long id))
            {
                throw new ArgumentException($"Field with name {field.Name} not found");
            }
            return id;
        }

        /// <summary>
        /// Returns the id registered for <paramref name="field"/>, registering the
        /// field under the next id (current field count plus one) when it is unknown.
        /// </summary>
        public long GetOrAssignId(Field field)
        {
            if (!_fieldToId.TryGetValue(field, out long id))
            {
                id = _fieldToId.Count + 1;
                AddField(id, field);
            }
            return id;
        }

        /// <summary>
        /// Stores <paramref name="dictionary"/> for <paramref name="id"/>, replacing
        /// any dictionary previously stored under that id.
        /// </summary>
        public void AddOrReplaceDictionary(long id, IArrowArray dictionary)
        {
            _idToDictionary[id] = dictionary;
        }

        /// <summary>
        /// Replaces the dictionary stored for <paramref name="id"/> with the
        /// concatenation of the current dictionary and <paramref name="deltaDictionary"/>.
        /// </summary>
        /// <exception cref="KeyNotFoundException">No dictionary is stored for the id.</exception>
        public void AddDeltaDictionary(long id, IArrowArray deltaDictionary, MemoryAllocator allocator = default)
        {
            IArrowArray currentDictionary = _idToDictionary[id];
            IArrowArray dictionary = ArrowArrayConcatenator.Concatenate(new List<IArrowArray>{ currentDictionary, deltaDictionary }, allocator);
            AddOrReplaceDictionary(id, dictionary);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs new file mode 100644 index 000000000..255ea4f53 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System.Threading;
using System.Threading.Tasks;

namespace Apache.Arrow.Ipc
{
    /// <summary>Reads record batches one at a time.</summary>
    public interface IArrowReader
    {
        /// <summary>Asynchronously reads the next record batch.</summary>
        /// <param name="cancellationToken">Token used to cancel the read.</param>
        ValueTask<RecordBatch> ReadNextRecordBatchAsync(
            CancellationToken cancellationToken = default);
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs
// new file mode 100644
// index 000000000..b6cc3a1cb
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs
// @@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace Apache.Arrow.Ipc
{
    /// <summary>Options controlling how IPC messages are written.</summary>
    public class IpcOptions
    {
        internal static IpcOptions Default { get; } = new IpcOptions();

        /// <summary>
        /// Write the pre-0.15.0 encapsulated IPC message format
        /// consisting of a 4-byte prefix instead of 8 byte.
        /// </summary>
        public bool WriteLegacyIpcFormat { get; set; }

        public IpcOptions()
        {
        }

        /// <summary>
        /// Gets the number of bytes used in the IPC message prefix.
        /// </summary>
        internal int SizeOfIpcLength => WriteLegacyIpcFormat ? 4 : 8;
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
// new file mode 100644
// index 000000000..a09fff61b
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
// @@ -0,0 +1,201 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Apache.Arrow.Types;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// Converts Flatbuffer schema/field descriptions into their Arrow counterparts.
    /// </summary>
    internal class MessageSerializer
    {
        public const int IpcContinuationToken = -1;

        /// <summary>
        /// Maps an integer bit width and signedness to the matching Arrow number type.
        /// </summary>
        /// <param name="bitWidth">8, 16, 32 or 64.</param>
        /// <param name="signed">Whether the integer is signed.</param>
        /// <exception cref="InvalidDataException">The bit width is not one of the supported values.</exception>
        public static Types.NumberType GetNumberType(int bitWidth, bool signed)
        {
            if (signed)
            {
                switch (bitWidth)
                {
                    case 8:
                        return Types.Int8Type.Default;
                    case 16:
                        return Types.Int16Type.Default;
                    case 32:
                        return Types.Int32Type.Default;
                    case 64:
                        return Types.Int64Type.Default;
                }
            }
            else
            {
                switch (bitWidth)
                {
                    case 8:
                        return Types.UInt8Type.Default;
                    case 16:
                        return Types.UInt16Type.Default;
                    case 32:
                        return Types.UInt32Type.Default;
                    case 64:
                        return Types.UInt64Type.Default;
                }
            }

            // InvalidDataException matches every other failure raised by this class and
            // is still caught by existing catch (Exception) handlers.
            throw new InvalidDataException($"Unexpected bit width of {bitWidth} for " +
                $"{(signed ? "signed" : "unsigned")} integer.");
        }

        /// <summary>
        /// Builds an Arrow <see cref="Schema"/> from its Flatbuffer description.
        /// </summary>
        /// <param name="schema">Flatbuffer schema to convert.</param>
        /// <param name="dictionaryMemo">
        /// Memo that receives every dictionary-encoded field; it is created on demand
        /// when it is null and such a field is encountered.
        /// </param>
        internal static Schema GetSchema(Flatbuf.Schema schema, ref DictionaryMemo dictionaryMemo)
        {
            List<Field> fields = new List<Field>();
            for (int i = 0; i < schema.FieldsLength; i++)
            {
                Flatbuf.Field field = schema.Fields(i).GetValueOrDefault();
                fields.Add(FieldFromFlatbuffer(field, ref dictionaryMemo));
            }

            // Metadata stays null when the schema carries none.
            Dictionary<string, string> metadata = schema.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
            for (int i = 0; i < schema.CustomMetadataLength; i++)
            {
                Flatbuf.KeyValue keyValue = schema.CustomMetadata(i).GetValueOrDefault();

                metadata[keyValue.Key] = keyValue.Value;
            }

            return new Schema(fields, metadata, copyCollections: false);
        }

        /// <summary>
        /// Converts one Flatbuffer field (and, recursively, its children) to an Arrow
        /// <see cref="Field"/>, registering it in <paramref name="dictionaryMemo"/> when
        /// it is dictionary-encoded.
        /// </summary>
        private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField, ref DictionaryMemo dictionaryMemo)
        {
            // Null (rather than an empty array) when the field has no children.
            Field[] childFields = flatbufField.ChildrenLength > 0 ? new Field[flatbufField.ChildrenLength] : null;
            for (int i = 0; i < flatbufField.ChildrenLength; i++)
            {
                Flatbuf.Field? childFlatbufField = flatbufField.Children(i);
                childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value, ref dictionaryMemo);
            }

            Flatbuf.DictionaryEncoding? dictionaryEncoding = flatbufField.Dictionary;
            IArrowType type = GetFieldArrowType(flatbufField, childFields);

            if (dictionaryEncoding.HasValue)
            {
                // Fall back to a 32-bit signed index when the encoding does not specify one.
                Flatbuf.Int? indexTypeAsInt = dictionaryEncoding.Value.IndexType;
                IArrowType indexType = indexTypeAsInt.HasValue ?
                    GetNumberType(indexTypeAsInt.Value.BitWidth, indexTypeAsInt.Value.IsSigned) :
                    GetNumberType(Int32Type.Default.BitWidth, Int32Type.Default.IsSigned);

                type = new DictionaryType(indexType, type, dictionaryEncoding.Value.IsOrdered);
            }

            Dictionary<string, string> metadata = flatbufField.CustomMetadataLength > 0 ? new Dictionary<string, string>() : null;
            for (int i = 0; i < flatbufField.CustomMetadataLength; i++)
            {
                Flatbuf.KeyValue keyValue = flatbufField.CustomMetadata(i).GetValueOrDefault();

                metadata[keyValue.Key] = keyValue.Value;
            }

            var arrowField = new Field(flatbufField.Name, type, flatbufField.Nullable, metadata, copyCollections: false);

            if (dictionaryEncoding.HasValue)
            {
                dictionaryMemo ??= new DictionaryMemo();
                dictionaryMemo.AddField(dictionaryEncoding.Value.Id, arrowField);
            }

            return arrowField;
        }

        /// <summary>
        /// Resolves the Arrow type described by a Flatbuffer field.
        /// </summary>
        /// <param name="field">Flatbuffer field whose type is resolved.</param>
        /// <param name="childFields">Already-converted children; may be null when there are none.</param>
        /// <exception cref="InvalidDataException">The type, or one of its parameters, is not supported.</exception>
        private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] childFields = null)
        {
            switch (field.TypeType)
            {
                case Flatbuf.Type.Int:
                    Flatbuf.Int intMetaData = field.Type<Flatbuf.Int>().Value;
                    return MessageSerializer.GetNumberType(intMetaData.BitWidth, intMetaData.IsSigned);
                case Flatbuf.Type.FloatingPoint:
                    Flatbuf.FloatingPoint floatingPointTypeMetadata = field.Type<Flatbuf.FloatingPoint>().Value;
                    switch (floatingPointTypeMetadata.Precision)
                    {
                        case Flatbuf.Precision.SINGLE:
                            return Types.FloatType.Default;
                        case Flatbuf.Precision.DOUBLE:
                            return Types.DoubleType.Default;
                        case Flatbuf.Precision.HALF:
                            return Types.HalfFloatType.Default;
                        default:
                            throw new InvalidDataException("Unsupported floating point precision");
                    }
                case Flatbuf.Type.Bool:
                    return new Types.BooleanType();
                case Flatbuf.Type.Decimal:
                    Flatbuf.Decimal decMeta = field.Type<Flatbuf.Decimal>().Value;
                    switch (decMeta.BitWidth)
                    {
                        case 128:
                            return new Types.Decimal128Type(decMeta.Precision, decMeta.Scale);
                        case 256:
                            return new Types.Decimal256Type(decMeta.Precision, decMeta.Scale);
                        default:
                            throw new InvalidDataException("Unsupported decimal bit width " + decMeta.BitWidth);
                    }
                case Flatbuf.Type.Date:
                    Flatbuf.Date dateMeta = field.Type<Flatbuf.Date>().Value;
                    switch (dateMeta.Unit)
                    {
                        case Flatbuf.DateUnit.DAY:
                            return Types.Date32Type.Default;
                        case Flatbuf.DateUnit.MILLISECOND:
                            return Types.Date64Type.Default;
                        default:
                            throw new InvalidDataException("Unsupported date unit");
                    }
                case Flatbuf.Type.Time:
                    Flatbuf.Time timeMeta = field.Type<Flatbuf.Time>().Value;
                    switch (timeMeta.BitWidth)
                    {
                        case 32:
                            return new Types.Time32Type(timeMeta.Unit.ToArrow());
                        case 64:
                            return new Types.Time64Type(timeMeta.Unit.ToArrow());
                        default:
                            throw new InvalidDataException("Unsupported time bit width");
                    }
                case Flatbuf.Type.Timestamp:
                    Flatbuf.Timestamp timestampTypeMetadata = field.Type<Flatbuf.Timestamp>().Value;
                    Types.TimeUnit unit = timestampTypeMetadata.Unit.ToArrow();
                    string timezone = timestampTypeMetadata.Timezone;
                    return new Types.TimestampType(unit, timezone);
                case Flatbuf.Type.Interval:
                    Flatbuf.Interval intervalMetadata = field.Type<Flatbuf.Interval>().Value;
                    return new Types.IntervalType(intervalMetadata.Unit.ToArrow());
                case Flatbuf.Type.Utf8:
                    return new Types.StringType();
                case Flatbuf.Type.FixedSizeBinary:
                    Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = field.Type<Flatbuf.FixedSizeBinary>().Value;
                    return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth);
                case Flatbuf.Type.Binary:
                    return Types.BinaryType.Default;
                case Flatbuf.Type.List:
                    if (childFields == null || childFields.Length != 1)
                    {
                        throw new InvalidDataException($"List type must have exactly one child.");
                    }
                    return new Types.ListType(childFields[0]);
                case Flatbuf.Type.Struct_:
                    // The caller passes null for a field without children, so a struct
                    // with zero members must be mapped to an empty field list instead of
                    // handing null to StructType.
                    return new Types.StructType(childFields ?? Array.Empty<Field>());
                default:
                    throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
            }
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Ipc/ReadOnlyMemoryBufferAllocator.cs b/src/arrow/csharp/src/Apache.Arrow/Ipc/ReadOnlyMemoryBufferAllocator.cs
// new file mode 100644
// index 000000000..7e78fcf9e
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Ipc/ReadOnlyMemoryBufferAllocator.cs
// @@ -0,0 +1,39 @@
// Licensed to the Apache Software
// Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using FlatBuffers;
using System;

namespace Apache.Arrow.Ipc
{
    /// <summary>
    /// A <see cref="ByteBufferAllocator"/> over an existing read-only buffer.
    /// Only the read-only views are available; every writable member throws
    /// <see cref="NotSupportedException"/>.
    /// </summary>
    internal sealed class ReadOnlyMemoryBufferAllocator : ByteBufferAllocator
    {
        private readonly ReadOnlyMemory<byte> _source;

        /// <summary>Wraps <paramref name="buffer"/> without copying it.</summary>
        public ReadOnlyMemoryBufferAllocator(ReadOnlyMemory<byte> buffer)
        {
            Length = buffer.Length;
            _source = buffer;
        }

        public override ReadOnlySpan<byte> ReadOnlySpan
        {
            get { return _source.Span; }
        }

        public override ReadOnlyMemory<byte> ReadOnlyMemory
        {
            get { return _source; }
        }

        // The wrapped buffer cannot be written to or grown, so the mutable
        // members below are deliberately unsupported.
        public override Memory<byte> Memory
        {
            get { throw new NotSupportedException(); }
        }

        public override Span<byte> Span
        {
            get { throw new NotSupportedException(); }
        }

        public override void GrowFront(int newSize)
        {
            throw new NotSupportedException();
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Memory/MemoryAllocator.cs b/src/arrow/csharp/src/Apache.Arrow/Memory/MemoryAllocator.cs
// new file mode 100644
// index 000000000..58bd110ae
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Memory/MemoryAllocator.cs
// @@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.
// See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Buffers;
using System.Threading;

namespace Apache.Arrow.Memory
{
    /// <summary>
    /// Base class for allocators that hand out byte buffers and keep running
    /// allocation statistics.
    /// </summary>
    public abstract class MemoryAllocator
    {
        public const int DefaultAlignment = 64;

        // Shared owner returned for every zero-length request.
        private static IMemoryOwner<byte> NullMemoryOwner { get; } = new NullMemoryOwner();

        /// <summary>Lazily-created default allocator.</summary>
        public static Lazy<MemoryAllocator> Default { get; } = new Lazy<MemoryAllocator>(BuildDefault, true);

        /// <summary>Counters describing the allocations made through an allocator.</summary>
        public class Stats
        {
            private long _bytesAllocated;
            private long _allocations;

            public long Allocations => Interlocked.Read(ref _allocations);
            public long BytesAllocated => Interlocked.Read(ref _bytesAllocated);

            internal void Allocate(int n)
            {
                Interlocked.Increment(ref _allocations);
                Interlocked.Add(ref _bytesAllocated, n);
            }
        }

        public Stats Statistics { get; }

        protected int Alignment { get; }

        protected MemoryAllocator(int alignment = DefaultAlignment)
        {
            Statistics = new Stats();
            Alignment = alignment;
        }

        /// <summary>
        /// Allocates a buffer of <paramref name="length"/> bytes.
        /// </summary>
        /// <param name="length">Number of bytes requested; zero yields a shared empty owner.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
        public IMemoryOwner<byte> Allocate(int length)
        {
            if (length < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length));
            }

            if (length == 0)
            {
                return NullMemoryOwner;
            }

            IMemoryOwner<byte> memoryOwner = AllocateInternal(length, out int bytesAllocated);

            Statistics.Allocate(bytesAllocated);

            return memoryOwner;
        }

        private static MemoryAllocator BuildDefault()
        {
            return new NativeMemoryAllocator(DefaultAlignment);
        }

        /// <summary>
        /// Performs the actual allocation.
        /// </summary>
        /// <param name="length">Number of usable bytes requested.</param>
        /// <param name="bytesAllocated">Number of bytes actually reserved, reported to <see cref="Statistics"/>.</param>
        protected abstract IMemoryOwner<byte> AllocateInternal(int length, out int bytesAllocated);
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs b/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs
// new file mode 100644
// index 000000000..69a046747
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryAllocator.cs
// @@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Buffers;
using System.Runtime.InteropServices;

namespace Apache.Arrow.Memory
{
    /// <summary>
    /// Allocator backed by unmanaged memory obtained from <see cref="Marshal.AllocHGlobal(int)"/>.
    /// </summary>
    public class NativeMemoryAllocator : MemoryAllocator
    {
        public NativeMemoryAllocator(int alignment = DefaultAlignment)
            : base(alignment) { }

        /// <summary>
        /// Reserves <paramref name="length"/> + Alignment bytes of unmanaged memory and
        /// returns a zero-filled owner over a <paramref name="length"/>-byte window of it.
        /// </summary>
        /// <param name="length">Number of usable bytes requested.</param>
        /// <param name="bytesAllocated">Receives the full size of the unmanaged block.</param>
        protected override IMemoryOwner<byte> AllocateInternal(int length, out int bytesAllocated)
        {
            // TODO: Optimize storage overhead; native memory manager stores a pointer
            // to allocated memory, offset, and the allocation size.

            // TODO: Should the allocation be moved to NativeMemory?

            // Over-allocate by Alignment bytes so the window starting at `offset`
            // always fits inside the block.
            int size = length + Alignment;
            IntPtr ptr = Marshal.AllocHGlobal(size);

            NativeMemoryManager manager;
            try
            {
                int offset = (int)(Alignment - (ptr.ToInt64() & (Alignment - 1)));
                manager = new NativeMemoryManager(ptr, offset, length);
            }
            catch
            {
                // Nothing owns the block yet, so release it before propagating.
                Marshal.FreeHGlobal(ptr);
                throw;
            }

            bytesAllocated = size;

            // NativeMemoryManager removes exactly `length` bytes of pressure when it is
            // disposed, and added/removed pressure must balance; registering the padded
            // size here would leave Alignment bytes of phantom pressure behind for
            // every allocation.
            if (length > 0)
            {
                GC.AddMemoryPressure(length);
            }

            // Ensure all allocated memory is zeroed.
            manager.Memory.Span.Fill(0);

            return manager;
        }
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs
// new file mode 100644
// index 000000000..00eb7dc16
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs
// @@ -0,0 +1,83 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace Apache.Arrow.Memory
{
    /// <summary>
    /// A <see cref="MemoryManager{T}"/> over a block of unmanaged memory. The block
    /// is released with <see cref="Marshal.FreeHGlobal(IntPtr)"/> when the manager is
    /// disposed or finalized.
    /// </summary>
    public class NativeMemoryManager: MemoryManager<byte>
    {
        private IntPtr _ptr;
        private readonly int _offset;
        private readonly int _length;

        /// <param name="ptr">Start of the unmanaged block; ownership passes to this instance.</param>
        /// <param name="offset">Byte offset from <paramref name="ptr"/> at which the exposed memory starts.</param>
        /// <param name="length">Number of bytes exposed through the span.</param>
        public NativeMemoryManager(IntPtr ptr, int offset, int length)
        {
            _ptr = ptr;
            _offset = offset;
            _length = length;
        }

        ~NativeMemoryManager()
        {
            Dispose(false);
        }

        /// <summary>Returns a span over the exposed window of the unmanaged block.</summary>
        /// <exception cref="ObjectDisposedException">The block has already been released.</exception>
        public override unsafe Span<byte> GetSpan()
        {
            ThrowIfDisposed();

            void* ptr = CalculatePointer(0);
            return new Span<byte>(ptr, _length);
        }

        /// <summary>Returns a handle pointing at <paramref name="elementIndex"/> within the exposed window.</summary>
        /// <exception cref="ObjectDisposedException">The block has already been released.</exception>
        public override unsafe MemoryHandle Pin(int elementIndex = 0)
        {
            // NOTE: Unmanaged memory doesn't require GC pinning because by definition it's not
            // managed by the garbage collector.

            ThrowIfDisposed();

            void* ptr = CalculatePointer(elementIndex);
            return new MemoryHandle(ptr, default, this);
        }

        public override void Unpin()
        {
            // SEE: Pin implementation
            return;
        }

        protected override void Dispose(bool disposing)
        {
            // Only free once. Atomically taking the pointer makes the release
            // idempotent without locking on `this`, which external code could also
            // lock on because the type is public.
            IntPtr ptr = Interlocked.Exchange(ref _ptr, IntPtr.Zero);
            if (ptr == IntPtr.Zero)
            {
                return;
            }

            Marshal.FreeHGlobal(ptr);

            // RemoveMemoryPressure rejects non-positive values, and this method also
            // runs on the finalizer thread where an exception would be fatal.
            if (_length > 0)
            {
                GC.RemoveMemoryPressure(_length);
            }
        }

        // Once the block is freed _ptr is zero, so any pointer computed from it would
        // be invalid; fail loudly instead of handing it out.
        private void ThrowIfDisposed()
        {
            if (_ptr == IntPtr.Zero)
            {
                throw new ObjectDisposedException(nameof(NativeMemoryManager));
            }
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private unsafe void* CalculatePointer(int index) =>
            (_ptr + _offset + index).ToPointer();
    }
}
// diff --git a/src/arrow/csharp/src/Apache.Arrow/Memory/NullMemoryOwner.cs b/src/arrow/csharp/src/Apache.Arrow/Memory/NullMemoryOwner.cs
// new file mode 100644
// index 000000000..7415ffc32
// --- /dev/null
// +++ b/src/arrow/csharp/src/Apache.Arrow/Memory/NullMemoryOwner.cs
// @@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Buffers;

namespace Apache.Arrow.Memory
{
    /// <summary>
    /// An <see cref="IMemoryOwner{T}"/> that always exposes empty memory and has
    /// nothing to release.
    /// </summary>
    internal class NullMemoryOwner : IMemoryOwner<byte>
    {
        /// <summary>Always the empty memory block.</summary>
        public Memory<byte> Memory
        {
            get { return Memory<byte>.Empty; }
        }

        /// <summary>Does nothing; there is no underlying buffer.</summary>
        public void Dispose()
        {
        }
    }
}
\ No newline at end of file diff --git a/src/arrow/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs b/src/arrow/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs new file mode 100644 index 000000000..415bf89af --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Apache.Arrow.Flight, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] diff --git a/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.Designer.cs b/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.Designer.cs new file mode 100644 index 000000000..b92b34e66 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.Designer.cs @@ -0,0 +1,73 @@ +//------------------------------------------------------------------------------ +// <auto-generated> +// This code was generated by a tool. 
+// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// </auto-generated> +//------------------------------------------------------------------------------ + +namespace Apache.Arrow.Properties { + using System; + using System.Reflection; + + + /// <summary> + /// A strongly-typed resource class, for looking up localized strings, etc. + /// </summary> + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "15.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Resources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Resources() { + } + + /// <summary> + /// Returns the cached ResourceManager instance used by this class. 
+ /// </summary> + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Apache.Arrow.Properties.Resources", typeof(Resources).GetTypeInfo().Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// <summary> + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. + /// </summary> + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + + /// <summary> + /// Looks up a localized string similar to Array has invalid data type.. + /// </summary> + internal static string ExceptionArrayDataInvalidType { + get { + return ResourceManager.GetString("ExceptionArrayDataInvalidType", resourceCulture); + } + } + } +} diff --git a/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.resx b/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.resx new file mode 100644 index 000000000..271675a63 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Properties/Resources.resx @@ -0,0 +1,123 @@ +<?xml version="1.0" encoding="utf-8"?> +<root> + <!-- + Microsoft ResX Schema + + Version 2.0 + + The primary goals of this format is to allow a simple XML format + that is mostly human readable. The generation and parsing of the + various data types are done through the TypeConverter classes + associated with the data types. + + Example: + + ... ado.net/XML headers & schema ... 
+ <resheader name="resmimetype">text/microsoft-resx</resheader> + <resheader name="version">2.0</resheader> + <resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader> + <resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader> + <data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data> + <data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data> + <data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64"> + <value>[base64 mime encoded serialized .NET Framework object]</value> + </data> + <data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64"> + <value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value> + <comment>This is a comment</comment> + </data> + + There are any number of "resheader" rows that contain simple + name/value pairs. + + Each data row contains a name, and value. The row also contains a + type or mimetype. Type corresponds to a .NET class that support + text/value conversion through the TypeConverter architecture. + Classes that don't support this are serialized and stored with the + mimetype set. + + The mimetype is used for serialized objects, and tells the + ResXResourceReader how to depersist the object. This is currently not + extensible. For a given mimetype the value must be set accordingly: + + Note - application/x-microsoft.net.object.binary.base64 is the format + that the ResXResourceWriter will generate, however the reader can + read any of the formats listed below. + + mimetype: application/x-microsoft.net.object.binary.base64 + value : The object must be serialized with + : System.Runtime.Serialization.Formatters.Binary.BinaryFormatter + : and then encoded with base64 encoding. 
+ + mimetype: application/x-microsoft.net.object.soap.base64 + value : The object must be serialized with + : System.Runtime.Serialization.Formatters.Soap.SoapFormatter + : and then encoded with base64 encoding. + + mimetype: application/x-microsoft.net.object.bytearray.base64 + value : The object must be serialized into a byte array + : using a System.ComponentModel.TypeConverter + : and then encoded with base64 encoding. + --> + <xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata"> + <xsd:import namespace="http://www.w3.org/XML/1998/namespace" /> + <xsd:element name="root" msdata:IsDataSet="true"> + <xsd:complexType> + <xsd:choice maxOccurs="unbounded"> + <xsd:element name="metadata"> + <xsd:complexType> + <xsd:sequence> + <xsd:element name="value" type="xsd:string" minOccurs="0" /> + </xsd:sequence> + <xsd:attribute name="name" use="required" type="xsd:string" /> + <xsd:attribute name="type" type="xsd:string" /> + <xsd:attribute name="mimetype" type="xsd:string" /> + <xsd:attribute ref="xml:space" /> + </xsd:complexType> + </xsd:element> + <xsd:element name="assembly"> + <xsd:complexType> + <xsd:attribute name="alias" type="xsd:string" /> + <xsd:attribute name="name" type="xsd:string" /> + </xsd:complexType> + </xsd:element> + <xsd:element name="data"> + <xsd:complexType> + <xsd:sequence> + <xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" /> + <xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" /> + </xsd:sequence> + <xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" /> + <xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" /> + <xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" /> + <xsd:attribute ref="xml:space" /> + </xsd:complexType> + </xsd:element> + <xsd:element name="resheader"> + <xsd:complexType> + <xsd:sequence> + <xsd:element name="value" type="xsd:string" 
minOccurs="0" msdata:Ordinal="1" /> + </xsd:sequence> + <xsd:attribute name="name" type="xsd:string" use="required" /> + </xsd:complexType> + </xsd:element> + </xsd:choice> + </xsd:complexType> + </xsd:element> + </xsd:schema> + <resheader name="resmimetype"> + <value>text/microsoft-resx</value> + </resheader> + <resheader name="version"> + <value>2.0</value> + </resheader> + <resheader name="reader"> + <value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> + </resheader> + <resheader name="writer"> + <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> + </resheader> + <data name="ExceptionArrayDataInvalidType" xml:space="preserve"> + <value>Array has invalid data type.</value> + </data> +</root>
\ No newline at end of file diff --git a/src/arrow/csharp/src/Apache.Arrow/RecordBatch.Builder.cs b/src/arrow/csharp/src/Apache.Arrow/RecordBatch.Builder.cs new file mode 100644 index 000000000..c50bf1ace --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/RecordBatch.Builder.cs @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow
{
    public partial class RecordBatch
    {
        /// <summary>
        /// Creates typed Arrow arrays: each method instantiates the matching array builder,
        /// passes it to the caller-supplied callback, and builds the array with the allocator
        /// this instance was constructed with.
        /// </summary>
        public class ArrayBuilder
        {
            private readonly MemoryAllocator _allocator;

            internal ArrayBuilder(MemoryAllocator allocator)
            {
                _allocator = allocator;
            }

            public BooleanArray Boolean(Action<BooleanArray.Builder> action) => Build<BooleanArray, BooleanArray.Builder>(new BooleanArray.Builder(), action);
            public Int8Array Int8(Action<Int8Array.Builder> action) => Build<Int8Array, Int8Array.Builder>(new Int8Array.Builder(), action);
            public Int16Array Int16(Action<Int16Array.Builder> action) => Build<Int16Array, Int16Array.Builder>(new Int16Array.Builder(), action);
            public Int32Array Int32(Action<Int32Array.Builder> action) => Build<Int32Array, Int32Array.Builder>(new Int32Array.Builder(), action);
            public Int64Array Int64(Action<Int64Array.Builder> action) => Build<Int64Array, Int64Array.Builder>(new Int64Array.Builder(), action);
            public UInt8Array UInt8(Action<UInt8Array.Builder> action) => Build<UInt8Array, UInt8Array.Builder>(new UInt8Array.Builder(), action);
            public UInt16Array UInt16(Action<UInt16Array.Builder> action) => Build<UInt16Array, UInt16Array.Builder>(new UInt16Array.Builder(), action);
            public UInt32Array UInt32(Action<UInt32Array.Builder> action) => Build<UInt32Array, UInt32Array.Builder>(new UInt32Array.Builder(), action);
            public UInt64Array UInt64(Action<UInt64Array.Builder> action) => Build<UInt64Array, UInt64Array.Builder>(new UInt64Array.Builder(), action);
            public FloatArray Float(Action<FloatArray.Builder> action) => Build<FloatArray, FloatArray.Builder>(new FloatArray.Builder(), action);
            public DoubleArray Double(Action<DoubleArray.Builder> action) => Build<DoubleArray, DoubleArray.Builder>(new DoubleArray.Builder(), action);
            public Decimal128Array Decimal128(Decimal128Type type, Action<Decimal128Array.Builder> action) =>
                Build<Decimal128Array, Decimal128Array.Builder>(
                    new Decimal128Array.Builder(type), action);
            public Decimal256Array Decimal256(Decimal256Type type, Action<Decimal256Array.Builder> action) =>
                Build<Decimal256Array, Decimal256Array.Builder>(
                    new Decimal256Array.Builder(type), action);
            public Date32Array Date32(Action<Date32Array.Builder> action) => Build<Date32Array, Date32Array.Builder>(new Date32Array.Builder(), action);
            public Date64Array Date64(Action<Date64Array.Builder> action) => Build<Date64Array, Date64Array.Builder>(new Date64Array.Builder(), action);
            public BinaryArray Binary(Action<BinaryArray.Builder> action) => Build<BinaryArray, BinaryArray.Builder>(new BinaryArray.Builder(), action);
            public StringArray String(Action<StringArray.Builder> action) => Build<StringArray, StringArray.Builder>(new StringArray.Builder(), action);
            public TimestampArray Timestamp(Action<TimestampArray.Builder> action) => Build<TimestampArray, TimestampArray.Builder>(new TimestampArray.Builder(), action);
            public TimestampArray Timestamp(TimestampType type, Action<TimestampArray.Builder> action) =>
                Build<TimestampArray, TimestampArray.Builder>(
                    new TimestampArray.Builder(type), action);
            public TimestampArray Timestamp(TimeUnit unit, TimeZoneInfo timezone, Action<TimestampArray.Builder> action) =>
                Build<TimestampArray, TimestampArray.Builder>(
                    new TimestampArray.Builder(new TimestampType(unit, timezone)), action);

            /// <summary>
            /// Runs <paramref name="action"/> against <paramref name="builder"/> and builds the array.
            /// </summary>
            /// <returns>
            /// The built array, or <c>default</c> when <paramref name="action"/> is null.
            /// </returns>
            private TArray Build<TArray, TArrayBuilder>(TArrayBuilder builder, Action<TArrayBuilder> action)
                where TArray: IArrowArray
                where TArrayBuilder: IArrowArrayBuilder<TArray>
            {
                if (action == null)
                {
                    return default;
                }

                action(builder);

                return builder.Build(_allocator);
            }
        }

        /// <summary>
        /// Fluent builder that accumulates columns (array + schema field) and produces a
        /// <see cref="RecordBatch"/>.
        /// </summary>
        public class Builder
        {
            private readonly MemoryAllocator _allocator;
            private readonly ArrayBuilder _arrayBuilder;
            private readonly Schema.Builder _schemaBuilder;
            private readonly List<IArrowArray> _arrays;

            /// <param name="allocator">
            /// Allocator used to build arrays; <c>MemoryAllocator.Default.Value</c> when null.
            /// </param>
            public Builder(MemoryAllocator allocator = default)
            {
                _allocator = allocator ?? MemoryAllocator.Default.Value;
                _arrayBuilder = new ArrayBuilder(_allocator);
                _schemaBuilder = new Schema.Builder();
                _arrays = new List<IArrowArray>();
            }

            /// <summary>
            /// Builds a batch from the columns appended so far. The batch length is the
            /// largest array length, or 0 when no column has been appended.
            /// </summary>
            public RecordBatch Build()
            {
                Schema schema = _schemaBuilder.Build();

                // Enumerable.Max throws InvalidOperationException on an empty sequence,
                // so an empty builder is mapped to a zero-length batch explicitly.
                int length = _arrays.Count == 0 ? 0 : _arrays.Max(x => x.Length);

                // each array has its own memoryOwner, so the RecordBatch itself doesn't
                // have a memoryOwner
                IMemoryOwner<byte> memoryOwner = null;

                // The internal RecordBatch constructor keeps the list instance it is given.
                // Pass a copy so that a later Clear()/Append() on this builder cannot
                // change the columns of a batch that has already been built.
                var batch = new RecordBatch(schema, memoryOwner, new List<IArrowArray>(_arrays), length);

                return batch;
            }

            /// <summary>Removes every field and array accumulated so far.</summary>
            public Builder Clear()
            {
                _schemaBuilder.Clear();
                _arrays.Clear();
                return this;
            }

            /// <summary>
            /// Appends every field and array of <paramref name="batch"/> to this builder.
            /// </summary>
            public Builder Append(RecordBatch batch)
            {
                // Walk the fields by index rather than enumerating the Fields dictionary:
                // dictionary enumeration order is not a documented guarantee, and the
                // fields must stay aligned with the positional order of batch.Arrays.
                int fieldCount = batch.Schema.Fields.Count;
                for (int i = 0; i < fieldCount; i++)
                {
                    _schemaBuilder.Field(batch.Schema.GetFieldByIndex(i));
                }

                foreach (IArrowArray array in batch.Arrays)
                {
                    _arrays.Add(array);
                }

                return this;
            }

            /// <summary>
            /// Builds <paramref name="builder"/> with this builder's allocator and appends the
            /// result as a column. A null <paramref name="builder"/> is ignored.
            /// </summary>
            public Builder Append<TArray>(string name, bool nullable, IArrowArrayBuilder<TArray> builder)
                where TArray: IArrowArray
            {
                return builder == null
                    ? this
                    : Append(name, nullable, builder.Build(_allocator));
            }

            /// <summary>
            /// Appends <paramref name="array"/> as a column named <paramref name="name"/>; the
            /// field's data type is taken from the array. A null array is ignored.
            /// </summary>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="name"/> is null, empty or whitespace.
            /// </exception>
            public Builder Append<TArray>(string name, bool nullable, TArray array)
                where TArray: IArrowArray
            {
                if (string.IsNullOrWhiteSpace(name)) throw new ArgumentNullException(nameof(name));
                if (array == null) return this;

                _arrays.Add(array);

                _schemaBuilder.Field(f => f
                    .Name(name)
                    .Nullable(nullable)
                    .DataType(array.Data.DataType));

                return this;
            }

            /// <summary>
            /// Invokes <paramref name="action"/> with this builder's <see cref="ArrayBuilder"/>
            /// and appends the array it returns. A null <paramref name="action"/> is ignored.
            /// </summary>
            public Builder Append<TArray>(string name, bool nullable, Func<ArrayBuilder, TArray> action)
                where TArray: IArrowArray
            {
                if (action == null) return this;

                TArray array = action(_arrayBuilder);

                Append(name, nullable, array);

                return this;
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/RecordBatch.cs b/src/arrow/csharp/src/Apache.Arrow/RecordBatch.cs new file mode 100644 index 000000000..6e9710068 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/RecordBatch.cs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Apache.Arrow
{
    /// <summary>
    /// A set of arrays (columns) described by a <see cref="Schema"/>, together with a row count.
    /// Disposing the batch disposes its optional memory owner and every array it holds.
    /// </summary>
    public partial class RecordBatch : IDisposable
    {
        public Schema Schema { get; }
        public int ColumnCount => _arrays.Count;
        public IEnumerable<IArrowArray> Arrays => _arrays;
        public int Length { get; }

        internal IReadOnlyList<IArrowArray> ArrayList => _arrays;

        private readonly IMemoryOwner<byte> _memoryOwner;
        private readonly List<IArrowArray> _arrays;

        /// <summary>Returns the array at position <paramref name="i"/>.</summary>
        public IArrowArray Column(int i)
        {
            return _arrays[i];
        }

        /// <summary>
        /// Returns the array whose position matches the index that
        /// <see cref="Schema.GetFieldIndex"/> reports for <paramref name="columnName"/>.
        /// </summary>
        public IArrowArray Column(string columnName)
        {
            int fieldIndex = Schema.GetFieldIndex(columnName);
            return _arrays[fieldIndex];
        }

        /// <summary>Releases the memory owner (if any) and disposes every array.</summary>
        public void Dispose()
        {
            Dispose(disposing: true);

            // Standard dispose pattern for an unsealed type: a derived class may add a
            // finalizer, and it must not run once the object has been disposed explicitly.
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _memoryOwner?.Dispose();

                foreach (IArrowArray array in _arrays)
                {
                    array.Dispose();
                }
            }
        }

        /// <summary>
        /// Creates a batch that holds a copy of the <paramref name="data"/> sequence.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="data"/> or <paramref name="schema"/> is null.
        /// </exception>
        public RecordBatch(Schema schema, IEnumerable<IArrowArray> data, int length)
        {
            if (length < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length));
            }

            _arrays = data?.ToList() ?? throw new ArgumentNullException(nameof(data));

            Schema = schema ?? throw new ArgumentNullException(nameof(schema));
            Length = length;
        }

        /// <summary>
        /// Internal constructor that stores <paramref name="arrays"/> without copying it;
        /// arguments are only checked with debug assertions.
        /// </summary>
        internal RecordBatch(Schema schema, IMemoryOwner<byte> memoryOwner, List<IArrowArray> arrays, int length)
        {
            Debug.Assert(schema != null);
            Debug.Assert(arrays != null);
            Debug.Assert(length >= 0);

            _memoryOwner = memoryOwner;
            _arrays = arrays;
            Schema = schema;
            Length = length;
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Schema.Builder.cs b/src/arrow/csharp/src/Apache.Arrow/Schema.Builder.cs new file mode 100644 index 000000000..89e9e3cee --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Schema.Builder.cs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    public partial class Schema
    {
        /// <summary>
        /// Fluent builder that collects fields and optional key/value metadata and
        /// produces a <see cref="Schema"/>.
        /// </summary>
        public class Builder
        {
            private readonly List<Field> _fields;
            private Dictionary<string, string> _metadata;

            public Builder()
            {
                _fields = new List<Field>();
            }

            /// <summary>Removes all fields and any metadata collected so far.</summary>
            public Builder Clear()
            {
                _fields.Clear();

                if (_metadata != null)
                {
                    _metadata.Clear();
                }

                return this;
            }

            /// <summary>Adds <paramref name="field"/>; a null field is ignored.</summary>
            public Builder Field(Field field)
            {
                if (field != null)
                {
                    _fields.Add(field);
                }

                return this;
            }

            /// <summary>
            /// Configures a new field through <paramref name="fieldBuilderAction"/> and adds
            /// the built field; a null action is ignored.
            /// </summary>
            public Builder Field(Action<Field.Builder> fieldBuilderAction)
            {
                if (fieldBuilderAction == null)
                {
                    return this;
                }

                var configured = new Field.Builder();
                fieldBuilderAction(configured);
                _fields.Add(configured.Build());

                return this;
            }

            /// <summary>Sets (or overwrites) one metadata entry.</summary>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="key"/> is null, empty or whitespace.
            /// </exception>
            public Builder Metadata(string key, string value)
            {
                if (string.IsNullOrWhiteSpace(key))
                {
                    throw new ArgumentNullException(nameof(key));
                }

                // The metadata dictionary is created lazily on first use.
                if (_metadata == null)
                {
                    _metadata = new Dictionary<string, string>();
                }

                _metadata[key] = value;
                return this;
            }

            /// <summary>Sets every entry of <paramref name="dictionary"/> as metadata.</summary>
            /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
            public Builder Metadata(IEnumerable<KeyValuePair<string, string>> dictionary)
            {
                if (dictionary == null)
                {
                    throw new ArgumentNullException(nameof(dictionary));
                }

                foreach (KeyValuePair<string, string> pair in dictionary)
                {
                    Metadata(pair.Key, pair.Value);
                }

                return this;
            }

            /// <summary>Creates a schema from the collected fields and metadata.</summary>
            public Schema Build()
            {
                return new Schema(_fields, _metadata);
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Schema.cs b/src/arrow/csharp/src/Apache.Arrow/Schema.cs new file mode 100644 index 000000000..59d19214d --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Schema.cs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Apache.Arrow
{
    /// <summary>
    /// An ordered list of fields plus optional string metadata. Fields are also indexed
    /// by name in a case-insensitive (ordinal) dictionary.
    /// </summary>
    public partial class Schema
    {
        public IReadOnlyDictionary<string, Field> Fields
        {
            get => _fieldsDictionary;
        }

        private readonly Dictionary<string, Field> _fieldsDictionary;

        public IReadOnlyDictionary<string, string> Metadata { get; }

        public bool HasMetadata =>
            Metadata != null && Metadata.Count > 0;

        private readonly IList<Field> _fields;

        /// <summary>
        /// Creates a schema holding copies of the given field and metadata sequences.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="fields"/> is null.</exception>
        public Schema(
            IEnumerable<Field> fields,
            IEnumerable<KeyValuePair<string, string>> metadata)
        {
            if (fields == null)
            {
                throw new ArgumentNullException(nameof(fields));
            }

            _fields = fields.ToList();

            // Index the materialized list rather than `fields` so that a lazy or
            // one-shot sequence is enumerated exactly once.
            _fieldsDictionary = _fields.ToDictionary(
                field => field.Name, field => field,
                StringComparer.OrdinalIgnoreCase);

            Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
        }

        /// <summary>
        /// Internal constructor that stores the given collections without copying them.
        /// </summary>
        internal Schema(List<Field> fields, IReadOnlyDictionary<string, string> metadata, bool copyCollections)
        {
            Debug.Assert(fields != null);
            Debug.Assert(copyCollections == false, "This internal constructor is to not copy the collections.");

            _fields = fields;

            _fieldsDictionary = fields.ToDictionary(
                field => field.Name, field => field,
                StringComparer.OrdinalIgnoreCase);

            Metadata = metadata;
        }

        /// <summary>Returns the field at position <paramref name="i"/>.</summary>
        public Field GetFieldByIndex(int i)
        {
            return _fields[i];
        }

        /// <summary>
        /// Looks the field up in <see cref="Fields"/>; returns null when no field has that name.
        /// </summary>
        public Field GetFieldByName(string name) =>
            Fields.TryGetValue(name, out Field field) ? field : null;

        /// <summary>
        /// Returns the position of the single field whose name compares equal to
        /// <paramref name="name"/>.
        /// </summary>
        /// <param name="comparer">
        /// Name comparer; <see cref="StringComparer.CurrentCulture"/> when null. Note that this
        /// default differs from the ordinal, case-insensitive comparer used by <see cref="Fields"/>.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>Single</c> when zero or more than one field matches.
        /// </exception>
        public int GetFieldIndex(string name, StringComparer comparer = default)
        {
            if (comparer == null)
            {
                comparer = StringComparer.CurrentCulture;
            }

            return _fields.IndexOf(
                _fields.Single(x => comparer.Compare(x.Name, name) == 0));
        }

        /// <summary>Returns a new schema without the field at <paramref name="fieldIndex"/>.</summary>
        /// <exception cref="ArgumentException"><paramref name="fieldIndex"/> is out of range.</exception>
        public Schema RemoveField(int fieldIndex)
        {
            if (fieldIndex < 0 || fieldIndex >= _fields.Count)
            {
                throw new ArgumentException("Invalid fieldIndex", nameof(fieldIndex));
            }

            IList<Field> fields = Utility.DeleteListElement(_fields, fieldIndex);

            return new Schema(fields, Metadata);
        }

        /// <summary>
        /// Returns a new schema with <paramref name="newField"/> inserted at
        /// <paramref name="fieldIndex"/> (which may equal the current field count).
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="newField"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fieldIndex"/> is out of range.</exception>
        public Schema InsertField(int fieldIndex, Field newField)
        {
            newField = newField ?? throw new ArgumentNullException(nameof(newField));
            if (fieldIndex < 0 || fieldIndex > _fields.Count)
            {
                // ArgumentException takes (message, paramName) in that order.
                throw new ArgumentException($"Invalid fieldIndex {fieldIndex} passed in to Schema.InsertField", nameof(fieldIndex));
            }

            IList<Field> fields = Utility.AddListElement(_fields, fieldIndex, newField);

            return new Schema(fields, Metadata);
        }

        /// <summary>
        /// Returns a new schema in which the field at <paramref name="fieldIndex"/> is
        /// replaced by <paramref name="newField"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="newField"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fieldIndex"/> is out of range.</exception>
        public Schema SetField(int fieldIndex, Field newField)
        {
            // Same contract as InsertField; without this check a null field only fails
            // later, as a NullReferenceException while the name index is being built.
            newField = newField ?? throw new ArgumentNullException(nameof(newField));
            if (fieldIndex < 0 || fieldIndex >= Fields.Count)
            {
                throw new ArgumentException($"Invalid fieldIndex {fieldIndex} passed in to Schema.SetField", nameof(fieldIndex));
            }

            IList<Field> fields = Utility.SetListElement(_fields, fieldIndex, newField);

            return new Schema(fields, Metadata);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Table.cs b/src/arrow/csharp/src/Apache.Arrow/Table.cs new file mode 100644 index 000000000..7116ff202 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Table.cs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using System;
using System.Collections.Generic;

namespace Apache.Arrow
{
    /// <summary>
    /// A logical Table class to represent a dataset as a sequence of Columns
    /// </summary>
    public class Table
    {
        public Schema Schema { get; }
        public long RowCount { get; }
        public int ColumnCount { get; private set; }
        public Column Column(int columnIndex) => _columns[columnIndex];

        private readonly IList<Column> _columns;

        /// <summary>
        /// Builds a table with one column per schema field, where column <c>i</c> is made of
        /// the i-th array of every record batch, in batch order.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="schema"/> or <paramref name="recordBatches"/> is null.
        /// </exception>
        public static Table TableFromRecordBatches(Schema schema, IList<RecordBatch> recordBatches)
        {
            if (schema == null)
            {
                throw new ArgumentNullException(nameof(schema));
            }
            if (recordBatches == null)
            {
                throw new ArgumentNullException(nameof(recordBatches));
            }

            int nBatches = recordBatches.Count;
            int nColumns = schema.Fields.Count;

            List<Column> columns = new List<Column>(nColumns);
            for (int icol = 0; icol < nColumns; icol++)
            {
                // A fresh list per column: Column's constructor is not visible here, so do
                // not assume it copies the list. Reusing and clearing one shared list would
                // empty every column that kept a reference to it.
                List<Array> columnArrays = new List<Array>(nBatches);
                for (int jj = 0; jj < nBatches; jj++)
                {
                    // NOTE(review): `as` yields null when the batch column is not an Array.
                    columnArrays.Add(recordBatches[jj].Column(icol) as Array);
                }
                columns.Add(new Arrow.Column(schema.GetFieldByIndex(icol), columnArrays));
            }

            return new Table(schema, columns);
        }

        /// <summary>
        /// Creates a table over <paramref name="columns"/>; the row count is taken from the
        /// first column (0 when there are no columns).
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="columns"/> is null.</exception>
        public Table(Schema schema, IList<Column> columns)
        {
            Schema = schema;
            _columns = columns ?? throw new ArgumentNullException(nameof(columns));
            if (columns.Count > 0)
            {
                RowCount = columns[0].Length;
                ColumnCount = columns.Count;
            }
        }

        /// <summary>Creates a table with an empty schema and no columns.</summary>
        public Table()
        {
            Schema = new Schema.Builder().Build();
            _columns = new List<Column>();
        }

        /// <summary>Returns a new table without the column at <paramref name="columnIndex"/>.</summary>
        public Table RemoveColumn(int columnIndex)
        {
            // Index validation is performed by Schema.RemoveField.
            Schema newSchema = Schema.RemoveField(columnIndex);
            IList<Column> newColumns = Utility.DeleteListElement(_columns, columnIndex);
            return new Table(newSchema, newColumns);
        }

        /// <summary>
        /// Returns a new table with <paramref name="column"/> inserted at
        /// <paramref name="columnIndex"/> (which may equal the current column count).
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="columnIndex"/> is out of range, or the table already has columns
        /// and the new column's length differs from <see cref="RowCount"/>.
        /// </exception>
        public Table InsertColumn(int columnIndex, Column column)
        {
            column = column ?? throw new ArgumentNullException(nameof(column));
            if (columnIndex < 0 || columnIndex > _columns.Count)
            {
                throw new ArgumentException($"Invalid columnIndex {columnIndex} passed into Table.InsertColumn", nameof(columnIndex));
            }

            // A table without columns has no row count to agree with (RowCount is just the
            // default 0), so the first column is allowed to define it.
            if (_columns.Count > 0 && column.Length != RowCount)
            {
                throw new ArgumentException($"Column's length {column.Length} must match Table's length {RowCount}");
            }

            Schema newSchema = Schema.InsertField(columnIndex, column.Field);
            IList<Column> newColumns = Utility.AddListElement(_columns, columnIndex, column);
            return new Table(newSchema, newColumns);
        }

        /// <summary>
        /// Returns a new table in which the column at <paramref name="columnIndex"/> is
        /// replaced by <paramref name="column"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="columnIndex"/> is out of range, or the column's length differs from
        /// <see cref="RowCount"/>.
        /// </exception>
        public Table SetColumn(int columnIndex, Column column)
        {
            column = column ?? throw new ArgumentNullException(nameof(column));
            if (columnIndex < 0 || columnIndex >= ColumnCount)
            {
                throw new ArgumentException($"Invalid columnIndex {columnIndex} passed in to Table.SetColumn");
            }

            if (column.Length != RowCount)
            {
                throw new ArgumentException($"Column's length {column.Length} must match table's length {RowCount}");
            }

            Schema newSchema = Schema.SetField(columnIndex, column.Field);
            IList<Column> newColumns = Utility.SetListElement(_columns, columnIndex, column);
            return new Table(newSchema, newColumns);
        }

        // TODO: Flatten for Tables with Lists/Structs?
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/ArrowType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/ArrowType.cs new file mode 100644 index 000000000..c0eca23da --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/ArrowType.cs @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 


namespace Apache.Arrow.Types
{
    /// <summary>
    /// Base class for Arrow type descriptors: exposes a type id, a name, a fixed-width flag
    /// and visitor dispatch.
    /// </summary>
    public abstract class ArrowType : IArrowType
    {
        public abstract ArrowTypeId TypeId { get; }

        public abstract string Name { get; }

        /// <summary>False unless a derived type overrides it.</summary>
        public virtual bool IsFixedWidth
        {
            get { return false; }
        }

        public abstract void Accept(IArrowTypeVisitor visitor);

        /// <summary>
        /// Dispatches <paramref name="type"/> to the visitor's strongly typed
        /// <c>Visit</c> when the visitor implements <c>IArrowTypeVisitor&lt;T&gt;</c>,
        /// and to the untyped <c>Visit</c> otherwise.
        /// </summary>
        internal static void Accept<T>(T type, IArrowTypeVisitor visitor)
            where T : class, IArrowType
        {
            if (visitor is IArrowTypeVisitor<T> typedVisitor)
            {
                typedVisitor.Visit(type);
            }
            else
            {
                visitor.Visit(type);
            }
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/BinaryType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/BinaryType.cs new file mode 100644 index 000000000..6734d93ad --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/BinaryType.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. 


namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor with type id <see cref="ArrowTypeId.Binary"/> and name "binary".
    /// </summary>
    public class BinaryType : ArrowType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly BinaryType Default = new BinaryType();

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Binary; }
        }

        public override string Name
        {
            get { return "binary"; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/BooleanType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/BooleanType.cs new file mode 100644 index 000000000..3b57414b0 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/BooleanType.cs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor with type id <see cref="ArrowTypeId.Boolean"/>, name "bool",
    /// a bit width of 1, and reported as unsigned.
    /// </summary>
    public sealed class BooleanType : NumberType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly BooleanType Default = new BooleanType();

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Boolean; }
        }

        public override string Name
        {
            get { return "bool"; }
        }

        public override int BitWidth
        {
            get { return 1; }
        }

        public override bool IsSigned
        {
            get { return false; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/Date32Type.cs b/src/arrow/csharp/src/Apache.Arrow/Types/Date32Type.cs new file mode 100644 index 000000000..9673bf62f --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/Date32Type.cs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow date type with type id <see cref="ArrowTypeId.Date32"/>, name "date32",
    /// a bit width of 32 and unit <see cref="DateUnit.Day"/>.
    /// </summary>
    public sealed class Date32Type : DateType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Date32Type Default = new Date32Type();

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Date32; }
        }

        public override string Name
        {
            get { return "date32"; }
        }

        public override int BitWidth
        {
            get { return 32; }
        }

        public override DateUnit Unit
        {
            get { return DateUnit.Day; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/Date64Type.cs b/src/arrow/csharp/src/Apache.Arrow/Types/Date64Type.cs new file mode 100644 index 000000000..2a9e1aac0 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/Date64Type.cs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow date type with type id <see cref="ArrowTypeId.Date64"/>, name "date64",
    /// a bit width of 64 and unit <see cref="DateUnit.Milliseconds"/>.
    /// </summary>
    public sealed class Date64Type : DateType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Date64Type Default = new Date64Type();

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Date64; }
        }

        public override string Name
        {
            get { return "date64"; }
        }

        public override int BitWidth
        {
            get { return 64; }
        }

        public override DateUnit Unit
        {
            get { return DateUnit.Milliseconds; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/DateType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/DateType.cs new file mode 100644 index 000000000..8f15b08fc --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/DateType.cs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Unit reported by a <see cref="DateType"/>.
    /// </summary>
    public enum DateUnit
    {
        /// <summary>Unit reported by <c>Date32Type</c>.</summary>
        Day = 0,

        /// <summary>Unit reported by <c>Date64Type</c>.</summary>
        Milliseconds = 1
    }

    /// <summary>
    /// Base class of the fixed-width date types; adds the <see cref="Unit"/> each
    /// concrete date type must report.
    /// </summary>
    public abstract class DateType : FixedWidthType
    {
        /// <summary>The unit of this date type.</summary>
        public abstract DateUnit Unit { get; }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/Decimal128Type.cs b/src/arrow/csharp/src/Apache.Arrow/Types/Decimal128Type.cs new file mode 100644 index 000000000..e00b9da10 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/Decimal128Type.cs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Decimal type stored as a 16-byte fixed-size binary value, carrying a precision
    /// and a scale. Type id <see cref="ArrowTypeId.Decimal128"/>, name "decimal128".
    /// </summary>
    public sealed class Decimal128Type : FixedSizeBinaryType
    {
        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Decimal128; }
        }

        public override string Name
        {
            get { return "decimal128"; }
        }

        public int Precision { get; }

        public int Scale { get; }

        /// <summary>
        /// Stores <paramref name="precision"/> and <paramref name="scale"/> as given
        /// (no range validation is performed here); the byte width passed to the base is 16.
        /// </summary>
        public Decimal128Type(int precision, int scale)
            : base(16)
        {
            Scale = scale;
            Precision = precision;
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/Decimal256Type.cs b/src/arrow/csharp/src/Apache.Arrow/Types/Decimal256Type.cs new file mode 100644 index 000000000..b184deb3c --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/Decimal256Type.cs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Decimal type stored as a 32-byte fixed-size binary value, carrying a precision
    /// and a scale. Type id <see cref="ArrowTypeId.Decimal256"/>, name "decimal256".
    /// </summary>
    public sealed class Decimal256Type : FixedSizeBinaryType
    {
        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Decimal256; }
        }

        public override string Name
        {
            get { return "decimal256"; }
        }

        public int Precision { get; }

        public int Scale { get; }

        /// <summary>
        /// Stores <paramref name="precision"/> and <paramref name="scale"/> as given
        /// (no range validation is performed here); the byte width passed to the base is 32.
        /// </summary>
        public Decimal256Type(int precision, int scale)
            : base(32)
        {
            Scale = scale;
            Precision = precision;
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/DictionaryType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/DictionaryType.cs new file mode 100644 index 000000000..5c1dd4095 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/DictionaryType.cs @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow dictionary type: pairs an integer index type with a value type and an
    /// "ordered" flag. Type id <see cref="ArrowTypeId.Dictionary"/>, name "dictionary".
    /// </summary>
    public sealed class DictionaryType : FixedWidthType
    {
        /// <summary>
        /// Unordered dictionary type whose index and value types are both <c>Int64Type.Default</c>.
        /// </summary>
        public static readonly DictionaryType Default = new DictionaryType(Int64Type.Default, Int64Type.Default, false);

        /// <exception cref="ArgumentException">
        /// <paramref name="indexType"/> is not an <c>IntegerType</c> (this includes null).
        /// </exception>
        public DictionaryType(IArrowType indexType, IArrowType valueType, bool ordered)
        {
            bool hasIntegerIndex = indexType is IntegerType;
            if (!hasIntegerIndex)
            {
                throw new ArgumentException($"{nameof(indexType)} must be integer");
            }

            Ordered = ordered;
            ValueType = valueType;
            IndexType = indexType;
        }

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Dictionary; }
        }

        public override string Name
        {
            get { return "dictionary"; }
        }

        /// <summary>Always 64, independent of <see cref="IndexType"/>.</summary>
        public override int BitWidth
        {
            get { return 64; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }

        public IArrowType IndexType { get; private set; }
        public IArrowType ValueType { get; private set; }
        public bool Ordered { get; private set; }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/DoubleType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/DoubleType.cs new file mode 100644 index 000000000..aa6ade650 --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/DoubleType.cs @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow floating-point type with type id <see cref="ArrowTypeId.Double"/>, name "double",
    /// a bit width of 64, reported as signed, with precision <see cref="PrecisionKind.Double"/>.
    /// </summary>
    public sealed class DoubleType : FloatingPointType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly DoubleType Default = new DoubleType();

        public override ArrowTypeId TypeId
        {
            get { return ArrowTypeId.Double; }
        }

        public override string Name
        {
            get { return "double"; }
        }

        public override int BitWidth
        {
            get { return 64; }
        }

        public override bool IsSigned
        {
            get { return true; }
        }

        public override PrecisionKind Precision
        {
            get { return PrecisionKind.Double; }
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
diff --git a/src/arrow/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs b/src/arrow/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs new file mode 100644 index 000000000..ccbfc8c1f --- /dev/null +++ b/src/arrow/csharp/src/Apache.Arrow/Types/FixedSizeBinaryType.cs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for binary values with a fixed number of bytes per value.
    /// </summary>
    public class FixedSizeBinaryType : FixedWidthType
    {
        /// <summary>
        /// Creates the type for values of <paramref name="byteWidth"/> bytes.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="byteWidth"/> is zero or negative.
        /// </exception>
        public FixedSizeBinaryType(int byteWidth)
        {
            if (byteWidth < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(byteWidth));
            }

            ByteWidth = byteWidth;
        }

        /// <summary>Number of bytes per value, as given to the constructor.</summary>
        public int ByteWidth { get; }

        /// <summary>Eight times <see cref="ByteWidth"/>.</summary>
        public override int BitWidth { get { return 8 * ByteWidth; } }

        public override string Name { get { return "fixed_size_binary"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.FixedSizedBinary; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Base class for Arrow types that report a bit width; always reports itself as fixed width.
    /// </summary>
    public abstract class FixedWidthType : ArrowType
    {
        /// <summary>Width of the type in bits, supplied by each concrete type.</summary>
        public abstract int BitWidth { get; }

        public override bool IsFixedWidth { get { return true; } }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for single-precision floating point values:
    /// 32 bits wide, signed, named "float".
    /// </summary>
    public sealed class FloatType : FloatingPointType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly FloatType Default = new FloatType();

        public override string Name { get { return "float"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Float; } }

        public override PrecisionKind Precision { get { return PrecisionKind.Single; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 32; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Base class for floating point number types; adds a precision indicator.
    /// </summary>
    public abstract class FloatingPointType : NumberType
    {
        /// <summary>Precision categories a floating point type can report.</summary>
        public enum PrecisionKind
        {
            Half = 0,
            Single = 1,
            Double = 2
        }

        /// <summary>Precision category of the concrete type.</summary>
        public abstract PrecisionKind Precision { get; }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for half-precision floating point values:
    /// 16 bits wide, signed, named "halffloat".
    /// </summary>
    public sealed class HalfFloatType : FloatingPointType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly HalfFloatType Default = new HalfFloatType();

        public override string Name { get { return "halffloat"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.HalfFloat; } }

        public override PrecisionKind Precision { get { return PrecisionKind.Half; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 16; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Identifiers for the Arrow types. The numeric values are written out
    /// explicitly and match the implicit numbering of the declaration order.
    /// </summary>
    public enum ArrowTypeId
    {
        Null = 0,
        Boolean = 1,
        UInt8 = 2,
        Int8 = 3,
        UInt16 = 4,
        Int16 = 5,
        UInt32 = 6,
        Int32 = 7,
        UInt64 = 8,
        Int64 = 9,
        HalfFloat = 10,
        Float = 11,
        Double = 12,
        String = 13,
        Binary = 14,
        FixedSizedBinary = 15,
        Date32 = 16,
        Date64 = 17,
        Timestamp = 18,
        Time32 = 19,
        Time64 = 20,
        Interval = 21,
        Decimal128 = 22,
        Decimal256 = 23,
        List = 24,
        Struct = 25,
        Union = 26,
        Dictionary = 27,
        Map = 28
    }

    /// <summary>
    /// Common contract of every Arrow type descriptor.
    /// </summary>
    public interface IArrowType
    {
        /// <summary>Identifier of the concrete type.</summary>
        ArrowTypeId TypeId { get; }

        /// <summary>Name of the type.</summary>
        string Name { get; }

        /// <summary>Whether the type reports itself as fixed width.</summary>
        bool IsFixedWidth { get; }

        /// <summary>Lets <paramref name="visitor"/> visit this type.</summary>
        void Accept(IArrowTypeVisitor visitor);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Visitor that can be handed any Arrow type.
    /// </summary>
    public interface IArrowTypeVisitor
    {
        /// <summary>Visits <paramref name="type"/>.</summary>
        void Visit(IArrowType type);
    }

    /// <summary>
    /// Visitor with an additional overload for one specific Arrow type.
    /// </summary>
    /// <typeparam name="T">Arrow type handled by the typed overload (contravariant).</typeparam>
    public interface IArrowTypeVisitor<in T> : IArrowTypeVisitor
        where T : IArrowType
    {
        /// <summary>Visits <paramref name="type"/> through the typed overload.</summary>
        void Visit(T type);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for signed 16-bit integers, named "int16".
    /// </summary>
    public sealed class Int16Type : IntegerType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Int16Type Default = new Int16Type();

        public override string Name { get { return "int16"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Int16; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 16; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for signed 32-bit integers, named "int32".
    /// </summary>
    public sealed class Int32Type : IntegerType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Int32Type Default = new Int32Type();

        public override string Name { get { return "int32"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Int32; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 32; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for signed 64-bit integers, named "int64".
    /// </summary>
    public sealed class Int64Type : IntegerType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Int64Type Default = new Int64Type();

        public override string Name { get { return "int64"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Int64; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 64; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for signed 8-bit integers, named "int8".
    /// </summary>
    public sealed class Int8Type : IntegerType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly Int8Type Default = new Int8Type();

        public override string Name { get { return "int8"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Int8; } }

        public override bool IsSigned { get { return true; } }

        public override int BitWidth { get { return 8; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }

    /// <summary>
    /// Common base class of the integer types; adds no members of its own.
    /// </summary>
    public abstract class IntegerType : NumberType
    {
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Units an <see cref="IntervalType"/> can be expressed in.
    /// </summary>
    public enum IntervalUnit
    {
        YearMonth = 0,
        DayTime = 1
    }

    /// <summary>
    /// Arrow interval type descriptor, parameterised by its <see cref="IntervalUnit"/>.
    /// </summary>
    public sealed class IntervalType : FixedWidthType
    {
        public override ArrowTypeId TypeId => ArrowTypeId.Interval;

        // Previously returned "date", which does not describe this type and
        // appears to have been copied from a date type.
        public override string Name => "interval";

        /// <summary>
        /// Width of one value in bits: 32 for <see cref="IntervalUnit.YearMonth"/>
        /// and 64 for <see cref="IntervalUnit.DayTime"/>, following the Arrow format
        /// definition of the two units. Previously 64 was reported for both.
        /// </summary>
        public override int BitWidth => Unit == IntervalUnit.YearMonth ? 32 : 64;

        /// <summary>Unit given to the constructor.</summary>
        public IntervalUnit Unit { get; }

        /// <summary>
        /// Creates an interval type.
        /// </summary>
        /// <param name="unit">Unit of the interval; defaults to <see cref="IntervalUnit.YearMonth"/>.</param>
        public IntervalType(IntervalUnit unit = IntervalUnit.YearMonth)
        {
            Unit = unit;
        }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
using System;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow list type descriptor: a nested type with exactly one child field.
    /// </summary>
    public sealed class ListType : NestedType
    {
        /// <summary>
        /// Creates a list type whose child is a new field named "item" built from
        /// <paramref name="valueDataType"/> (with <c>true</c> as the field's third constructor argument).
        /// </summary>
        public ListType(IArrowType valueDataType)
            : this(new Field("item", valueDataType, true))
        {
        }

        /// <summary>
        /// Creates a list type with <paramref name="valueField"/> as its only child field.
        /// </summary>
        public ListType(Field valueField)
            : base(valueField)
        {
        }

        public override string Name { get { return "list"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.List; } }

        /// <summary>The first (and only) child field.</summary>
        public Field ValueField { get { return Fields[0]; } }

        /// <summary>Data type of <see cref="ValueField"/>.</summary>
        public IArrowType ValueDataType { get { return ValueField.DataType; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
using System;
using System.Collections.Generic;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Base class for Arrow types that carry one or more child fields.
    /// </summary>
    public abstract class NestedType : ArrowType
    {
        /// <summary>
        /// Creates a nested type over the given child fields.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fields"/> is null or contains no elements.
        /// </exception>
        protected NestedType(IReadOnlyList<Field> fields)
        {
            bool nothingSupplied = fields == null || fields.Count == 0;
            if (nothingSupplied)
            {
                throw new ArgumentNullException(nameof(fields));
            }

            Fields = fields;
        }

        /// <summary>
        /// Creates a nested type with a single child field.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
        protected NestedType(Field field)
        {
            if (field == null)
            {
                throw new ArgumentNullException(nameof(field));
            }

            Fields = new[] { field };
        }

        /// <summary>Child fields of this type.</summary>
        public IReadOnlyList<Field> Fields { get; }

        /// <summary>Older alias that returns <see cref="Fields"/>.</summary>
        [Obsolete("Use `Fields` instead")]
        public IReadOnlyList<Field> Children
        {
            get { return Fields; }
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for the null type, named "null".
    /// </summary>
    public sealed class NullType : ArrowType
    {
        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly NullType Default = new NullType();

        public override string Name { get { return "null"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Null; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }

    /// <summary>
    /// Base class for fixed-width numeric types; adds a signedness indicator.
    /// </summary>
    public abstract class NumberType : FixedWidthType
    {
        /// <summary>Whether the concrete numeric type is signed.</summary>
        public abstract bool IsSigned { get; }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for string values, named "utf8".
    /// </summary>
    public sealed class StringType : ArrowType
    {
        /// <summary>
        /// Shared instance of this parameterless type. Declared <c>readonly</c>, like the
        /// <c>Default</c> field of the other types, so the shared instance cannot be replaced.
        /// </summary>
        public static readonly StringType Default = new StringType();

        public override ArrowTypeId TypeId => ArrowTypeId.String;

        public override string Name => "utf8";

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
using System;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow struct type descriptor: a nested type over a list of child fields,
    /// with lookups by field name.
    /// </summary>
    public sealed class StructType : NestedType
    {
        public StructType(IReadOnlyList<Field> fields)
            : base(fields)
        {
        }

        public override string Name { get { return "struct"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Struct; } }

        /// <summary>
        /// Returns the first child field whose name equals <paramref name="name"/>.
        /// </summary>
        /// <param name="name">Field name to look for.</param>
        /// <param name="comparer">Name comparer; ordinal comparison is used when null.</param>
        /// <returns>The matching field, or the default value of <see cref="Field"/> when none matches.</returns>
        public Field GetFieldByName(string name,
            IEqualityComparer<string> comparer = default)
        {
            IEqualityComparer<string> nameComparer = comparer ?? StringComparer.Ordinal;

            foreach (Field candidate in Fields)
            {
                if (nameComparer.Equals(candidate.Name, name))
                {
                    return candidate;
                }
            }

            return default;
        }

        /// <summary>
        /// Returns the position of the first child field whose name equals <paramref name="name"/>.
        /// </summary>
        /// <param name="name">Field name to look for.</param>
        /// <param name="comparer">Name comparer; ordinal comparison is used when null.</param>
        /// <returns>Zero-based index of the matching field, or -1 when none matches.</returns>
        public int GetFieldIndex(string name,
            IEqualityComparer<string> comparer = default)
        {
            IEqualityComparer<string> nameComparer = comparer ?? StringComparer.Ordinal;

            // TODO: a cached name-to-index map may be worthwhile if this turns out to be a hot path.
            int position = 0;
            while (position < Fields.Count)
            {
                if (nameComparer.Equals(Fields[position].Name, name))
                {
                    return position;
                }

                position++;
            }

            return -1;
        }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for 32-bit time values, named "time32".
    /// </summary>
    public sealed class Time32Type : TimeType
    {
        /// <summary>Shared instance created with the constructor's default unit.</summary>
        public static readonly Time32Type Default = new Time32Type();

        /// <summary>
        /// Creates a 32-bit time type.
        /// </summary>
        /// <param name="unit">Unit of the time values; defaults to <see cref="TimeUnit.Millisecond"/>.</param>
        public Time32Type(TimeUnit unit = TimeUnit.Millisecond)
            : base(unit)
        {
        }

        public override string Name { get { return "time32"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Time32; } }

        public override int BitWidth { get { return 32; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow type descriptor for 64-bit time values, named "time64".
    /// </summary>
    public sealed class Time64Type : TimeType
    {
        /// <summary>Shared instance created with the constructor's default unit.</summary>
        public static readonly Time64Type Default = new Time64Type();

        /// <summary>
        /// Creates a 64-bit time type.
        /// </summary>
        /// <param name="unit">Unit of the time values; defaults to <see cref="TimeUnit.Millisecond"/>.</param>
        // NOTE(review): the Arrow format appears to restrict 64-bit time to microsecond and
        // nanosecond units, so the millisecond default looks questionable. Confirm before
        // changing it, since the default value is part of the public signature.
        public Time64Type(TimeUnit unit = TimeUnit.Millisecond)
            : base(unit)
        {
        }

        public override string Name { get { return "time64"; } }

        public override ArrowTypeId TypeId { get { return ArrowTypeId.Time64; } }

        public override int BitWidth { get { return 64; } }

        /// <summary>Forwards to the two-argument Accept overload with this instance.</summary>
        public override void Accept(IArrowTypeVisitor visitor)
        {
            Accept(this, visitor);
        }
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Units for time-based types. The numeric values are written out explicitly
    /// and match the implicit numbering of the declaration order.
    /// </summary>
    public enum TimeUnit
    {
        Second = 0,
        Millisecond = 1,
        Microsecond = 2,
        Nanosecond = 3
    }

    /// <summary>
    /// Base class for fixed-width time types; stores the unit chosen at construction.
    /// </summary>
    public abstract class TimeType : FixedWidthType
    {
        /// <summary>Stores <paramref name="unit"/> in <see cref="Unit"/>.</summary>
        protected TimeType(TimeUnit unit)
        {
            Unit = unit;
        }

        /// <summary>Unit given to the constructor.</summary>
        public TimeUnit Unit { get; }
    }
}
using System;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow logical type for a 64-bit timestamp with a unit and an optional time zone string.
    /// </summary>
    // NOTE(review): both constructors declare every parameter as optional, so a call that
    // omits `timezone` (or passes an untyped null) appears to be ambiguous between the two
    // overloads - confirm and consider dropping the defaults in a breaking release.
    public sealed class TimestampType : FixedWidthType
    {
        /// <summary>Shared millisecond instance whose time zone string is <c>"+00:00"</c>.</summary>
        public static readonly TimestampType Default = new TimestampType(TimeUnit.Millisecond, "+00:00");

        public override ArrowTypeId TypeId => ArrowTypeId.Timestamp;
        public override string Name => "timestamp";
        public override int BitWidth => 64;

        /// <summary>Resolution of the stored values.</summary>
        public TimeUnit Unit { get; }

        /// <summary>Time zone text as supplied or derived at construction; may be null.</summary>
        public string Timezone { get; }

        /// <summary>True when <see cref="Timezone"/> holds something other than null or white space.</summary>
        public bool IsTimeZoneAware => string.IsNullOrWhiteSpace(Timezone) == false;

        /// <summary>Creates a timestamp type from a unit and a raw time zone string.</summary>
        /// <param name="unit">Resolution of the stored values.</param>
        /// <param name="timezone">Time zone text, stored as given.</param>
        public TimestampType(
            TimeUnit unit = TimeUnit.Millisecond,
            string timezone = default)
        {
            Timezone = timezone;
            Unit = unit;
        }

        /// <summary>
        /// Creates a timestamp type from a unit and a <see cref="TimeZoneInfo"/>.
        /// </summary>
        /// <param name="unit">Resolution of the stored values.</param>
        /// <param name="timezone">
        /// Zone whose <see cref="TimeZoneInfo.BaseUtcOffset"/> is formatted into
        /// <see cref="Timezone"/>; a null zone leaves <see cref="Timezone"/> null.
        /// </param>
        public TimestampType(
            TimeUnit unit = TimeUnit.Millisecond,
            TimeZoneInfo timezone = default)
        {
            // Only the base offset is used; the zone's identifier is not retained.
            string offsetText = timezone?.BaseUtcOffset.ToTimeZoneOffsetString();

            Unit = unit;
            Timezone = offsetText;
        }

        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow integer type reported as 16 bits wide and unsigned.
    /// </summary>
    public sealed class UInt16Type : IntegerType
    {
        public override string Name => "uint16";
        public override ArrowTypeId TypeId => ArrowTypeId.UInt16;
        public override bool IsSigned => false;
        public override int BitWidth => 16;

        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly UInt16Type Default = new UInt16Type();

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow integer type reported as 32 bits wide and unsigned.
    /// </summary>
    public sealed class UInt32Type : IntegerType
    {
        public override string Name => "uint32";
        public override ArrowTypeId TypeId => ArrowTypeId.UInt32;
        public override bool IsSigned => false;
        public override int BitWidth => 32;

        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly UInt32Type Default = new UInt32Type();

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow integer type reported as 64 bits wide and unsigned.
    /// </summary>
    public sealed class UInt64Type : IntegerType
    {
        public override string Name => "uint64";
        public override ArrowTypeId TypeId => ArrowTypeId.UInt64;
        public override bool IsSigned => false;
        public override int BitWidth => 64;

        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly UInt64Type Default = new UInt64Type();

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
namespace Apache.Arrow.Types
{
    /// <summary>
    /// Arrow integer type reported as 8 bits wide and unsigned.
    /// </summary>
    public sealed class UInt8Type : IntegerType
    {
        public override string Name => "uint8";
        public override ArrowTypeId TypeId => ArrowTypeId.UInt8;
        public override bool IsSigned => false;
        public override int BitWidth => 8;

        /// <summary>Shared instance of this parameterless type.</summary>
        public static readonly UInt8Type Default = new UInt8Type();

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow.Types
{
    /// <summary>
    /// Layout variants of a union. Member order is significant because the
    /// underlying values are implicit.
    /// </summary>
    public enum UnionMode
    {
        Sparse,
        Dense
    }

    /// <summary>
    /// Arrow union type described by its child fields, their type codes and a layout mode.
    /// </summary>
    public sealed class UnionType : ArrowType
    {
        public override ArrowTypeId TypeId => ArrowTypeId.Union;
        public override string Name => "union";

        /// <summary>Layout mode supplied at construction time.</summary>
        public UnionMode Mode { get; }

        /// <summary>Snapshot of the type codes supplied at construction time.</summary>
        public IEnumerable<byte> TypeCodes { get; }

        /// <summary>
        /// Snapshot of the child fields supplied at construction time; empty when
        /// the constructor received null.
        /// </summary>
        public IReadOnlyList<Field> Fields { get; }

        /// <summary>
        /// Creates a union type.
        /// </summary>
        /// <param name="fields">
        /// Child fields of the union. Previously this argument was accepted and then
        /// discarded; it is now copied into <see cref="Fields"/>. Null is still
        /// tolerated (as before) and yields an empty list.
        /// </param>
        /// <param name="typeCodes">Type codes; copied, so later changes to the source are not observed.</param>
        /// <param name="mode">Layout mode, sparse by default.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="typeCodes"/> is null.</exception>
        public UnionType(
            IEnumerable<Field> fields, IEnumerable<byte> typeCodes,
            UnionMode mode = UnionMode.Sparse)
        {
            // Materialize both sequences so the type does not depend on deferred or mutable inputs.
            Fields = fields?.ToList() ?? new List<Field>();
            TypeCodes = typeCodes.ToList();
            Mode = mode;
        }

        /// <summary>Dispatches this type to <paramref name="visitor"/>.</summary>
        public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
    }
}
using Apache.Arrow.Flatbuf;
using System;
using System.Collections.Generic;
using System.Text;

namespace Apache.Arrow
{
    /// <summary>
    /// Copy-on-write helpers for lists: every method returns a new list and leaves
    /// the input untouched.
    /// </summary>
    internal static class Utility
    {
        /// <summary>
        /// Returns a copy of <paramref name="values"/> without the element at <paramref name="index"/>.
        /// </summary>
        /// <param name="values">Source list; not modified.</param>
        /// <param name="index">Position to drop; must satisfy 0 &lt;= index &lt; Count.</param>
        /// <returns>A new list with one element fewer than the source.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="index"/> is out of range.</exception>
        public static IList<T> DeleteListElement<T>(IList<T> values, int index)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            if (index < 0 || index >= values.Count)
            {
                throw new ArgumentException("Invalid index", nameof(index));
            }

            List<T> newList = new List<T>(values);
            newList.RemoveAt(index);
            return newList;
        }

        /// <summary>
        /// Returns a copy of <paramref name="values"/> with <paramref name="newElement"/>
        /// inserted at <paramref name="index"/>.
        /// </summary>
        /// <param name="values">Source list; not modified.</param>
        /// <param name="index">Insertion position; must satisfy 0 &lt;= index &lt;= Count (Count appends).</param>
        /// <param name="newElement">Element to insert.</param>
        /// <returns>A new list with one element more than the source.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="index"/> is out of range.</exception>
        public static IList<T> AddListElement<T>(IList<T> values, int index, T newElement)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            if (index < 0 || index > values.Count)
            {
                throw new ArgumentException("Invalid index", nameof(index));
            }

            // Reserve room for the extra element up front so the insert does not reallocate.
            List<T> newList = new List<T>(values.Count + 1);
            newList.AddRange(values);
            newList.Insert(index, newElement);
            return newList;
        }

        /// <summary>
        /// Returns a copy of <paramref name="values"/> in which the element at
        /// <paramref name="index"/> is replaced by <paramref name="newElement"/>.
        /// </summary>
        /// <param name="values">Source list; not modified.</param>
        /// <param name="index">Position to replace; must satisfy 0 &lt;= index &lt; Count.</param>
        /// <param name="newElement">Replacement element.</param>
        /// <returns>A new list of the same length as the source.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="index"/> is out of range.</exception>
        public static IList<T> SetListElement<T>(IList<T> values, int index, T newElement)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            if (index < 0 || index >= values.Count)
            {
                throw new ArgumentException("Invalid index", nameof(index));
            }

            List<T> newList = new List<T>(values);
            newList[index] = newElement;
            return newList;
        }
    }
}
<ItemGroup> + <PackageReference Include="BenchmarkDotNet" Version="0.12.1" /> + <PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.12.1" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\..\src\Apache.Arrow\Apache.Arrow.csproj" /> + <ProjectReference Include="..\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/test/Apache.Arrow.Benchmarks/ArrowReaderBenchmark.cs b/src/arrow/csharp/test/Apache.Arrow.Benchmarks/ArrowReaderBenchmark.cs new file mode 100644 index 000000000..4e491a2a6 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Benchmarks/ArrowReaderBenchmark.cs @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
using Apache.Arrow.Tests;
using Apache.Arrow.Types;
using BenchmarkDotNet.Attributes;
using System;
using System.IO;
using System.Linq;
using System.Threading.Tasks;

namespace Apache.Arrow.Benchmarks
{
    /// <summary>
    /// Benchmarks reading a serialized record batch back through <see cref="ArrowStreamReader"/>
    /// from a stream, from a stream with an explicit allocator, and from the raw buffer.
    /// </summary>
    //[EtwProfiler] - needs elevated privileges
    [MemoryDiagnoser]
    public class ArrowReaderBenchmark
    {
        /// <summary>Length passed to the sample batch factory for each benchmark run.</summary>
        [Params(10_000, 1_000_000)]
        public int Count { get; set; }

        private MemoryStream _memoryStream;
        private static readonly MemoryAllocator s_allocator = new TestMemoryAllocator();

        /// <summary>Writes one sample batch of <see cref="Count"/> rows into the shared stream.</summary>
        [GlobalSetup]
        public async Task GlobalSetup()
        {
            RecordBatch sample = TestData.CreateSampleRecordBatch(length: Count);
            _memoryStream = new MemoryStream();

            var streamWriter = new ArrowStreamWriter(_memoryStream, sample.Schema);
            await streamWriter.WriteRecordBatchAsync(sample);
        }

        /// <summary>Rewinds the shared stream before every iteration.</summary>
        [IterationSetup]
        public void Setup()
        {
            _memoryStream.Position = 0;
        }

        /// <summary>Reads every batch from the stream and sums its numeric columns.</summary>
        [Benchmark]
        public async Task<double> ArrowReaderWithMemoryStream()
        {
            var reader = new ArrowStreamReader(_memoryStream);
            double total = 0;
            while (true)
            {
                RecordBatch batch = await reader.ReadNextRecordBatchAsync();
                if (batch == null)
                {
                    break;
                }

                using (batch)
                {
                    total += SumAllNumbers(batch);
                }
            }
            return total;
        }

        /// <summary>Same as the stream benchmark, but the reader is given the shared test allocator.</summary>
        [Benchmark]
        public async Task<double> ArrowReaderWithMemoryStream_ManagedMemory()
        {
            var reader = new ArrowStreamReader(_memoryStream, s_allocator);
            double total = 0;
            while (true)
            {
                RecordBatch batch = await reader.ReadNextRecordBatchAsync();
                if (batch == null)
                {
                    break;
                }

                using (batch)
                {
                    total += SumAllNumbers(batch);
                }
            }
            return total;
        }

        /// <summary>Same as the stream benchmark, but the reader is built over the stream's backing buffer.</summary>
        [Benchmark]
        public async Task<double> ArrowReaderWithMemory()
        {
            var reader = new ArrowStreamReader(_memoryStream.GetBuffer());
            double total = 0;
            while (true)
            {
                RecordBatch batch = await reader.ReadNextRecordBatchAsync();
                if (batch == null)
                {
                    break;
                }

                using (batch)
                {
                    total += SumAllNumbers(batch);
                }
            }
            return total;
        }

        /// <summary>
        /// Adds up the Int64, Double and Decimal128 columns of a batch; columns of
        /// any other type are skipped.
        /// </summary>
        private static double SumAllNumbers(RecordBatch recordBatch)
        {
            double total = 0;

            for (int columnIndex = 0; columnIndex < recordBatch.ColumnCount; columnIndex++)
            {
                var column = recordBatch.Arrays.ElementAt(columnIndex);
                ArrowTypeId columnTypeId = recordBatch.Schema.GetFieldByIndex(columnIndex).DataType.TypeId;

                // Dispatch on the schema's declared type, then cast the column accordingly.
                if (columnTypeId == ArrowTypeId.Int64)
                {
                    total += Sum((Int64Array)column);
                }
                else if (columnTypeId == ArrowTypeId.Double)
                {
                    total += Sum((DoubleArray)column);
                }
                else if (columnTypeId == ArrowTypeId.Decimal128)
                {
                    total += Sum((Decimal128Array)column);
                }
            }
            return total;
        }

        /// <summary>Sums every entry of the array's value span.</summary>
        private static double Sum(DoubleArray doubleArray)
        {
            double total = 0;
            foreach (double value in doubleArray.Values)
            {
                total += value;
            }
            return total;
        }

        /// <summary>Sums every entry of the array's value span.</summary>
        private static long Sum(Int64Array int64Array)
        {
            long total = 0;
            foreach (long value in int64Array.Values)
            {
                total += value;
            }
            return total;
        }

        /// <summary>Sums the array element by element, converting each value to double.</summary>
        private static double Sum(Decimal128Array decimal128Array)
        {
            double total = 0;
            int length = decimal128Array.Length;
            for (int i = 0; i < length; i++)
            {
                total += (double)decimal128Array.GetValue(i);
            }
            return total;
        }
    }
}
using Apache.Arrow.Ipc;
using Apache.Arrow.Tests;
using BenchmarkDotNet.Attributes;
using System.IO;
using System.Threading.Tasks;

namespace Apache.Arrow.Benchmarks
{
    /// <summary>
    /// Benchmarks writing one sample record batch to an in-memory stream with
    /// <see cref="ArrowStreamWriter"/>.
    /// </summary>
    //[EtwProfiler] - needs elevated privileges
    [MemoryDiagnoser]
    public class ArrowWriterBenchmark
    {
        private RecordBatch _batch;
        private MemoryStream _memoryStream;

        /// <summary>First argument handed to the sample batch factory.</summary>
        [Params(10_000, 1_000_000)]
        public int BatchLength { get; set; }

        /// <summary>Second argument handed to the sample batch factory.</summary>
        //Max column set count is 15 before reaching 2gb limit of memory stream
        [Params(10, 14)]
        public int ColumnSetCount { get; set; }

        /// <summary>Builds the batch to write and the stream that receives it.</summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount, false);
            _memoryStream = new MemoryStream();
        }

        /// <summary>Rewinds the stream so each iteration overwrites from the start.</summary>
        [IterationSetup]
        public void Setup() => _memoryStream.Position = 0;

        /// <summary>Writes the prepared batch to the shared stream.</summary>
        [Benchmark]
        public async Task WriteBatch()
        {
            var streamWriter = new ArrowStreamWriter(_memoryStream, _batch.Schema);
            await streamWriter.WriteRecordBatchAsync(_batch);
        }
    }
}
using BenchmarkDotNet.Running;

namespace Apache.Arrow.Benchmarks
{
    /// <summary>
    /// Entry point of the benchmark executable.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Hands the command-line arguments to a benchmark switcher built from this assembly.
        /// </summary>
        public static void Main(string[] args)
        {
            var benchmarkAssembly = typeof(Program).Assembly;
            BenchmarkSwitcher switcher = BenchmarkSwitcher.FromAssembly(benchmarkAssembly);
            switcher.Run(args);
        }
    }
}
\ No newline at end of file diff --git a/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj new file mode 100644 index 000000000..5214b3a2c --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -0,0 +1,15 @@ +<Project Sdk="Microsoft.NET.Sdk.Web"> + + <PropertyGroup> + <TargetFramework>netcoreapp3.1</TargetFramework> + </PropertyGroup> + + <ItemGroup> + <PackageReference Include="Grpc.AspNetCore" Version="2.33.1" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\..\src\Apache.Arrow.Flight.AspNetCore\Apache.Arrow.Flight.AspNetCore.csproj" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Extensions/AsyncStreamExtensions.cs b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Extensions/AsyncStreamExtensions.cs new file mode 100644 index 000000000..eeb13a8ca --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/Extensions/AsyncStreamExtensions.cs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

namespace Grpc.Core.Utils
{
    /// <summary>
    /// Helper extensions for <see cref="IAsyncStreamReader{T}"/>.
    /// </summary>
    public static class AsyncStreamExtensions
    {
        /// <summary>
        /// Drains the stream and returns everything it produced, in arrival order.
        /// </summary>
        /// <param name="streamReader">Reader to advance until it reports no further element.</param>
        /// <returns>A list holding each element read from the stream.</returns>
        public static async Task<List<T>> ToListAsync<T>(this IAsyncStreamReader<T> streamReader)
            where T : class
        {
            var items = new List<T>();
            while (true)
            {
                bool advanced = await streamReader.MoveNext().ConfigureAwait(false);
                if (!advanced)
                {
                    return items;
                }

                items.Add(streamReader.Current);
            }
        }
    }
}
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Google.Protobuf;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// Holds the descriptor, schema, location and accumulated record batches of one flight.
    /// </summary>
    public class FlightHolder
    {
        //Not thread safe, but only used in tests
        private readonly List<RecordBatchWithMetadata> _recordBatches = new List<RecordBatchWithMetadata>();

        private readonly string _location;
        private readonly Schema _schema;
        private readonly FlightDescriptor _flightDescriptor;

        /// <summary>Stores the three values that describe the flight.</summary>
        public FlightHolder(FlightDescriptor flightDescriptor, Schema schema, string location)
        {
            _location = location;
            _schema = schema;
            _flightDescriptor = flightDescriptor;
        }

        /// <summary>Appends a batch to the flight.</summary>
        public void AddBatch(RecordBatchWithMetadata recordBatchWithMetadata)
        {
            //Should validate schema here
            _recordBatches.Add(recordBatchWithMetadata);
        }

        /// <summary>Returns a copy of the batches collected so far.</summary>
        public IEnumerable<RecordBatchWithMetadata> GetRecordBatches()
        {
            var snapshot = new List<RecordBatchWithMetadata>(_recordBatches);
            return snapshot;
        }

        /// <summary>
        /// Builds the flight's info: a single endpoint whose ticket is created from the
        /// descriptor's first path (or null when there is none) and whose only location
        /// is the one given at construction.
        /// </summary>
        public FlightInfo GetFlightInfo()
        {
            var ticket = new FlightTicket(_flightDescriptor.Paths.FirstOrDefault());
            var locations = new List<FlightLocation>();
            locations.Add(new FlightLocation(_location));

            var endpoints = new List<FlightEndpoint>();
            endpoints.Add(new FlightEndpoint(ticket, locations));

            return new FlightInfo(_schema, _flightDescriptor, endpoints);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// Container mapping flight descriptors to their holders.
    /// </summary>
    public class FlightStore
    {
        /// <summary>Creates a store whose <see cref="Flights"/> map starts out empty.</summary>
        public FlightStore()
        {
            Flights = new Dictionary<FlightDescriptor, FlightHolder>();
        }

        /// <summary>Flights keyed by descriptor; the map itself is replaceable.</summary>
        public Dictionary<FlightDescriptor, FlightHolder> Flights { get; set; }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Server.Kestrel.Core;
using Microsoft.Extensions.Hosting;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// Entry point of the Flight test web host.
    /// </summary>
    public class Program
    {
        /// <summary>Builds the host from the command-line arguments and runs it.</summary>
        public static void Main(string[] args)
        {
            IHost host = CreateHostBuilder(args).Build();
            host.Run();
        }

        // Additional configuration is required to successfully run gRPC on macOS.
        // For instructions on how to configure Kestrel and gRPC clients on macOS, visit https://go.microsoft.com/fwlink/?linkid=2099682
        /// <summary>
        /// Creates the default host builder, applies the Kestrel configuration below and
        /// registers <see cref="Startup"/>.
        /// </summary>
        public static IHostBuilder CreateHostBuilder(string[] args)
        {
            return Host.CreateDefaultBuilder(args)
                .ConfigureWebHostDefaults(webBuilder =>
                {
                    webBuilder.ConfigureKestrel(ConfigureKestrel);
                    webBuilder.UseStartup<Startup>();
                });
        }

        /// <summary>
        /// In the Development environment only, adds an HTTP/2 listener on 0.0.0.0:5001.
        /// </summary>
        private static void ConfigureKestrel(WebHostBuilderContext context, KestrelServerOptions options)
        {
            if (!context.HostingEnvironment.IsDevelopment())
            {
                return;
            }

            options.Listen(
                IPEndPoint.Parse("0.0.0.0:5001"),
                listenOptions => { listenOptions.Protocols = HttpProtocols.Http2; });
        }
    }
}
using Google.Protobuf;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// Immutable pair of a record batch and the optional metadata bytes that accompany it.
    /// </summary>
    public class RecordBatchWithMetadata
    {
        /// <summary>Pairs <paramref name="recordBatch"/> with <paramref name="metadata"/> (null when omitted).</summary>
        public RecordBatchWithMetadata(RecordBatch recordBatch, ByteString metadata = null)
        {
            Metadata = metadata;
            RecordBatch = recordBatch;
        }

        /// <summary>Metadata supplied at construction; may be null.</summary>
        public ByteString Metadata { get; }

        /// <summary>Record batch supplied at construction.</summary>
        public RecordBatch RecordBatch { get; }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// Service registration and request pipeline for the Flight test web host.
    /// </summary>
    public class Startup
    {
        // Text returned for plain GET requests to the site root.
        private const string RootResponseText = "Communication with gRPC endpoints must be made through a gRPC client. To learn how to create a client, visit: https://go.microsoft.com/fwlink/?linkid=2086909";

        // This method gets called by the runtime. Use this method to add services to the container.
        // For more information on how to configure your application, visit https://go.microsoft.com/fwlink/?LinkID=398940
        /// <summary>
        /// Registers gRPC with the test Flight server and one shared <see cref="FlightStore"/> instance.
        /// </summary>
        public void ConfigureServices(IServiceCollection services)
        {
            var grpcBuilder = services.AddGrpc();
            grpcBuilder.AddFlightServer<TestFlightServer>();

            var store = new FlightStore();
            services.AddSingleton(store);
        }

        // This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
        /// <summary>
        /// Enables the developer exception page in Development, then wires routing, the
        /// Flight endpoint and a root GET handler that writes an informational message.
        /// </summary>
        public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
        {
            bool isDevelopment = env.IsDevelopment();
            if (isDevelopment)
            {
                app.UseDeveloperExceptionPage();
            }

            app.UseRouting();

            app.UseEndpoints(routes =>
            {
                routes.MapFlightEndpoint();

                routes.MapGet("/", async httpContext =>
                {
                    await httpContext.Response.WriteAsync(RootResponseText);
                });
            });
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Server;
using Grpc.Core;
using Grpc.Core.Utils;

namespace Apache.Arrow.Flight.TestWeb
{
    /// <summary>
    /// <see cref="FlightServer"/> implementation used by the tests. All flights are
    /// read from and written to the injected <see cref="FlightStore"/>.
    /// </summary>
    public class TestFlightServer : FlightServer
    {
        private readonly FlightStore _flightStore;

        /// <summary>Creates a server backed by <paramref name="flightStore"/>.</summary>
        public TestFlightServer(FlightStore flightStore)
        {
            _flightStore = flightStore;
        }

        /// <summary>
        /// Handles the "test" action by writing a single result containing "test data".
        /// </summary>
        /// <exception cref="NotImplementedException">The action type is anything other than "test".</exception>
        public override async Task DoAction(FlightAction request, IAsyncStreamWriter<FlightResult> responseStream, ServerCallContext context)
        {
            switch (request.Type)
            {
                case "test":
                    await responseStream.WriteAsync(new FlightResult("test data"));
                    break;
                default:
                    throw new NotImplementedException();
            }
        }

        /// <summary>
        /// Streams every stored record batch (with its metadata) of the flight named by the ticket.
        /// The ticket bytes are decoded as UTF-8 and used as a path descriptor to look the flight up.
        /// </summary>
        /// <exception cref="RpcException">
        /// With <see cref="StatusCode.NotFound"/> when no flight is stored for the ticket.
        /// </exception>
        public override async Task DoGet(FlightTicket ticket, FlightServerRecordBatchStreamWriter responseStream, ServerCallContext context)
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor(ticket.Ticket.ToStringUtf8());

            // Report an unknown ticket the same way GetFlightInfo/GetSchema do, instead of
            // silently completing with an empty stream that looks like an empty flight.
            if (!_flightStore.Flights.TryGetValue(flightDescriptor, out var flightHolder))
            {
                throw FlightNotFound();
            }

            foreach (var batch in flightHolder.GetRecordBatches())
            {
                await responseStream.WriteAsync(batch.RecordBatch, batch.Metadata);
            }
        }

        /// <summary>
        /// Stores every incoming record batch under the descriptor sent with the stream,
        /// creating the flight on first use, and acknowledges each batch with an empty put result.
        /// </summary>
        public override async Task DoPut(FlightServerRecordBatchStreamReader requestStream, IAsyncStreamWriter<FlightPutResult> responseStream, ServerCallContext context)
        {
            var flightDescriptor = await requestStream.FlightDescriptor;

            if (!_flightStore.Flights.TryGetValue(flightDescriptor, out var flightHolder))
            {
                // First put for this descriptor: create the holder from the stream's schema
                // and the host name of the current call.
                flightHolder = new FlightHolder(flightDescriptor, await requestStream.Schema, $"http://{context.Host}");
                _flightStore.Flights.Add(flightDescriptor, flightHolder);
            }

            while (await requestStream.MoveNext())
            {
                // Only the first metadata entry (or null when there is none) is kept per batch.
                flightHolder.AddBatch(new RecordBatchWithMetadata(requestStream.Current, requestStream.ApplicationMetadata.FirstOrDefault()));
                await responseStream.WriteAsync(FlightPutResult.Empty);
            }
        }

        /// <summary>Returns the flight info of the stored flight matching <paramref name="request"/>.</summary>
        /// <exception cref="RpcException">With <see cref="StatusCode.NotFound"/> when the flight is not stored.</exception>
        public override Task<FlightInfo> GetFlightInfo(FlightDescriptor request, ServerCallContext context)
        {
            if (_flightStore.Flights.TryGetValue(request, out var flightHolder))
            {
                return Task.FromResult(flightHolder.GetFlightInfo());
            }
            throw FlightNotFound();
        }

        /// <summary>Returns the schema of the stored flight matching <paramref name="request"/>.</summary>
        /// <exception cref="RpcException">With <see cref="StatusCode.NotFound"/> when the flight is not stored.</exception>
        public override Task<Schema> GetSchema(FlightDescriptor request, ServerCallContext context)
        {
            if (_flightStore.Flights.TryGetValue(request, out var flightHolder))
            {
                return Task.FromResult(flightHolder.GetFlightInfo().Schema);
            }
            throw FlightNotFound();
        }

        /// <summary>
        /// Writes the four advertised action types ("get", "put", "delete", "test").
        /// Note that <see cref="DoAction"/> only implements "test".
        /// </summary>
        public override async Task ListActions(IAsyncStreamWriter<FlightActionType> responseStream, ServerCallContext context)
        {
            await responseStream.WriteAsync(new FlightActionType("get", "get a flight"));
            await responseStream.WriteAsync(new FlightActionType("put", "add a flight"));
            await responseStream.WriteAsync(new FlightActionType("delete", "delete a flight"));
            await responseStream.WriteAsync(new FlightActionType("test", "test action"));
        }

        /// <summary>
        /// Writes the flight info of every stored flight. The supplied criteria are not
        /// consulted; all flights are always listed.
        /// </summary>
        public override async Task ListFlights(FlightCriteria request, IAsyncStreamWriter<FlightInfo> responseStream, ServerCallContext context)
        {
            // Snapshot first so the store is not enumerated across awaits.
            var flightInfos = _flightStore.Flights.Select(x => x.Value.GetFlightInfo()).ToList();

            foreach (var flightInfo in flightInfos)
            {
                await responseStream.WriteAsync(flightInfo);
            }
        }

        // Single place that builds the NotFound error shared by DoGet, GetFlightInfo and GetSchema.
        private static RpcException FlightNotFound()
        {
            return new RpcException(new Status(StatusCode.NotFound, "Flight not found"));
        }
    }
}
b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/appsettings.json new file mode 100644 index 000000000..1f292413b --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Flight.TestWeb/appsettings.json @@ -0,0 +1,15 @@ +{ + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft": "Warning", + "Microsoft.Hosting.Lifetime": "Information" + } + }, + "AllowedHosts": "*", + "Kestrel": { + "EndpointDefaults": { + "Protocols": "Http2" + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj new file mode 100644 index 000000000..31efc526e --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -0,0 +1,21 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFramework>netcoreapp3.1</TargetFramework> + + <IsPackable>false</IsPackable> + </PropertyGroup> + + <ItemGroup> + <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" /> + <PackageReference Include="xunit" Version="2.4.0" /> + <PackageReference Include="xunit.runner.visualstudio" Version="2.4.0" /> + <PackageReference Include="coverlet.collector" Version="1.2.0" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\Apache.Arrow.Flight.TestWeb\Apache.Arrow.Flight.TestWeb.csproj" /> + <ProjectReference Include="..\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj" /> + </ItemGroup> + +</Project> diff --git a/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/FlightInfoComparer.cs b/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/FlightInfoComparer.cs new file mode 100644 index 000000000..b92e5c4cc --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Flight.Tests/FlightInfoComparer.cs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
using Apache.Arrow.Tests;
using Xunit;

namespace Apache.Arrow.Flight.Tests
{
    /// <summary>
    /// Test helper that asserts two <see cref="FlightInfo"/> instances describe the same flight.
    /// </summary>
    public static class FlightInfoComparer
    {
        /// <summary>
        /// Asserts, in order, that endpoints, descriptor, schema, total bytes and total
        /// records of <paramref name="actual"/> match those of <paramref name="expected"/>.
        /// The first mismatch fails the calling test.
        /// </summary>
        public static void Compare(FlightInfo expected, FlightInfo actual)
        {
            CompareRouting(expected, actual);
            CompareContents(expected, actual);
        }

        // Where the flight lives and how it is addressed: endpoints, then descriptor.
        private static void CompareRouting(FlightInfo expected, FlightInfo actual)
        {
            Assert.Equal(expected.Endpoints, actual.Endpoints);
            Assert.Equal(expected.Descriptor, actual.Descriptor);
        }

        // What the flight contains: schema, then the size counters.
        private static void CompareContents(FlightInfo expected, FlightInfo actual)
        {
            SchemaComparer.Compare(expected.Schema, actual.Schema);
            Assert.Equal(expected.TotalBytes, actual.TotalBytes);
            Assert.Equal(expected.TotalRecords, actual.TotalRecords);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Apache.Arrow.Flight.Client;
using Apache.Arrow.Flight.TestWeb;
using Apache.Arrow.Tests;
using Google.Protobuf;
using Grpc.Core.Utils;
using Xunit;

namespace Apache.Arrow.Flight.Tests
{
    /// <summary>
    /// End-to-end tests that drive a <see cref="FlightClient"/> against the test web
    /// server. Each test instance gets its own <see cref="FlightStore"/> and server.
    /// </summary>
    public class FlightTests : IDisposable
    {
        private readonly TestWebFactory _testWebFactory;
        private readonly FlightClient _flightClient;
        private readonly FlightStore _flightStore;

        public FlightTests()
        {
            _flightStore = new FlightStore();
            _testWebFactory = new TestWebFactory(_flightStore);
            _flightClient = new FlightClient(_testWebFactory.GetChannel());
        }

        /// <summary>Shuts the test server down after each test.</summary>
        public void Dispose()
        {
            _testWebFactory.Dispose();
        }

        /// <summary>
        /// Builds a batch with one int32 column named "test" holding
        /// <paramref name="length"/> consecutive values starting at <paramref name="startValue"/>.
        /// </summary>
        private RecordBatch CreateTestBatch(int startValue, int length)
        {
            var batchBuilder = new RecordBatch.Builder();
            Int32Array.Builder builder = new Int32Array.Builder();
            for (int i = 0; i < length; i++)
            {
                builder.Append(startValue + i);
            }
            batchBuilder.Append("test", true, builder.Build());
            return batchBuilder.Build();
        }

        /// <summary>
        /// Asserts that the store contains <paramref name="flightDescriptor"/> and returns its batches.
        /// </summary>
        private IEnumerable<RecordBatchWithMetadata> GetStoreBatch(FlightDescriptor flightDescriptor)
        {
            Assert.Contains(flightDescriptor, (IReadOnlyDictionary<FlightDescriptor, FlightHolder>)_flightStore.Flights);

            var flightHolder = _flightStore.Flights[flightDescriptor];
            return flightHolder.GetRecordBatches();
        }

        /// <summary>
        /// Seeds the store with a flight made of <paramref name="batches"/> and returns its flight info.
        /// The schema of the first batch is used as the flight schema.
        /// </summary>
        /// <exception cref="ArgumentException">No batch was supplied, so no schema can be derived.</exception>
        private FlightInfo GivenStoreBatches(FlightDescriptor flightDescriptor, params RecordBatchWithMetadata[] batches)
        {
            // Fail with a clear message instead of a NullReferenceException when called without batches.
            if (batches == null || batches.Length == 0)
            {
                throw new ArgumentException("At least one record batch is required to derive the flight schema.", nameof(batches));
            }

            var flightHolder = new FlightHolder(flightDescriptor, batches[0].RecordBatch.Schema, _testWebFactory.GetAddress());

            foreach (var batch in batches)
            {
                flightHolder.AddBatch(batch);
            }

            _flightStore.Flights.Add(flightDescriptor, flightHolder);

            return flightHolder.GetFlightInfo();
        }

        [Fact]
        public async Task TestPutSingleRecordBatch()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch = CreateTestBatch(0, 100);

            var putStream = _flightClient.StartPut(flightDescriptor);
            await putStream.RequestStream.WriteAsync(expectedBatch);
            await putStream.RequestStream.CompleteAsync();
            var putResults = await putStream.ResponseStream.ToListAsync();

            Assert.Single(putResults);

            var actualBatches = GetStoreBatch(flightDescriptor);
            Assert.Single(actualBatches);

            ArrowReaderVerifier.CompareBatches(expectedBatch, actualBatches.First().RecordBatch);
        }

        [Fact]
        public async Task TestPutTwoRecordBatches()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch1 = CreateTestBatch(0, 100);
            var expectedBatch2 = CreateTestBatch(0, 100);

            var putStream = _flightClient.StartPut(flightDescriptor);
            await putStream.RequestStream.WriteAsync(expectedBatch1);
            await putStream.RequestStream.WriteAsync(expectedBatch2);
            await putStream.RequestStream.CompleteAsync();
            var putResults = await putStream.ResponseStream.ToListAsync();

            Assert.Equal(2, putResults.Count);

            var actualBatches = GetStoreBatch(flightDescriptor).ToList();
            Assert.Equal(2, actualBatches.Count);

            ArrowReaderVerifier.CompareBatches(expectedBatch1, actualBatches[0].RecordBatch);
            ArrowReaderVerifier.CompareBatches(expectedBatch2, actualBatches[1].RecordBatch);
        }

        [Fact]
        public async Task TestGetSingleRecordBatch()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch = CreateTestBatch(0, 100);

            //Add batch to the in memory store
            GivenStoreBatches(flightDescriptor, new RecordBatchWithMetadata(expectedBatch));

            //Get the flight info for the ticket
            var flightInfo = await _flightClient.GetInfo(flightDescriptor);
            Assert.Single(flightInfo.Endpoints);

            var endpoint = flightInfo.Endpoints.FirstOrDefault();

            var getStream = _flightClient.GetStream(endpoint.Ticket);
            var resultList = await getStream.ResponseStream.ToListAsync();

            Assert.Single(resultList);
            ArrowReaderVerifier.CompareBatches(expectedBatch, resultList[0]);
        }

        [Fact]
        public async Task TestGetTwoRecordBatch()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch1 = CreateTestBatch(0, 100);
            var expectedBatch2 = CreateTestBatch(100, 100);

            //Add batch to the in memory store
            GivenStoreBatches(flightDescriptor, new RecordBatchWithMetadata(expectedBatch1), new RecordBatchWithMetadata(expectedBatch2));

            //Get the flight info for the ticket
            var flightInfo = await _flightClient.GetInfo(flightDescriptor);
            Assert.Single(flightInfo.Endpoints);

            var endpoint = flightInfo.Endpoints.FirstOrDefault();

            var getStream = _flightClient.GetStream(endpoint.Ticket);
            var resultList = await getStream.ResponseStream.ToListAsync();

            Assert.Equal(2, resultList.Count);
            ArrowReaderVerifier.CompareBatches(expectedBatch1, resultList[0]);
            ArrowReaderVerifier.CompareBatches(expectedBatch2, resultList[1]);
        }

        [Fact]
        public async Task TestGetFlightMetadata()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch1 = CreateTestBatch(0, 100);

            var expectedMetadata = ByteString.CopyFromUtf8("test metadata");
            var expectedMetadataList = new List<ByteString>() { expectedMetadata };

            //Add batch to the in memory store
            GivenStoreBatches(flightDescriptor, new RecordBatchWithMetadata(expectedBatch1, expectedMetadata));

            //Get the flight info for the ticket
            var flightInfo = await _flightClient.GetInfo(flightDescriptor);
            Assert.Single(flightInfo.Endpoints);

            var endpoint = flightInfo.Endpoints.FirstOrDefault();

            var getStream = _flightClient.GetStream(endpoint.Ticket);

            //Collect the metadata attached to every batch read from the stream
            List<ByteString> actualMetadata = new List<ByteString>();
            while (await getStream.ResponseStream.MoveNext(default))
            {
                actualMetadata.AddRange(getStream.ResponseStream.ApplicationMetadata);
            }

            Assert.Equal(expectedMetadataList, actualMetadata);
        }

        [Fact]
        public async Task TestPutWithMetadata()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch = CreateTestBatch(0, 100);
            var expectedMetadata = ByteString.CopyFromUtf8("test metadata");

            var putStream = _flightClient.StartPut(flightDescriptor);
            await putStream.RequestStream.WriteAsync(expectedBatch, expectedMetadata);
            await putStream.RequestStream.CompleteAsync();
            var putResults = await putStream.ResponseStream.ToListAsync();

            Assert.Single(putResults);

            var actualBatches = GetStoreBatch(flightDescriptor);
            Assert.Single(actualBatches);

            ArrowReaderVerifier.CompareBatches(expectedBatch, actualBatches.First().RecordBatch);
            Assert.Equal(expectedMetadata, actualBatches.First().Metadata);
        }

        [Fact]
        public async Task TestGetSchema()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch = CreateTestBatch(0, 100);
            var expectedSchema = expectedBatch.Schema;

            GivenStoreBatches(flightDescriptor, new RecordBatchWithMetadata(expectedBatch));

            var actualSchema = await _flightClient.GetSchema(flightDescriptor);

            SchemaComparer.Compare(expectedSchema, actualSchema);
        }

        [Fact]
        public async Task TestDoAction()
        {
            var expectedResult = new List<FlightResult>()
            {
                new FlightResult("test data")
            };

            var resultStream = _flightClient.DoAction(new FlightAction("test"));
            var actualResult = await resultStream.ResponseStream.ToListAsync();

            Assert.Equal(expectedResult, actualResult);
        }

        [Fact]
        public async Task TestListActions()
        {
            var expected = new List<FlightActionType>()
            {
                new FlightActionType("get", "get a flight"),
                new FlightActionType("put", "add a flight"),
                new FlightActionType("delete", "delete a flight"),
                new FlightActionType("test", "test action")
            };

            var actual = await _flightClient.ListActions().ResponseStream.ToListAsync();

            Assert.Equal(expected, actual);
        }

        [Fact]
        public async Task TestListFlights()
        {
            var flightDescriptor1 = FlightDescriptor.CreatePathDescriptor("test1");
            var flightDescriptor2 = FlightDescriptor.CreatePathDescriptor("test2");
            var expectedBatch = CreateTestBatch(0, 100);

            List<FlightInfo> expectedFlightInfo = new List<FlightInfo>();

            expectedFlightInfo.Add(GivenStoreBatches(flightDescriptor1, new RecordBatchWithMetadata(expectedBatch)));
            expectedFlightInfo.Add(GivenStoreBatches(flightDescriptor2, new RecordBatchWithMetadata(expectedBatch)));

            var listFlightStream = _flightClient.ListFlights();

            var actualFlights = await listFlightStream.ResponseStream.ToListAsync();

            // Check the count first: a short list would otherwise surface as an
            // ArgumentOutOfRangeException below, and extra flights would go unnoticed.
            Assert.Equal(expectedFlightInfo.Count, actualFlights.Count);

            // NOTE(review): assumes the server lists flights in insertion order - confirm against FlightStore.
            for (int i = 0; i < expectedFlightInfo.Count; i++)
            {
                FlightInfoComparer.Compare(expectedFlightInfo[i], actualFlights[i]);
            }
        }

        [Fact]
        public async Task TestGetBatchesWithAsyncEnumerable()
        {
            var flightDescriptor = FlightDescriptor.CreatePathDescriptor("test");
            var expectedBatch1 = CreateTestBatch(0, 100);
            var expectedBatch2 = CreateTestBatch(100, 100);

            //Add batch to the in memory store
            GivenStoreBatches(flightDescriptor, new RecordBatchWithMetadata(expectedBatch1), new RecordBatchWithMetadata(expectedBatch2));

            //Get the flight info for the ticket
            var flightInfo = await _flightClient.GetInfo(flightDescriptor);
            Assert.Single(flightInfo.Endpoints);

            var endpoint = flightInfo.Endpoints.FirstOrDefault();

            var getStream = _flightClient.GetStream(endpoint.Ticket);

            List<RecordBatch> resultList = new List<RecordBatch>();
            await foreach (var recordBatch in getStream.ResponseStream)
            {
                resultList.Add(recordBatch);
            }

            Assert.Equal(2, resultList.Count);
            ArrowReaderVerifier.CompareBatches(expectedBatch1, resultList[0]);
            ArrowReaderVerifier.CompareBatches(expectedBatch2, resultList[1]);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Net;
using System.Text;
using Apache.Arrow.Flight.TestWeb;
using Grpc.Net.Client;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Server.Kestrel.Core;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;

namespace Apache.Arrow.Flight.Tests
{
    /// <summary>
    /// Hosts the Flight test web application in-process on a fixed port (5001, HTTP/2
    /// without TLS) and hands out gRPC channels that point at it.
    /// </summary>
    public class TestWebFactory : IDisposable
    {
        private readonly IHost _host;

        /// <summary>
        /// Builds and starts the server.
        /// </summary>
        /// <param name="flightStore">Store instance registered as a singleton in the server's container.</param>
        public TestWebFactory(FlightStore flightStore)
        {
            // Enable HTTP/2 over cleartext for the client before any channel can be created;
            // the server below only listens on plain http.
            AppContext.SetSwitch(
                "System.Net.Http.SocketsHttpHandler.Http2UnencryptedSupport", true);

            _host = WebHostBuilder(flightStore).Build(); //Create the server
            _host.Start();
        }

        // Configures Kestrel to listen on 0.0.0.0:5001 with HTTP/2 only, uses Startup for the
        // pipeline and then registers the supplied store.
        // NOTE(review): Startup also registers its own FlightStore singleton; this registration
        // is added afterwards and presumably takes precedence - confirm.
        private IHostBuilder WebHostBuilder(FlightStore flightStore)
        {
            return Host.CreateDefaultBuilder()
                .ConfigureWebHostDefaults(webBuilder =>
                {
                    webBuilder
                        .ConfigureKestrel(c =>
                        {
                            c.Listen(IPEndPoint.Parse("0.0.0.0:5001"), l => l.Protocols = HttpProtocols.Http2);
                        })
                        .UseStartup<Startup>()
                        .ConfigureServices(services =>
                        {
                            services.AddSingleton(flightStore);
                        });
                });
        }

        /// <summary>Returns the loopback address clients should use to reach the server.</summary>
        public string GetAddress()
        {
            return "http://127.0.0.1:5001";
        }

        /// <summary>Creates a new gRPC channel to <see cref="GetAddress"/>. The caller owns the channel.</summary>
        public GrpcChannel GetChannel()
        {
            return GrpcChannel.ForAddress(GetAddress());
        }

        /// <summary>Stops the server, blocking until shutdown has completed.</summary>
        public void Stop()
        {
            _host.StopAsync().Wait();
        }

        /// <summary>
        /// Stops the server and releases the host, even when stopping throws.
        /// </summary>
        public void Dispose()
        {
            try
            {
                Stop();
            }
            finally
            {
                // Stopping alone does not release the host's resources.
                _host.Dispose();
            }
        }
    }
}
/> + <PackageReference Include="System.Text.Json" Version="5.0.2" /> + <ProjectReference Include="..\..\src\Apache.Arrow\Apache.Arrow.csproj" /> + <ProjectReference Include="..\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj" /> + </ItemGroup> + +</Project>
\ No newline at end of file diff --git a/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs new file mode 100644 index 000000000..d45662419 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -0,0 +1,609 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Numerics; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using Apache.Arrow.Arrays; +using Apache.Arrow.Ipc; +using Apache.Arrow.Tests; +using Apache.Arrow.Types; + +namespace Apache.Arrow.IntegrationTest +{ + public class IntegrationCommand + { + public string Mode { get; set; } + public FileInfo JsonFileInfo { get; set; } + public FileInfo ArrowFileInfo { get; set; } + + public IntegrationCommand(string mode, FileInfo jsonFileInfo, FileInfo arrowFileInfo) + { + Mode = mode; + JsonFileInfo = jsonFileInfo; + ArrowFileInfo = arrowFileInfo; + } + + public async Task<int> Execute() + { + Func<Task<int>> commandDelegate = Mode switch + { + "validate" => Validate, + "json-to-arrow" => JsonToArrow, + "stream-to-file" => StreamToFile, + "file-to-stream" => FileToStream, + _ => () => + { + Console.WriteLine($"Mode '{Mode}' is not supported."); + return Task.FromResult(-1); + } + }; + return await commandDelegate(); + } + + private async Task<int> Validate() + { + JsonFile jsonFile = await ParseJsonFile(); + + using FileStream arrowFileStream = ArrowFileInfo.OpenRead(); + using ArrowFileReader reader = new ArrowFileReader(arrowFileStream); + int batchCount = await reader.RecordBatchCountAsync(); + + if (batchCount != jsonFile.Batches.Count) + { + Console.WriteLine($"Incorrect batch count. 
JsonFile: {jsonFile.Batches.Count}, ArrowFile: {batchCount}"); + return -1; + } + + Schema jsonFileSchema = CreateSchema(jsonFile.Schema); + Schema arrowFileSchema = reader.Schema; + + SchemaComparer.Compare(jsonFileSchema, arrowFileSchema); + + for (int i = 0; i < batchCount; i++) + { + RecordBatch arrowFileRecordBatch = reader.ReadNextRecordBatch(); + RecordBatch jsonFileRecordBatch = CreateRecordBatch(jsonFileSchema, jsonFile.Batches[i]); + + ArrowReaderVerifier.CompareBatches(jsonFileRecordBatch, arrowFileRecordBatch, strictCompare: false); + } + + // ensure there are no more batches in the file + if (reader.ReadNextRecordBatch() != null) + { + Console.WriteLine($"The ArrowFile has more RecordBatches than it should."); + return -1; + } + + return 0; + } + + private async Task<int> JsonToArrow() + { + JsonFile jsonFile = await ParseJsonFile(); + Schema schema = CreateSchema(jsonFile.Schema); + + using (FileStream fs = ArrowFileInfo.Create()) + { + ArrowFileWriter writer = new ArrowFileWriter(fs, schema); + await writer.WriteStartAsync(); + + foreach (var jsonRecordBatch in jsonFile.Batches) + { + RecordBatch batch = CreateRecordBatch(schema, jsonRecordBatch); + await writer.WriteRecordBatchAsync(batch); + } + await writer.WriteEndAsync(); + await fs.FlushAsync(); + } + + return 0; + } + + private RecordBatch CreateRecordBatch(Schema schema, JsonRecordBatch jsonRecordBatch) + { + if (schema.Fields.Count != jsonRecordBatch.Columns.Count) + { + throw new NotSupportedException($"jsonRecordBatch.Columns.Count '{jsonRecordBatch.Columns.Count}' doesn't match schema field count '{schema.Fields.Count}'"); + } + + List<IArrowArray> arrays = new List<IArrowArray>(jsonRecordBatch.Columns.Count); + for (int i = 0; i < jsonRecordBatch.Columns.Count; i++) + { + JsonFieldData data = jsonRecordBatch.Columns[i]; + Field field = schema.GetFieldByName(data.Name); + ArrayCreator creator = new ArrayCreator(data); + field.DataType.Accept(creator); + arrays.Add(creator.Array); + } + + 
return new RecordBatch(schema, arrays, jsonRecordBatch.Count); + } + + private static Schema CreateSchema(JsonSchema jsonSchema) + { + Schema.Builder builder = new Schema.Builder(); + for (int i = 0; i < jsonSchema.Fields.Count; i++) + { + builder.Field(f => CreateField(f, jsonSchema.Fields[i])); + } + return builder.Build(); + } + + private static void CreateField(Field.Builder builder, JsonField jsonField) + { + builder.Name(jsonField.Name) + .DataType(ToArrowType(jsonField.Type)) + .Nullable(jsonField.Nullable); + + if (jsonField.Metadata != null) + { + builder.Metadata(jsonField.Metadata); + } + } + + private static IArrowType ToArrowType(JsonArrowType type) + { + return type.Name switch + { + "bool" => BooleanType.Default, + "int" => ToIntArrowType(type), + "floatingpoint" => ToFloatingPointArrowType(type), + "decimal" => ToDecimalArrowType(type), + "binary" => BinaryType.Default, + "utf8" => StringType.Default, + "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), + "date" => ToDateArrowType(type), + "time" => ToTimeArrowType(type), + "timestamp" => ToTimestampArrowType(type), + _ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}") + }; + } + + private static IArrowType ToIntArrowType(JsonArrowType type) + { + return (type.BitWidth, type.IsSigned) switch + { + (8, true) => Int8Type.Default, + (8, false) => UInt8Type.Default, + (16, true) => Int16Type.Default, + (16, false) => UInt16Type.Default, + (32, true) => Int32Type.Default, + (32, false) => UInt32Type.Default, + (64, true) => Int64Type.Default, + (64, false) => UInt64Type.Default, + _ => throw new NotSupportedException($"Int type not supported: {type.BitWidth}, {type.IsSigned}") + }; + } + + private static IArrowType ToFloatingPointArrowType(JsonArrowType type) + { + return type.FloatingPointPrecision switch + { + "SINGLE" => FloatType.Default, + "DOUBLE" => DoubleType.Default, + _ => throw new NotSupportedException($"FloatingPoint type not supported: 
{type.FloatingPointPrecision}") + }; + } + + private static IArrowType ToDecimalArrowType(JsonArrowType type) + { + return type.BitWidth switch + { + 256 => new Decimal256Type(type.DecimalPrecision, type.Scale), + _ => new Decimal128Type(type.DecimalPrecision, type.Scale), + }; + } + + private static IArrowType ToDateArrowType(JsonArrowType type) + { + return type.Unit switch + { + "DAY" => Date32Type.Default, + "MILLISECOND" => Date64Type.Default, + _ => throw new NotSupportedException($"Date type not supported: {type.Unit}") + }; + } + + private static IArrowType ToTimeArrowType(JsonArrowType type) + { + return (type.Unit, type.BitWidth) switch + { + ("SECOND", 32) => new Time32Type(TimeUnit.Second), + ("SECOND", 64) => new Time64Type(TimeUnit.Second), + ("MILLISECOND", 32) => new Time32Type(TimeUnit.Millisecond), + ("MILLISECOND", 64) => new Time64Type(TimeUnit.Millisecond), + ("MICROSECOND", 32) => new Time32Type(TimeUnit.Microsecond), + ("MICROSECOND", 64) => new Time64Type(TimeUnit.Microsecond), + ("NANOSECOND", 32) => new Time32Type(TimeUnit.Nanosecond), + ("NANOSECOND", 64) => new Time64Type(TimeUnit.Nanosecond), + _ => throw new NotSupportedException($"Time type not supported: {type.Unit}, {type.BitWidth}") + }; + } + + private static IArrowType ToTimestampArrowType(JsonArrowType type) + { + return type.Unit switch + { + "SECOND" => new TimestampType(TimeUnit.Second, type.Timezone), + "MILLISECOND" => new TimestampType(TimeUnit.Millisecond, type.Timezone), + "MICROSECOND" => new TimestampType(TimeUnit.Microsecond, type.Timezone), + "NANOSECOND" => new TimestampType(TimeUnit.Nanosecond, type.Timezone), + _ => throw new NotSupportedException($"Time type not supported: {type.Unit}, {type.BitWidth}") + }; + } + + private class ArrayCreator : + IArrowTypeVisitor<BooleanType>, + IArrowTypeVisitor<Int8Type>, + IArrowTypeVisitor<Int16Type>, + IArrowTypeVisitor<Int32Type>, + IArrowTypeVisitor<Int64Type>, + IArrowTypeVisitor<UInt8Type>, + 
IArrowTypeVisitor<UInt16Type>, + IArrowTypeVisitor<UInt32Type>, + IArrowTypeVisitor<UInt64Type>, + IArrowTypeVisitor<FloatType>, + IArrowTypeVisitor<DoubleType>, + IArrowTypeVisitor<Decimal128Type>, + IArrowTypeVisitor<Decimal256Type>, + IArrowTypeVisitor<Date32Type>, + IArrowTypeVisitor<Date64Type>, + IArrowTypeVisitor<TimestampType>, + IArrowTypeVisitor<StringType>, + IArrowTypeVisitor<BinaryType>, + IArrowTypeVisitor<FixedSizeBinaryType>, + IArrowTypeVisitor<ListType>, + IArrowTypeVisitor<StructType> + { + private JsonFieldData JsonFieldData { get; } + public IArrowArray Array { get; private set; } + + public ArrayCreator(JsonFieldData jsonFieldData) + { + JsonFieldData = jsonFieldData; + } + + public void Visit(BooleanType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer.BitmapBuilder valueBuilder = new ArrowBuffer.BitmapBuilder(validityBuffer.Length); + + var json = JsonFieldData.Data.GetRawText(); + bool[] values = JsonSerializer.Deserialize<bool[]>(json); + + foreach (bool value in values) + { + valueBuilder.Append(value); + } + ArrowBuffer valueBuffer = valueBuilder.Build(); + + Array = new BooleanArray( + valueBuffer, validityBuffer, + JsonFieldData.Count, nullCount, 0); + } + + public void Visit(Int8Type type) => GenerateArray<sbyte, Int8Array>((v, n, c, nc, o) => new Int8Array(v, n, c, nc, o)); + public void Visit(Int16Type type) => GenerateArray<short, Int16Array>((v, n, c, nc, o) => new Int16Array(v, n, c, nc, o)); + public void Visit(Int32Type type) => GenerateArray<int, Int32Array>((v, n, c, nc, o) => new Int32Array(v, n, c, nc, o)); + public void Visit(Int64Type type) => GenerateLongArray<long, Int64Array>((v, n, c, nc, o) => new Int64Array(v, n, c, nc, o), s => long.Parse(s)); + public void Visit(UInt8Type type) => GenerateArray<byte, UInt8Array>((v, n, c, nc, o) => new UInt8Array(v, n, c, nc, o)); + public void Visit(UInt16Type type) => GenerateArray<ushort, UInt16Array>((v, n, c, nc, o) => new 
UInt16Array(v, n, c, nc, o));
            public void Visit(UInt32Type type) => GenerateArray<uint, UInt32Array>((v, n, c, nc, o) => new UInt32Array(v, n, c, nc, o));
            // Invariant culture: the text is machine-written JSON, independent of the current culture.
            public void Visit(UInt64Type type) => GenerateLongArray<ulong, UInt64Array>((v, n, c, nc, o) => new UInt64Array(v, n, c, nc, o), s => ulong.Parse(s, CultureInfo.InvariantCulture));
            public void Visit(FloatType type) => GenerateArray<float, FloatArray>((v, n, c, nc, o) => new FloatArray(v, n, c, nc, o));
            public void Visit(DoubleType type) => GenerateArray<double, DoubleArray>((v, n, c, nc, o) => new DoubleArray(v, n, c, nc, o));

            public void Visit(Decimal128Type type)
            {
                Array = new Decimal128Array(GetDecimalArrayData(type));
            }

            public void Visit(Decimal256Type type)
            {
                Array = new Decimal256Array(GetDecimalArrayData(type));
            }

            /// <summary>
            /// Builds the <see cref="ArrayData"/> shared by both decimal visitors: every JSON string is
            /// parsed as an integer and written into a slot of <c>type.ByteWidth</c> bytes.
            /// </summary>
            /// <exception cref="InvalidDataException">A value does not fit into the slot.</exception>
            private ArrayData GetDecimalArrayData(FixedSizeBinaryType type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

                var json = JsonFieldData.Data.GetRawText();
                string[] values = JsonSerializer.Deserialize<string[]>(json, s_options);

                // One scratch slot, reused (and re-zeroed) for every value.
                Span<byte> buffer = stackalloc byte[type.ByteWidth];

                ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();
                foreach (string value in values)
                {
                    buffer.Fill(0);

                    // Invariant culture: the digits come from a data file, not from user-facing text.
                    BigInteger bigInteger = BigInteger.Parse(value, CultureInfo.InvariantCulture);
                    if (!bigInteger.TryWriteBytes(buffer, out int bytesWritten, false, !BitConverter.IsLittleEndian))
                    {
                        throw new InvalidDataException($"Decimal data was too big to fit into {type.BitWidth} bits.");
                    }

                    if (bigInteger.Sign == -1)
                    {
                        // Sign-extend: the bytes after the written ones become 0xFF for negative values.
                        buffer.Slice(bytesWritten).Fill(255);
                    }

                    valueBuilder.Append(buffer);
                }
                ArrowBuffer valueBuffer = valueBuilder.Build(default);

                return new ArrayData(type, JsonFieldData.Count, nullCount, 0, new[] { validityBuffer, valueBuffer });
            }

            public void Visit(Date32Type type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

                ArrowBuffer.Builder<int> valueBuilder = new ArrowBuffer.Builder<int>(JsonFieldData.Count);
                var json =
JsonFieldData.Data.GetRawText();
                int[] values = JsonSerializer.Deserialize<int[]>(json, s_options);

                foreach (int value in values)
                {
                    valueBuilder.Append(value);
                }
                ArrowBuffer valueBuffer = valueBuilder.Build();

                Array = new Date32Array(
                    valueBuffer, validityBuffer,
                    JsonFieldData.Count, nullCount, 0);
            }

            /// <summary>Builds a <see cref="Date64Array"/>; the JSON carries each 64-bit value as a string.</summary>
            public void Visit(Date64Type type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

                ArrowBuffer.Builder<long> valueBuilder = new ArrowBuffer.Builder<long>(JsonFieldData.Count);
                var json = JsonFieldData.Data.GetRawText();
                string[] values = JsonSerializer.Deserialize<string[]>(json, s_options);

                foreach (string value in values)
                {
                    // Invariant culture: the digits come from a data file, not from user-facing text.
                    valueBuilder.Append(long.Parse(value, CultureInfo.InvariantCulture));
                }
                ArrowBuffer valueBuffer = valueBuilder.Build();

                Array = new Date64Array(
                    valueBuffer, validityBuffer,
                    JsonFieldData.Count, nullCount, 0);
            }

            public void Visit(TimestampType type)
            {
                throw new NotImplementedException();
            }

            /// <summary>
            /// Builds a <see cref="StringArray"/>: offsets come from the JSON OFFSET list and the value
            /// buffer is the concatenated UTF-8 encoding of the JSON strings.
            /// </summary>
            public void Visit(StringType type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
                ArrowBuffer offsetBuffer = GetOffsetBuffer();

                var json = JsonFieldData.Data.GetRawText();
                string[] values = JsonSerializer.Deserialize<string[]>(json, s_options);

                ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();
                foreach (string value in values)
                {
                    valueBuilder.Append(Encoding.UTF8.GetBytes(value));
                }
                ArrowBuffer valueBuffer = valueBuilder.Build(default);

                Array = new StringArray(JsonFieldData.Count, offsetBuffer, valueBuffer, validityBuffer, nullCount);
            }

            public void Visit(BinaryType type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);
                ArrowBuffer offsetBuffer = GetOffsetBuffer();

                var json = JsonFieldData.Data.GetRawText();
                string[] values = JsonSerializer.Deserialize<string[]>(json, s_options);

                ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();
                foreach (string value in
values)
                {
                    valueBuilder.Append(ConvertHexStringToByteArray(value));
                }
                ArrowBuffer valueBuffer = valueBuilder.Build(default);

                ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, new[] { validityBuffer, offsetBuffer, valueBuffer });
                Array = new BinaryArray(arrayData);
            }

            /// <summary>Builds a <see cref="FixedSizeBinaryArray"/> from the column's hex-encoded strings.</summary>
            public void Visit(FixedSizeBinaryType type)
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

                var json = JsonFieldData.Data.GetRawText();
                string[] values = JsonSerializer.Deserialize<string[]>(json, s_options);

                ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();
                foreach (string value in values)
                {
                    valueBuilder.Append(ConvertHexStringToByteArray(value));
                }
                ArrowBuffer valueBuffer = valueBuilder.Build(default);

                ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, new[] { validityBuffer, valueBuffer });
                Array = new FixedSizeBinaryArray(arrayData);
            }

            public void Visit(ListType type)
            {
                throw new NotImplementedException();
            }

            public void Visit(StructType type)
            {
                throw new NotImplementedException();
            }

            /// <summary>
            /// Decodes a string of hexadecimal digit pairs into bytes (two characters per byte).
            /// </summary>
            /// <param name="hexString">Hex digits; an empty string yields an empty array.</param>
            /// <returns>One byte per pair of characters.</returns>
            /// <exception cref="FormatException">
            /// The string has an odd number of characters or contains a non-hex character.
            /// </exception>
            private static byte[] ConvertHexStringToByteArray(string hexString)
            {
                // An odd length used to drop the final digit silently (integer division below),
                // which would hide corrupt input.
                if (hexString.Length % 2 != 0)
                {
                    throw new FormatException($"Hex string must contain an even number of characters, but had {hexString.Length}.");
                }

                byte[] data = new byte[hexString.Length / 2];
                for (int index = 0; index < data.Length; index++)
                {
                    data[index] = byte.Parse(hexString.AsSpan(index * 2, 2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                }

                return data;
            }

            // Serializer options shared by the visitors; registers ByteArrayConverter.
            private static readonly JsonSerializerOptions s_options = new JsonSerializerOptions()
            {
                Converters =
                {
                    new ByteArrayConverter()
                }
            };

            private void GenerateArray<T, TArray>(Func<ArrowBuffer, ArrowBuffer, int, int, int, TArray> createArray)
                where TArray : PrimitiveArray<T>
                where T : struct
            {
                ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount);

                ArrowBuffer.Builder<T> valueBuilder = new ArrowBuffer.Builder<T>(JsonFieldData.Count);
                var json = JsonFieldData.Data.GetRawText();
                T[] values =
JsonSerializer.Deserialize<T[]>(json, s_options);

                foreach (T value in values)
                {
                    valueBuilder.Append(value);
                }
                ArrowBuffer valueBuffer = valueBuilder.Build();

                Array = createArray(
                    valueBuffer, validityBuffer,
                    JsonFieldData.Count, nullCount, 0);
            }

            // Same as GenerateArray, except the JSON values are strings that "parse" converts to T.
            private void GenerateLongArray<T, TArray>(Func<ArrowBuffer, ArrowBuffer, int, int, int, TArray> createArray, Func<string, T> parse)
                where TArray : PrimitiveArray<T>
                where T : struct
            {
                ArrowBuffer validity = GetValidityBuffer(out int nulls);

                var builder = new ArrowBuffer.Builder<T>(JsonFieldData.Count);
                string rawJson = JsonFieldData.Data.GetRawText();
                string[] texts = JsonSerializer.Deserialize<string[]>(rawJson);

                foreach (string text in texts)
                {
                    builder.Append(parse(text));
                }
                ArrowBuffer data = builder.Build();

                Array = createArray(data, validity, JsonFieldData.Count, nulls, 0);
            }

            // Copies the column's OFFSET list into an ArrowBuffer.
            private ArrowBuffer GetOffsetBuffer()
            {
                int[] offsets = JsonFieldData.Offset;

                var offsetBuilder = new ArrowBuffer.Builder<int>(offsets.Length);
                offsetBuilder.AppendRange(offsets);
                return offsetBuilder.Build(default);
            }

            // Packs the column's VALIDITY flags into a bitmap and reports how many are unset.
            // A column without a validity list gets an empty buffer and a null count of zero.
            private ArrowBuffer GetValidityBuffer(out int nullCount)
            {
                bool[] flags = JsonFieldData.Validity;
                if (flags == null)
                {
                    nullCount = 0;
                    return ArrowBuffer.Empty;
                }

                var bitmap = new ArrowBuffer.BitmapBuilder(flags.Length);
                bitmap.AppendRange(flags);

                nullCount = bitmap.UnsetBitCount;
                return bitmap.Build();
            }

            // Fallback for every type without a dedicated overload.
            public void Visit(IArrowType type)
            {
                throw new NotImplementedException($"{type.Name} not implemented");
            }
        }

        private async Task<int> StreamToFile()
        {
            using ArrowStreamReader reader = new ArrowStreamReader(Console.OpenStandardInput());

            RecordBatch batch = await reader.ReadNextRecordBatchAsync();

            using FileStream fileStream = ArrowFileInfo.OpenWrite();
            using
ArrowFileWriter writer = new ArrowFileWriter(fileStream, reader.Schema);
            await writer.WriteStartAsync();

            while (batch != null)
            {
                await writer.WriteRecordBatchAsync(batch);

                batch = await reader.ReadNextRecordBatchAsync();
            }

            await writer.WriteEndAsync();

            return 0;
        }

        // Copies every record batch of the Arrow file to standard output in stream format.
        private async Task<int> FileToStream()
        {
            using FileStream input = ArrowFileInfo.OpenRead();
            using ArrowFileReader arrowFile = new ArrowFileReader(input);

            // read the record batch count to initialize the Schema
            await arrowFile.RecordBatchCountAsync();

            using ArrowStreamWriter output = new ArrowStreamWriter(Console.OpenStandardOutput(), arrowFile.Schema);
            await output.WriteStartAsync();

            for (RecordBatch current = arrowFile.ReadNextRecordBatch();
                 current != null;
                 current = arrowFile.ReadNextRecordBatch())
            {
                await output.WriteRecordBatchAsync(current);
            }

            await output.WriteEndAsync();

            return 0;
        }

        // Deserializes the JSON file using the integration-format naming policy and the validity converter.
        private async ValueTask<JsonFile> ParseJsonFile()
        {
            using var jsonStream = JsonFileInfo.OpenRead();

            var serializerOptions = new JsonSerializerOptions()
            {
                PropertyNamingPolicy = JsonFileNamingPolicy.Instance,
                Converters =
                {
                    new ValidityConverter()
                }
            };

            return await JsonSerializer.DeserializeAsync<JsonFile>(jsonStream, serializerOptions);
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
new file mode 100644
index 000000000..f074afc01
--- /dev/null
+++ b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -0,0 +1,184 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace Apache.Arrow.IntegrationTest
{
    /// <summary>Root of the JSON document: a schema plus its record batches.</summary>
    public class JsonFile
    {
        public JsonSchema Schema { get; set; }
        public List<JsonRecordBatch> Batches { get; set; }
        //public List<DictionaryBatch> Dictionaries {get;set;}
    }

    /// <summary>Schema section: the field list and optional metadata.</summary>
    public class JsonSchema
    {
        public List<JsonField> Fields { get; set; }
        public JsonMetadata Metadata { get; set; }
    }

    /// <summary>One schema field, possibly with child fields.</summary>
    public class JsonField
    {
        public string Name { get; set; }
        public bool Nullable { get; set; }
        public JsonArrowType Type { get; set; }
        public List<JsonField> Children { get; set; }
        public JsonDictionaryIndex Dictionary { get; set; }
        public JsonMetadata Metadata { get; set; }
    }

    /// <summary>
    /// Type description of a field. Members not bound to a declared property land in
    /// <see cref="ExtensionData"/>; both precision accessors read its "precision" entry.
    /// </summary>
    public class JsonArrowType
    {
        public string Name { get; set; }

        // int fields
        public int BitWidth { get; set; }
        public bool IsSigned { get; set; }

        // floating point fields ("precision" read as a string)
        [JsonIgnore]
        public string FloatingPointPrecision
        {
            get { return ExtensionData["precision"].GetString(); }
        }

        // decimal fields ("precision" read as an Int32)
        [JsonIgnore]
        public int DecimalPrecision
        {
            get { return ExtensionData["precision"].GetInt32(); }
        }
        public int Scale { get; set; }

        // date and time fields
        public string Unit { get; set; }
        // timestamp fields
        public string Timezone { get; set; }

        // FixedSizeBinary fields
        public int ByteWidth { get; set; }

        [JsonExtensionData]
        public Dictionary<string, JsonElement> ExtensionData { get; set; }
    }

    public class JsonDictionaryIndex
    {
        public int Id { get; set; }
        public JsonArrowType
Type { get; set; }
        public bool IsOrdered { get; set; }
    }

    /// <summary>Metadata expressed as a list of string key/value pairs.</summary>
    public class JsonMetadata : List<KeyValuePair<string, string>>
    {
    }

    /// <summary>One record batch: a row count and its columns.</summary>
    public class JsonRecordBatch
    {
        public int Count { get; set; }
        public List<JsonFieldData> Columns { get; set; }
    }

    /// <summary>Data of one column; <see cref="Data"/> stays raw JSON until a type-specific reader parses it.</summary>
    public class JsonFieldData
    {
        public string Name { get; set; }
        public int Count { get; set; }
        public bool[] Validity { get; set; }
        public int[] Offset { get; set; }
        public int[] TypeId { get; set; }
        public JsonElement Data { get; set; }
        public List<JsonFieldData> Children { get; set; }
    }

    /// <summary>
    /// Reads a bool from either a JSON true/false literal or the numbers 0 and 1.
    /// Writing is not implemented.
    /// </summary>
    internal sealed class ValidityConverter : JsonConverter<bool>
    {
        public override bool Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            switch (reader.TokenType)
            {
                case JsonTokenType.True:
                    return true;
                case JsonTokenType.False:
                    return false;
            }

            bool isNumericBool = typeToConvert == typeof(bool) && reader.TokenType == JsonTokenType.Number;
            if (!isNumericBool)
            {
                throw new InvalidOperationException($"Unexpected bool data: {reader.TokenType}");
            }

            int number = reader.GetInt32();
            switch (number)
            {
                case 0:
                    return false;
                case 1:
                    return true;
                default:
                    throw new InvalidOperationException($"Unexpected bool value: {number}");
            }
        }

        public override void Write(Utf8JsonWriter writer, bool value, JsonSerializerOptions options) => throw new NotImplementedException();
    }

    internal sealed class ByteArrayConverter : JsonConverter<byte[]>
    {
        public override byte[] Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            if (reader.TokenType != JsonTokenType.StartArray)
            {
                throw new InvalidOperationException($"Unexpected byte[] token: {reader.TokenType}");
            }

            List<byte> values = new List<byte>();
            while (reader.Read())
            {
                if (reader.TokenType == JsonTokenType.EndArray)
                {
                    return values.ToArray();
                }

                if (reader.TokenType != JsonTokenType.Number)
                {
                    throw new InvalidOperationException($"Unexpected byte token: {reader.TokenType}");
                }

values.Add(reader.GetByte());
            }

            throw new InvalidOperationException("Unexpectedly reached the end of the reader");
        }

        public override void Write(Utf8JsonWriter writer, byte[] value, JsonSerializerOptions options) => throw new NotImplementedException();
    }

    /// <summary>
    /// Naming policy for the JSON file: four property names map to upper-case JSON names,
    /// everything else is delegated to the camel-case policy.
    /// </summary>
    internal sealed class JsonFileNamingPolicy : JsonNamingPolicy
    {
        public static JsonFileNamingPolicy Instance { get; } = new JsonFileNamingPolicy();

        public override string ConvertName(string name) => name switch
        {
            "Validity" => "VALIDITY",
            "Offset" => "OFFSET",
            "TypeId" => "TYPE_ID",
            "Data" => "DATA",
            _ => CamelCase.ConvertName(name),
        };
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/Program.cs b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/Program.cs
new file mode 100644
index 000000000..243269386
--- /dev/null
+++ b/src/arrow/csharp/test/Apache.Arrow.IntegrationTest/Program.cs
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.CommandLine;
using System.CommandLine.Invocation;
using System.IO;
using System.Linq;
using System.Threading.Tasks;

namespace Apache.Arrow.IntegrationTest
{
    public class Program
    {
        /// <summary>
        /// Entry point: declares the --mode, --json-file/-j and --arrow-file/-a options and hands
        /// the parsed values to an <see cref="IntegrationCommand"/>.
        /// </summary>
        public static async Task<int> Main(string[] args)
        {
            var modeOption = new Option<string>(
                "--mode",
                description: "Which command to run");
            var jsonFileOption = new Option<FileInfo>(
                new[] { "--json-file", "-j" },
                "The JSON file to interact with");
            var arrowFileOption = new Option<FileInfo>(
                new[] { "--arrow-file", "-a" },
                "The arrow file to interact with");

            var rootCommand = new RootCommand
            {
                modeOption,
                jsonFileOption,
                arrowFileOption
            };

            rootCommand.Description = "Integration test app for Apache.Arrow .NET Library.";

            // NOTE(review): the lambda parameter names (mode, j, a) are kept as-is; they presumably
            // take part in binding the parsed options to the handler - confirm before renaming.
            rootCommand.Handler = CommandHandler.Create<string, FileInfo, FileInfo>(async (mode, j, a) =>
            {
                var command = new IntegrationCommand(mode, j, a);
                await command.Execute();
            });

            return await rootCommand.InvokeAsync(args);
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/src/arrow/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
new file mode 100644
index 000000000..a725fe57e
--- /dev/null
+++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>netcoreapp3.1</TargetFramework>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
    <PackageReference Include="xunit" Version="2.4.0" />
    <PackageReference Include="xunit.runner.visualstudio" Version="2.4.0">
      <PrivateAssets>all</PrivateAssets>
      <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
    </PackageReference>
  </ItemGroup>

  <ItemGroup>
    <ProjectReference
Include="..\..\src\Apache.Arrow\Apache.Arrow.csproj" /> + </ItemGroup> + +</Project>
\ No newline at end of file diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs new file mode 100644 index 000000000..41078998b --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs @@ -0,0 +1,226 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    public class ArrayBuilderTests
    {
        // TODO: Test various builder invariants (Append, AppendRange, Clear, Resize, Reserve, etc)

        // Every primitive builder is fed 10, 20, 30; TestArrayBuilder's defaults expect length 3 and no nulls.
        [Fact]
        public void PrimitiveArrayBuildersProduceExpectedArray()
        {
            TestArrayBuilder<Int8Array, Int8Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<Int16Array, Int16Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<Int32Array, Int32Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<Int64Array, Int64Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<UInt8Array, UInt8Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<UInt16Array, UInt16Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<UInt32Array, UInt32Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<UInt64Array, UInt64Array.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<FloatArray, FloatArray.Builder>(b => b.Append(10).Append(20).Append(30));
            TestArrayBuilder<DoubleArray, DoubleArray.Builder>(b => b.Append(10).Append(20).Append(30));
        }

        // value, null, null, value: length 4, two nulls, first validity byte 0x09 (bits 0 and 3 set).
        [Fact]
        public void PrimitiveArrayBuildersProduceExpectedArrayWithNulls()
        {
            TestArrayBuilder<Int8Array, Int8Array.Builder>(b => b.Append(123).AppendNull().AppendNull().Append(127), 4, 2, 0x09);
            TestArrayBuilder<Int16Array, Int16Array.Builder>(b => b.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<Int32Array, Int32Array.Builder>(b => b.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<Int64Array, Int64Array.Builder>(b => b.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<UInt8Array, UInt8Array.Builder>(b => b.Append(123).AppendNull().AppendNull().Append(127), 4, 2, 0x09);
TestArrayBuilder<UInt16Array, UInt16Array.Builder>(x => x.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<UInt32Array, UInt32Array.Builder>(x => x.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            // The UInt64 case used to appear twice (copy-paste); one run covers it.
            TestArrayBuilder<UInt64Array, UInt64Array.Builder>(x => x.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<FloatArray, FloatArray.Builder>(x => x.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
            TestArrayBuilder<DoubleArray, DoubleArray.Builder>(x => x.Append(123).AppendNull().AppendNull().Append(456), 4, 2, 0x09);
        }

        // Without nulls the defaults apply; with one null at index 1 the first validity byte is 0x0D (bits 0, 2, 3).
        [Fact]
        public void BooleanArrayBuilderProducersExpectedArray()
        {
            TestArrayBuilder<BooleanArray, BooleanArray.Builder>(x => x.Append(true).Append(false).Append(true));
            TestArrayBuilder<BooleanArray, BooleanArray.Builder>(x => x.Append(true).AppendNull().Append(false).Append(true), 4, 1, 0x0D);
        }

        // Append(null) and AppendNull() must both read back as null; string.Empty must stay a non-null empty string.
        [Fact]
        public void StringArrayBuilderHandlesNullsAndEmptyStrings()
        {
            var stringArray = TestArrayBuilder<StringArray, StringArray.Builder>(x => x.Append("123").Append(null).AppendNull().Append(string.Empty), 4, 2, 0x09);
            Assert.Equal("123", stringArray.GetString(0));
            Assert.Null(stringArray.GetString(1));
            Assert.Null(stringArray.GetString(2));
            Assert.Equal(string.Empty, stringArray.GetString(3));
        }


        [Fact]
        public void ListArrayBuilder()
        {
            var listBuilder = new ListArray.Builder(StringType.Default);
            var valueBuilder = listBuilder.ValueBuilder as StringArray.Builder;
            Assert.NotNull(valueBuilder);
            listBuilder.Append();
            valueBuilder.Append("1");
            listBuilder.AppendNull();
            listBuilder.Append();
            valueBuilder.Append("22").Append("33");
            listBuilder.Append();
            valueBuilder.Append("444").AppendNull().Append("555").Append("666");

            var list = listBuilder.Build();

            Assert.Equal(
                new
List<string> { "1" },
                ConvertStringArrayToList(list.GetSlicedValues(0) as StringArray));
            Assert.Null(list.GetSlicedValues(1));
            Assert.Equal(
                new List<string> { "22", "33" },
                ConvertStringArrayToList(list.GetSlicedValues(2) as StringArray));
            Assert.Equal(
                new List<string> { "444", null, "555", "666" },
                ConvertStringArrayToList(list.GetSlicedValues(3) as StringArray));

            // Copies every element (nulls included) of a StringArray into a List<string>.
            List<string> ConvertStringArrayToList(StringArray array)
            {
                var count = array.Length;
                var strings = new List<string>(count);
                for (var position = 0; position < count; position++)
                {
                    strings.Add(array.GetString(position));
                }
                return strings;
            }
        }

        [Fact]
        public void ListArrayBuilderValidityBuffer()
        {
            var listBuilder = new ListArray.Builder(Int64Type.Default);
            ListArray builtList = listBuilder.Append().AppendNull().Build();

            // NOTE(review): only two entries are appended, so index 2 is one past the last one -
            // presumably a deliberate probe that no validity bit is set beyond the end; confirm.
            Assert.False(builtList.IsValid(2));
        }

        [Fact]
        public void NestedListArrayBuilder()
        {
            var childListType = new ListType(Int64Type.Default);
            var parentListBuilder = new ListArray.Builder(childListType);
            var childListBuilder = parentListBuilder.ValueBuilder as ListArray.Builder;
            Assert.NotNull(childListBuilder);
            var valueBuilder = childListBuilder.ValueBuilder as Int64Array.Builder;
            Assert.NotNull(valueBuilder);

            parentListBuilder.Append();
            childListBuilder.Append();
            valueBuilder.Append(1);
            childListBuilder.Append();
            valueBuilder.Append(2).Append(3);
            parentListBuilder.Append();
            childListBuilder.Append();
            valueBuilder.Append(4).Append(5).Append(6).Append(7);
            parentListBuilder.Append();
            childListBuilder.Append();
            valueBuilder.Append(8).Append(9).Append(10).Append(11).Append(12);

            var parentList = parentListBuilder.Build();

            var childList1 = (ListArray)parentList.GetSlicedValues(0);
            var childList2 = (ListArray)parentList.GetSlicedValues(1);
            var childList3 = (ListArray)parentList.GetSlicedValues(2);

            Assert.Equal(2, childList1.Length);
            Assert.Equal(1, childList2.Length);
            Assert.Equal(1, childList3.Length);
Assert.Equal(
                new List<long?> { 1 },
                ((Int64Array)childList1.GetSlicedValues(0)).ToList());
            Assert.Equal(
                new List<long?> { 2, 3 },
                ((Int64Array)childList1.GetSlicedValues(1)).ToList());
            Assert.Equal(
                new List<long?> { 4, 5, 6, 7 },
                ((Int64Array)childList2.GetSlicedValues(0)).ToList());
            Assert.Equal(
                new List<long?> { 8, 9, 10, 11, 12 },
                ((Int64Array)childList3.GetSlicedValues(0)).ToList());
        }

        public class TimestampArrayBuilder
        {
            // Round-trips one local timestamp through nanosecond, microsecond and millisecond arrays;
            // the two coarser units are compared against the input truncated to that unit.
            [Fact]
            public void ProducesExpectedArray()
            {
                var localNow = DateTimeOffset.UtcNow.ToLocalTime();

                var unitType = new TimestampType(TimeUnit.Nanosecond, TimeZoneInfo.Local);
                var timestamps = new TimestampArray.Builder(unitType)
                    .Append(localNow)
                    .Build();

                Assert.Equal(1, timestamps.Length);
                var roundTripped = timestamps.GetTimestamp(0);
                Assert.NotNull(roundTripped);
                Assert.Equal(localNow, roundTripped.Value);

                unitType = new TimestampType(TimeUnit.Microsecond, TimeZoneInfo.Local);
                timestamps = new TimestampArray.Builder(unitType)
                    .Append(localNow)
                    .Build();

                Assert.Equal(1, timestamps.Length);
                roundTripped = timestamps.GetTimestamp(0);
                Assert.NotNull(roundTripped);
                Assert.Equal(localNow.Truncate(TimeSpan.FromTicks(10)), roundTripped.Value);

                unitType = new TimestampType(TimeUnit.Millisecond, TimeZoneInfo.Local);
                timestamps = new TimestampArray.Builder(unitType)
                    .Append(localNow)
                    .Build();

                Assert.Equal(1, timestamps.Length);
                roundTripped = timestamps.GetTimestamp(0);
                Assert.NotNull(roundTripped);
                Assert.Equal(localNow.Truncate(TimeSpan.FromTicks(TimeSpan.TicksPerMillisecond)), roundTripped.Value);
            }
        }

        // Runs "action" on a fresh builder, builds the array and checks its length and null count.
        // A non-zero expectedNulls is additionally compared (lowest byte only) with the first byte
        // of the array's first buffer.
        private static TArray TestArrayBuilder<TArray, TArrayBuilder>(Action<TArrayBuilder> action, int expectedLength = 3, int expectedNullCount = 0, int expectedNulls = 0)
            where TArray : IArrowArray
            where TArrayBuilder : IArrowArrayBuilder<TArray>, new()
        {
            var arrayBuilder = new TArrayBuilder();
            action(arrayBuilder);
            var built = arrayBuilder.Build(default);

            Assert.IsAssignableFrom<TArray>(built);
            Assert.NotNull(built);
            Assert.Equal(expectedLength, built.Length);
            Assert.Equal(expectedNullCount, built.NullCount);

            if (expectedNulls == 0)
            {
                return built;
            }

            var actualFirstByte = built.Data.Buffers[0].Span.Slice(0, 1);
            byte[] expectedFirstByte = BitConverter.GetBytes(expectedNulls).Take(1).ToArray();
            Assert.True(actualFirstByte.SequenceEqual(new ReadOnlySpan<byte>(expectedFirstByte)));
            return built;
        }

    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayDataConcatenatorTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayDataConcatenatorTests.cs
new file mode 100644
index 000000000..9f034b9d0
--- /dev/null
+++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayDataConcatenatorTests.cs
@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using System.Reflection;
using Apache.Arrow.Memory;
using Xunit;

namespace Apache.Arrow.Tests
{
    public class ArrayDataConcatenatorTests
    {
        // A null list and an empty list both concatenate to null.
        [Fact]
        public void TestNullOrEmpty()
        {
            Assert.Null(ArrayDataConcatenatorReflector.InvokeConcatenate(null));
            Assert.Null(ArrayDataConcatenatorReflector.InvokeConcatenate(new List<ArrayData>()));
        }

        // Concatenating a single array must compare equal to that array.
        [Fact]
        public void TestSingleElement()
        {
            Int32Array expected = new Int32Array.Builder().Append(1).Append(2).Build();

            ArrayData concatenated = ArrayDataConcatenatorReflector.InvokeConcatenate(new[] { expected.Data });

            ArrowReaderVerifier.CompareArrays(expected, ArrowArrayFactory.BuildArray(concatenated));
        }

        // Reaches the non-public static Apache.Arrow.ArrayDataConcatenator.Concatenate via reflection.
        private static class ArrayDataConcatenatorReflector
        {
            private static readonly MethodInfo s_concatenateInfo = typeof(ArrayData).Assembly.GetType("Apache.Arrow.ArrayDataConcatenator")
                .GetMethod("Concatenate", BindingFlags.Static | BindingFlags.NonPublic);

            internal static ArrayData InvokeConcatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default)
            {
                object[] arguments = { arrayDataList, allocator };
                object result = s_concatenateInfo.Invoke(null, arguments);
                return result as ArrayData;
            }
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
new file mode 100644
index 000000000..f75111b66
--- /dev/null
+++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
@@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System.Diagnostics;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Visitor that asserts the visited (actual) type matches the expected type given at construction.
    /// </summary>
    public class ArrayTypeComparer :
        IArrowTypeVisitor<TimestampType>,
        IArrowTypeVisitor<Date32Type>,
        IArrowTypeVisitor<Date64Type>,
        IArrowTypeVisitor<Time32Type>,
        IArrowTypeVisitor<Time64Type>,
        IArrowTypeVisitor<FixedSizeBinaryType>,
        IArrowTypeVisitor<ListType>,
        IArrowTypeVisitor<StructType>
    {
        private readonly IArrowType _expectedType;

        public ArrayTypeComparer(IArrowType expectedType)
        {
            Debug.Assert(expectedType != null);
            _expectedType = expectedType;
        }

        // Timestamps must agree on timezone and unit.
        public void Visit(TimestampType actualType)
        {
            Assert.IsAssignableFrom<TimestampType>(_expectedType);

            var expected = (TimestampType)_expectedType;

            Assert.Equal(expected.Timezone, actualType.Timezone);
            Assert.Equal(expected.Unit, actualType.Unit);
        }

        // Date and time types must agree on their unit.
        public void Visit(Date32Type actualType)
        {
            Assert.IsAssignableFrom<Date32Type>(_expectedType);
            var expected = (Date32Type)_expectedType;

            Assert.Equal(expected.Unit, actualType.Unit);
        }

        public void Visit(Date64Type actualType)
        {
            Assert.IsAssignableFrom<Date64Type>(_expectedType);
            var expected = (Date64Type)_expectedType;

            Assert.Equal(expected.Unit, actualType.Unit);
        }

        public void Visit(Time32Type actualType)
        {
            Assert.IsAssignableFrom<Time32Type>(_expectedType);
            var expected = (Time32Type)_expectedType;

            Assert.Equal(expected.Unit, actualType.Unit);
        }

        public void Visit(Time64Type actualType)
        {
Assert.IsAssignableFrom<Time64Type>(_expectedType); + var expectedType = (Time64Type)_expectedType; + + Assert.Equal(expectedType.Unit, actualType.Unit); + } + + public void Visit(FixedSizeBinaryType actualType) + { + Assert.IsAssignableFrom<FixedSizeBinaryType>(_expectedType); + var expectedType = (FixedSizeBinaryType)_expectedType; + + Assert.Equal(expectedType.ByteWidth, actualType.ByteWidth); + } + + public void Visit(ListType actualType) + { + Assert.IsAssignableFrom<ListType>(_expectedType); + var expectedType = (ListType)_expectedType; + + CompareNested(expectedType, actualType); + } + + public void Visit(StructType actualType) + { + Assert.IsAssignableFrom<StructType>(_expectedType); + var expectedType = (StructType)_expectedType; + + CompareNested(expectedType, actualType); + } + + private static void CompareNested(NestedType expectedType, NestedType actualType) + { + Assert.Equal(expectedType.Fields.Count, actualType.Fields.Count); + + for (int i = 0; i < expectedType.Fields.Count; i++) + { + FieldComparer.Compare(expectedType.Fields[i], actualType.Fields[i]); + } + } + + public void Visit(IArrowType actualType) + { + Assert.IsAssignableFrom(actualType.GetType(), _expectedType); + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayBuilderFactoryReflector.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayBuilderFactoryReflector.cs new file mode 100644 index 000000000..69894ab3c --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayBuilderFactoryReflector.cs @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Reflection;
using Apache.Arrow.Types;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Reflection shim that lets tests call the non-public static <c>Build</c> method of
    /// <c>Apache.Arrow.ArrowArrayBuilderFactory</c>.
    /// </summary>
    internal static class ArrayArrayBuilderFactoryReflector
    {
        private static readonly MethodInfo s_buildInfo =
            typeof(ArrayData).Assembly
                .GetType("Apache.Arrow.ArrowArrayBuilderFactory")
                .GetMethod("Build", BindingFlags.Static | BindingFlags.NonPublic);

        /// <summary>
        /// Invokes <c>Build(dataType)</c> on the factory.
        /// </summary>
        /// <param name="dataType">Arrow type passed to the factory method.</param>
        /// <returns>
        /// The factory result viewed as an untyped array builder, or null when the result is
        /// not compatible with that interface.
        /// </returns>
        internal static IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> InvokeBuild(IArrowType dataType)
        {
            object[] arguments = { dataType };
            object builder = s_buildInfo.Invoke(null, arguments);

            return builder as IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>>;
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs new file mode 100644 index 000000000..6b3277ed5 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -0,0 +1,396 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for the non-public <c>Apache.Arrow.ArrowArrayConcatenator</c> type, which is
    /// reached through reflection via <see cref="ArrowArrayConcatenatorReflector"/>.
    /// </summary>
    public class ArrowArrayConcatenatorTests
    {
        /// <summary>
        /// For every generated data set, concatenating the input arrays must equal the
        /// array that was built directly from the same values.
        /// </summary>
        [Fact]
        public void TestStandardCases()
        {
            foreach ((List<IArrowArray> testTargetArrayList, IArrowArray expectedArray) in GenerateTestData())
            {
                IArrowArray actualArray = ArrowArrayConcatenatorReflector.InvokeConcatenate(testTargetArrayList);
                ArrowReaderVerifier.CompareArrays(expectedArray, actualArray);
            }
        }

        /// <summary>
        /// A null list and an empty list are both expected to concatenate to null.
        /// </summary>
        [Fact]
        public void TestNullOrEmpty()
        {
            Assert.Null(ArrowArrayConcatenatorReflector.InvokeConcatenate(null));
            Assert.Null(ArrowArrayConcatenatorReflector.InvokeConcatenate(new List<IArrowArray>()));
        }

        /// <summary>
        /// Concatenating a single array must yield an array equal to it.
        /// </summary>
        [Fact]
        public void TestSingleElement()
        {
            Int32Array array = new Int32Array.Builder().Append(1).Append(2).Build();
            IArrowArray actualArray = ArrowArrayConcatenatorReflector.InvokeConcatenate(new[] { array });
            ArrowReaderVerifier.CompareArrays(array, actualArray);
        }

        /// <summary>
        /// Yields one (input arrays, expected concatenation) pair per supported Arrow type.
        /// </summary>
        private static IEnumerable<Tuple<List<IArrowArray>, IArrowArray>> GenerateTestData()
        {
            var targetTypes = new List<IArrowType>()
            {
                BooleanType.Default,
                Int8Type.Default,
                Int16Type.Default,
                Int32Type.Default,
                Int64Type.Default,
                UInt8Type.Default,
                UInt16Type.Default,
                UInt32Type.Default,
                UInt64Type.Default,
                FloatType.Default,
                DoubleType.Default,
                BinaryType.Default,
                StringType.Default,
                Date32Type.Default,
                Date64Type.Default,
                TimestampType.Default,
                new Decimal128Type(14, 10),
                new Decimal256Type(14, 10),
                new ListType(Int64Type.Default),
                new StructType(new List<Field>
                {
                    new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
                    new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
                }),
            };

            foreach (IArrowType type in targetTypes)
            {
                var creator = new TestDataGenerator();
                type.Accept(creator);
                yield return Tuple.Create(creator.TestTargetArrayList, creator.ExpectedArray);
            }
        }

        /// <summary>
        /// Reflection shim around the non-public static <c>Concatenate</c> method.
        /// </summary>
        private static class ArrowArrayConcatenatorReflector
        {
            private const string ConcatenatorTypeName = "Apache.Arrow.ArrowArrayConcatenator";
            private const string ConcatenateMethodName = "Concatenate";

            private static readonly MethodInfo s_concatenateInfo = ResolveConcatenate();

            /// <summary>
            /// Invokes <c>Concatenate(arrowArrayList, allocator)</c> and returns its result.
            /// </summary>
            internal static IArrowArray InvokeConcatenate(IReadOnlyList<IArrowArray> arrowArrayList, MemoryAllocator allocator = default)
            {
                // A direct cast (rather than `as`) keeps an unexpected return type from being
                // silently turned into null, which would make the null assertions pass wrongly.
                return (IArrowArray)s_concatenateInfo.Invoke(null, new object[] { arrowArrayList, allocator });
            }

            /// <summary>
            /// Looks up the target method, failing with a descriptive exception instead of a
            /// NullReferenceException when the type or the method cannot be found.
            /// </summary>
            private static MethodInfo ResolveConcatenate()
            {
                Type concatenatorType = typeof(ArrayData).Assembly.GetType(ConcatenatorTypeName, throwOnError: true);
                MethodInfo method = concatenatorType.GetMethod(ConcatenateMethodName, BindingFlags.Static | BindingFlags.NonPublic);

                return method ?? throw new MissingMethodException(ConcatenatorTypeName, ConcatenateMethodName);
            }
        }

        /// <summary>
        /// Type visitor that, for the visited Arrow type, fills <see cref="TestTargetArrayList"/>
        /// with one array per base data list and sets <see cref="ExpectedArray"/> to a single
        /// array built from the same values in the same order.
        /// </summary>
        private class TestDataGenerator :
            IArrowTypeVisitor<BooleanType>,
            IArrowTypeVisitor<Int8Type>,
            IArrowTypeVisitor<Int16Type>,
            IArrowTypeVisitor<Int32Type>,
            IArrowTypeVisitor<Int64Type>,
            IArrowTypeVisitor<UInt8Type>,
            IArrowTypeVisitor<UInt16Type>,
            IArrowTypeVisitor<UInt32Type>,
            IArrowTypeVisitor<UInt64Type>,
            IArrowTypeVisitor<FloatType>,
            IArrowTypeVisitor<DoubleType>,
            IArrowTypeVisitor<BinaryType>,
            IArrowTypeVisitor<StringType>,
            IArrowTypeVisitor<Decimal128Type>,
            IArrowTypeVisitor<Decimal256Type>,
            IArrowTypeVisitor<Date32Type>,
            IArrowTypeVisitor<Date64Type>,
            IArrowTypeVisitor<TimestampType>,
            IArrowTypeVisitor<ListType>,
            IArrowTypeVisitor<StructType>
        {
            // Fixed instant used as the origin for timestamp values so that the generated
            // data does not depend on when the test runs.
            private static readonly DateTimeOffset s_timestampBasis = new DateTimeOffset(2020, 1, 1, 0, 0, 0, TimeSpan.Zero);

            private readonly List<List<int?>> _baseData;

            private readonly int _baseDataListCount;

            private readonly int _baseDataTotalElementCount;

            /// <summary>The arrays to be concatenated, one per base data list.</summary>
            public List<IArrowArray> TestTargetArrayList { get; }

            /// <summary>The array the concatenation is expected to equal.</summary>
            public IArrowArray ExpectedArray { get; private set; }

            public TestDataGenerator()
            {
                _baseData = new List<List<int?>>
                {
                    new List<int?> { 1, 2, 3 },
                    new List<int?> { 100, 101, null },
                    new List<int?> { 11, null, 12 },
                };

                _baseDataListCount = _baseData.Count;
                _baseDataTotalElementCount = _baseData.Sum(list => list.Count);
                TestTargetArrayList = new List<IArrowArray>(_baseDataListCount);
            }

            public void Visit(BooleanType type) => GenerateTestData<bool, BooleanArray, BooleanArray.Builder>(type, x => x % 2 == 0);
            public void Visit(Int8Type type) => GenerateTestData<sbyte, Int8Array, Int8Array.Builder>(type, x => (sbyte)x);
            public void Visit(Int16Type type) => GenerateTestData<short, Int16Array, Int16Array.Builder>(type, x => (short)x);
            public void Visit(Int32Type type) => GenerateTestData<int, Int32Array, Int32Array.Builder>(type, x => x);
            public void Visit(Int64Type type) => GenerateTestData<long, Int64Array, Int64Array.Builder>(type, x => x);
            public void Visit(UInt8Type type) => GenerateTestData<byte, UInt8Array, UInt8Array.Builder>(type, x => (byte)x);
            public void Visit(UInt16Type type) => GenerateTestData<ushort, UInt16Array, UInt16Array.Builder>(type, x => (ushort)x);
            public void Visit(UInt32Type type) => GenerateTestData<uint, UInt32Array, UInt32Array.Builder>(type, x => (uint)x);
            public void Visit(UInt64Type type) => GenerateTestData<ulong, UInt64Array, UInt64Array.Builder>(type, x => (ulong)x);
            public void Visit(FloatType type) => GenerateTestData<float, FloatArray, FloatArray.Builder>(type, x => x);
            public void Visit(DoubleType type) => GenerateTestData<double, DoubleArray, DoubleArray.Builder>(type, x => x);
            public void Visit(Date32Type type) => GenerateTestData<DateTime, Date32Array, Date32Array.Builder>(type, x => DateTime.MinValue.AddDays(x));
            public void Visit(Date64Type type) => GenerateTestData<DateTime, Date64Array, Date64Array.Builder>(type, x => DateTime.MinValue.AddDays(x));

            public void Visit(Decimal128Type type)
            {
                Decimal128Array.Builder resultBuilder = new Decimal128Array.Builder(type).Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    Decimal128Array.Builder builder = new Decimal128Array.Builder(type).Reserve(dataList.Count);
                    foreach (decimal? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            builder.Append(value.Value);
                            resultBuilder.Append(value.Value);
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(Decimal256Type type)
            {
                Decimal256Array.Builder resultBuilder = new Decimal256Array.Builder(type).Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    Decimal256Array.Builder builder = new Decimal256Array.Builder(type).Reserve(dataList.Count);
                    foreach (decimal? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            builder.Append(value.Value);
                            resultBuilder.Append(value.Value);
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(TimestampType type)
            {
                TimestampArray.Builder resultBuilder = new TimestampArray.Builder().Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    TimestampArray.Builder builder = new TimestampArray.Builder().Reserve(dataList.Count);
                    foreach (int? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            // Each base value is used as a millisecond offset from the fixed basis.
                            DateTimeOffset dateValue = s_timestampBasis.AddMilliseconds(value.Value);
                            builder.Append(dateValue);
                            resultBuilder.Append(dateValue);
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(BinaryType type)
            {
                BinaryArray.Builder resultBuilder = new BinaryArray.Builder().Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    BinaryArray.Builder builder = new BinaryArray.Builder().Reserve(dataList.Count);

                    foreach (byte? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            builder.Append(value.Value);
                            resultBuilder.Append(value.Value);
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(StringType type)
            {
                StringArray.Builder resultBuilder = new StringArray.Builder().Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    StringArray.Builder builder = new StringArray.Builder().Reserve(dataList.Count);

                    foreach (int? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            string text = value.Value.ToString();
                            builder.Append(text);
                            resultBuilder.Append(text);
                        }
                        else
                        {
                            // Nullable<int>.ToString() yields "" for a missing value, so null
                            // entries have to be appended explicitly to be exercised at all.
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(ListType type)
            {
                ListArray.Builder resultBuilder = new ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount);
                //Todo : Support various types
                Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];

                    ListArray.Builder builder = new ListArray.Builder(type.ValueField).Reserve(dataList.Count);
                    Int64Array.Builder valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(dataList.Count);

                    foreach (long? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            // Every non-null base value becomes a list holding that one value.
                            builder.Append();
                            resultBuilder.Append();

                            valueBuilder.Append(value.Value);
                            resultValueBuilder.Append(value.Value);
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }

                    TestTargetArrayList.Add(builder.Build());
                }

                ExpectedArray = resultBuilder.Build();
            }

            public void Visit(StructType type)
            {
                // TODO: Make data from type fields.

                // The following can be improved with a Builder class for StructArray.
                // NOTE(review): each StructArray below is constructed with the arguments 3 and 1
                // (presumably length and null count, matching the three-bit bitmap with one
                // cleared bit) although every child array holds 4 values, and the expected
                // array uses the same 3 and 1 with 12-value children — confirm this is intended.
                StringArray.Builder resultStringBuilder = new StringArray.Builder();
                Int32Array.Builder resultInt32Builder = new Int32Array.Builder();
                ArrowBuffer nullBitmapBuffer = new ArrowBuffer.BitmapBuilder().Append(true).Append(true).Append(false).Build();

                for (int i = 0; i < 3; i++)
                {
                    resultStringBuilder.Append("joe").AppendNull().AppendNull().Append("mark");
                    resultInt32Builder.Append(1).Append(2).AppendNull().Append(4);
                    StringArray stringArray = new StringArray.Builder().Append("joe").AppendNull().AppendNull().Append("mark").Build();
                    Int32Array intArray = new Int32Array.Builder().Append(1).Append(2).AppendNull().Append(4).Build();
                    List<Array> arrays = new List<Array>
                    {
                        stringArray,
                        intArray
                    };

                    TestTargetArrayList.Add(new StructArray(type, 3, arrays, nullBitmapBuffer, 1));
                }

                StringArray resultStringArray = resultStringBuilder.Build();
                Int32Array resultInt32Array = resultInt32Builder.Build();

                ExpectedArray = new StructArray(type, 3, new List<Array> { resultStringArray, resultInt32Array }, nullBitmapBuffer, 1);
            }

            public void Visit(IArrowType type)
            {
                throw new NotImplementedException();
            }

            /// <summary>
            /// Shared generator for types whose builder is obtained from the builder factory:
            /// converts each non-null base value with <paramref name="generator"/> and appends
            /// it to both the per-list builder and the expected-result builder.
            /// </summary>
            /// <param name="type">Arrow type handed to the builder factory.</param>
            /// <param name="generator">Maps a base integer to a value of type <typeparamref name="T"/>.</param>
            private void GenerateTestData<T, TArray, TArrayBuilder>(IArrowType type, Func<int, T> generator)
                where TArrayBuilder : IArrowArrayBuilder<T, TArray, TArrayBuilder>
                where TArray : IArrowArray
            {
                var resultBuilder = (IArrowArrayBuilder<T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type);
                resultBuilder.Reserve(_baseDataTotalElementCount);

                for (int i = 0; i < _baseDataListCount; i++)
                {
                    List<int?> dataList = _baseData[i];
                    var builder = (IArrowArrayBuilder<T, TArray, TArrayBuilder>)ArrayArrayBuilderFactoryReflector.InvokeBuild(type);
                    builder.Reserve(dataList.Count);

                    foreach (int? value in dataList)
                    {
                        if (value.HasValue)
                        {
                            builder.Append(generator(value.Value));
                            resultBuilder.Append(generator(value.Value));
                        }
                        else
                        {
                            builder.AppendNull();
                            resultBuilder.AppendNull();
                        }
                    }
                    TestTargetArrayList.Add(builder.Build(default));
                }

                ExpectedArray = resultBuilder.Build(default);
            }
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs new file mode 100644 index 000000000..18d405613 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs @@ -0,0 +1,274 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.

using System;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for element access, validity checks and slicing of Arrow arrays.
    /// </summary>
    public class ArrowArrayTests
    {

        [Fact]
        public void ThrowsWhenGetValueIndexOutOfBounds()
        {
            var array = new Int64Array.Builder().Append(1).Append(2).Build();
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValue(-1));
            Assert.Equal(1, array.GetValue(0));
            Assert.Equal(2, array.GetValue(1));
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValue(2));
        }

        [Fact]
        public void ThrowsWhenGetValueAndOffsetIndexOutOfBounds()
        {
            var array = new BinaryArray.Builder().Append(1).Append(2).Build();
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValueLength(-1));
            Assert.Equal(1, array.GetValueLength(0));
            Assert.Equal(1, array.GetValueLength(1));
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValueLength(2));

#pragma warning disable 618
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValueOffset(-1));
            Assert.Equal(0, array.GetValueOffset(0));
            Assert.Equal(1, array.GetValueOffset(1));
            Assert.Equal(2, array.GetValueOffset(2));
            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValueOffset(3));
#pragma warning restore 618

            Assert.Throws<IndexOutOfRangeException>(() => array.ValueOffsets[-1]);
            Assert.Equal(0, array.ValueOffsets[0]);
            Assert.Equal(1, array.ValueOffsets[1]);
            Assert.Equal(2, array.ValueOffsets[2]);
            Assert.Throws<IndexOutOfRangeException>(() => array.ValueOffsets[3]);

        }

        /// <summary>
        /// Builds an <see cref="Int64Array"/> for every (offset, length) window over eight
        /// slots and checks that <c>IsValid</c> agrees with the validity bitmap.
        /// </summary>
        [Fact]
        public void IsValidValue()
        {
            const int totalValueCount = 8;
            const byte nullBitmap = 0b_11110011;

            var nullBitmapBuffer = new ArrowBuffer.Builder<byte>().Append(nullBitmap).Build();

            // One value per slot, including placeholders at the two slots whose validity bit
            // is cleared, so that windows reaching the last slot stay inside the buffer.
            var valueBuffer = new ArrowBuffer.Builder<long>()
                .Append(0).Append(1).Append(2).Append(3).Append(4).Append(5).Append(6).Append(7).Build();

            //Check all offset and length
            for (var offset = 0; offset < totalValueCount; offset++)
            {
                var nullCount = 0;
                for (var length = 1; length + offset <= totalValueCount; length++)
                {
                    // Keep a running null count for exactly the window [offset, offset + length):
                    // growing the window by one slot adds that slot's null, if any.
                    if (!BitUtility.GetBit(nullBitmap, offset + length - 1))
                    {
                        nullCount++;
                    }

                    TestIsValid(valueBuffer, nullBitmapBuffer, length, nullCount, offset);
                }
            }

            void TestIsValid(ArrowBuffer valueBuf, ArrowBuffer nullBitmapBuf, int length, int nullCount, int offset)
            {
                var array = new Int64Array(valueBuf, nullBitmapBuf, length, nullCount, offset);
                for (var i = 0; i < length; i++)
                {
                    if (BitUtility.GetBit(nullBitmap, i + offset))
                    {
                        Assert.True(array.IsValid(i));
                    }
                    else
                    {
                        Assert.False(array.IsValid(i));
                    }
                }
            }
        }

        [Fact]
        public void SliceArray()
        {
            TestSlice<Int32Array, Int32Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<Int8Array, Int8Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<Int16Array, Int16Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<Int64Array, Int64Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<UInt8Array, UInt8Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<UInt16Array, UInt16Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<UInt32Array, UInt32Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<UInt64Array, UInt64Array.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<FloatArray, FloatArray.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<DoubleArray, DoubleArray.Builder>(x => x.Append(10).Append(20).Append(30));
            TestSlice<Date32Array, Date32Array.Builder>(x => x.Append(new DateTime(2019, 1, 1)).Append(new DateTime(2019, 1, 2)).Append(new DateTime(2019, 1, 3)));
            TestSlice<Date64Array, Date64Array.Builder>(x => x.Append(new DateTime(2019, 1, 1)).Append(new DateTime(2019, 1, 2)).Append(new DateTime(2019, 1, 3)));
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").Append("20").Append("30"));
        }

        [Fact]
        public void SlicePrimitiveArrayWithNulls()
        {
            TestSlice<Int32Array, Int32Array.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<Int8Array, Int8Array.Builder>(x => x.Append(10).AppendNull().Append(20).AppendNull().Append(30));
            TestSlice<Int16Array, Int16Array.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<Int64Array, Int64Array.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<UInt8Array, UInt8Array.Builder>(x => x.Append(10).Append(20).Append(30).AppendNull());
            TestSlice<UInt16Array, UInt16Array.Builder>(x => x.Append(10).Append(20).AppendNull().AppendNull().Append(30));
            TestSlice<UInt32Array, UInt32Array.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<UInt64Array, UInt64Array.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<FloatArray, FloatArray.Builder>(x => x.AppendNull().Append(10).Append(20).AppendNull().Append(30));
            TestSlice<DoubleArray, DoubleArray.Builder>(x => x.Append(10).Append(20).AppendNull().Append(30));
            TestSlice<Date32Array, Date32Array.Builder>(x => x.Append(new DateTime(2019, 1, 1)).Append(new DateTime(2019, 1, 2)).AppendNull().Append(new DateTime(2019, 1, 3)));
            TestSlice<Date64Array, Date64Array.Builder>(x => x.Append(new DateTime(2019, 1, 1)).Append(new DateTime(2019, 1, 2)).AppendNull().Append(new DateTime(2019, 1, 3)));
        }

        [Fact]
        public void SliceBooleanArray()
        {
            TestSlice<BooleanArray, BooleanArray.Builder>(x => x.Append(true).Append(false).Append(true));
            TestSlice<BooleanArray, BooleanArray.Builder>(x => x.Append(true).Append(false).AppendNull().Append(true));
        }

        [Fact]
        public void SliceStringArrayWithNullsAndEmptyStrings()
        {
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").AppendNull().Append("30"));
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").Append(string.Empty).Append("30"));
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").Append(string.Empty).AppendNull().Append("30"));
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").AppendNull().Append(string.Empty).Append("30"));
            TestSlice<StringArray, StringArray.Builder>(x => x.Append("10").AppendNull().Append(string.Empty).AppendNull().Append("30"));
        }

        /// <summary>
        /// Builds an array with <paramref name="action"/>, then slices it at every
        /// (offset, length) combination and validates each slice against the base array.
        /// </summary>
        /// <param name="action">Populates a freshly created builder.</param>
        private static void TestSlice<TArray, TArrayBuilder>(Action<TArrayBuilder> action)
            where TArray : IArrowArray
            where TArrayBuilder : IArrowArrayBuilder<TArray>, new()
        {
            var builder = new TArrayBuilder();
            action(builder);
            var baseArray = builder.Build(default) as Array;
            Assert.NotNull(baseArray);
            var totalLength = baseArray.Length;
            var validator = new ArraySliceValidator(baseArray);

            //Check all offset and length
            for (var offset = 0; offset < totalLength; offset++)
            {
                for (var length = 1; length + offset <= totalLength; length++)
                {
                    var targetArray = baseArray.Slice(offset, length);
                    targetArray.Accept(validator);
                }
            }
        }

        /// <summary>
        /// Array visitor that compares a sliced array with the base array it was cut from.
        /// </summary>
        private class ArraySliceValidator :
            IArrowArrayVisitor<Int8Array>,
            IArrowArrayVisitor<Int16Array>,
            IArrowArrayVisitor<Int32Array>,
            IArrowArrayVisitor<Int64Array>,
            IArrowArrayVisitor<UInt8Array>,
            IArrowArrayVisitor<UInt16Array>,
            IArrowArrayVisitor<UInt32Array>,
            IArrowArrayVisitor<UInt64Array>,
            IArrowArrayVisitor<Date32Array>,
            IArrowArrayVisitor<Date64Array>,
            IArrowArrayVisitor<FloatArray>,
            IArrowArrayVisitor<DoubleArray>,
            IArrowArrayVisitor<BooleanArray>,
            IArrowArrayVisitor<StringArray>
        {
            private readonly IArrowArray _baseArray;

            public ArraySliceValidator(IArrowArray baseArray)
            {
                _baseArray = baseArray;
            }

            public void Visit(Int8Array array) => ValidateArrays(array);
            public void Visit(Int16Array array) => ValidateArrays(array);
            public void Visit(Int32Array array) => ValidateArrays(array);
            public void Visit(Int64Array array) => ValidateArrays(array);
            public void Visit(UInt8Array array) => ValidateArrays(array);
            public void Visit(UInt16Array array) => ValidateArrays(array);
            public void Visit(UInt32Array array) => ValidateArrays(array);
            public void Visit(UInt64Array array) => ValidateArrays(array);

            public void Visit(Date32Array array)
            {
                ValidateArrays(array);
                Assert.IsAssignableFrom<Date32Array>(_baseArray);
                var baseArray = (Date32Array)_baseArray;

                Assert.Equal(baseArray.GetDateTimeOffset(array.Offset), array.GetDateTimeOffset(0));
            }

            public void Visit(Date64Array array)
            {
                ValidateArrays(array);
                Assert.IsAssignableFrom<Date64Array>(_baseArray);
                var baseArray = (Date64Array)_baseArray;

                Assert.Equal(baseArray.GetDateTimeOffset(array.Offset), array.GetDateTimeOffset(0));
            }

            public void Visit(FloatArray array) => ValidateArrays(array);
            public void Visit(DoubleArray array) => ValidateArrays(array);
            public void Visit(StringArray array) => ValidateArrays(array);
            public void Visit(BooleanArray array) => ValidateArrays(array);

            public void Visit(IArrowArray array) => throw new NotImplementedException();

            /// <summary>
            /// Checks that the slice shares the base null bitmap, that its values equal the
            /// matching window of the base values, and that its first element matches.
            /// </summary>
            private void ValidateArrays<T>(PrimitiveArray<T> slicedArray)
                where T : struct, IEquatable<T>
            {
                Assert.IsAssignableFrom<PrimitiveArray<T>>(_baseArray);
                var baseArray = (PrimitiveArray<T>)_baseArray;

                Assert.True(baseArray.NullBitmapBuffer.Span.SequenceEqual(slicedArray.NullBitmapBuffer.Span));
                Assert.True(
                    baseArray.ValueBuffer.Span.CastTo<T>().Slice(slicedArray.Offset, slicedArray.Length)
                        .SequenceEqual(slicedArray.Values));

                Assert.Equal(baseArray.GetValue(slicedArray.Offset), slicedArray.GetValue(0));
            }

            /// <summary>
            /// Boolean variant: compares null bitmap, packed value bytes and the first element.
            /// </summary>
            private void ValidateArrays(BooleanArray slicedArray)
            {
                Assert.IsAssignableFrom<BooleanArray>(_baseArray);
                var baseArray = (BooleanArray)_baseArray;

                Assert.True(baseArray.NullBitmapBuffer.Span.SequenceEqual(slicedArray.NullBitmapBuffer.Span));
                Assert.True(baseArray.Values.SequenceEqual(slicedArray.Values));

                Assert.True(
                    baseArray.ValueBuffer.Span.Slice(0, (int) Math.Ceiling(slicedArray.Length / 8.0))
                        .SequenceEqual(slicedArray.Values));

                Assert.Equal(baseArray.GetValue(slicedArray.Offset), slicedArray.GetValue(0));

#pragma warning disable CS0618
                Assert.Equal(baseArray.GetBoolean(slicedArray.Offset), slicedArray.GetBoolean(0));
#pragma warning restore CS0618
            }

            /// <summary>
            /// Binary/string variant: compares value bytes, null bitmap, the matching window
            /// of value offsets and the bytes of the first element.
            /// </summary>
            private void ValidateArrays(BinaryArray slicedArray)
            {
                Assert.IsAssignableFrom<BinaryArray>(_baseArray);
                var baseArray = (BinaryArray)_baseArray;

                Assert.True(baseArray.Values.SequenceEqual(slicedArray.Values));
                Assert.True(baseArray.NullBitmapBuffer.Span.SequenceEqual(slicedArray.NullBitmapBuffer.Span));
                Assert.True(
                    baseArray.ValueOffsetsBuffer.Span.CastTo<int>().Slice(slicedArray.Offset, slicedArray.Length + 1)
                        .SequenceEqual(slicedArray.ValueOffsets));

                Assert.True(baseArray.GetBytes(slicedArray.Offset).SequenceEqual(slicedArray.GetBytes(0)));
            }
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs new file mode 100644 index 000000000..3a9734e84 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs @@ -0,0 +1,493 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +namespace Apache.Arrow.Tests +{ + using System; + using System.Linq; + using Xunit; + + /// <summary> + /// The <see cref="ArrowBufferBitmapBuilderTests"/> class provides unit tests for the + /// <see cref="ArrowBuffer.BitmapBuilder"/> class. + /// </summary> + public class ArrowBufferBitmapBuilderTests + { + public class Append + { + [Theory] + [InlineData(new bool[] {}, false, 1, 0, 1)] + [InlineData(new bool[] {}, true, 1, 1, 0)] + [InlineData(new[] { true, false }, true, 3, 2, 1)] + [InlineData(new[] { true, false }, false, 3, 1, 2)] + public void IncreasesLength( + bool[] initialContents, + bool valueToAppend, + int expectedLength, + int expectedSetBitCount, + int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(initialContents); + + // Act + var actualReturnValue = builder.Append(valueToAppend); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + Assert.True(builder.Capacity >= expectedLength); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + } + + [Theory] + [InlineData(new bool[] {}, false)] + [InlineData(new bool[] {}, true)] + [InlineData(new[] { true, false }, true)] + [InlineData(new[] { true, false }, false)] + public void AfterClearIncreasesLength(bool[] initialContentsToClear, bool valueToAppend) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(initialContentsToClear); + builder.Clear(); + + // Act + var actualReturnValue = builder.Append(valueToAppend); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(1, builder.Length); + Assert.True(builder.Capacity >= 1); + Assert.Equal(valueToAppend ? 1 : 0, builder.SetBitCount); + Assert.Equal(valueToAppend ? 
0 : 1, builder.UnsetBitCount); + } + + [Fact] + public void IncreasesCapacityWhenRequired() + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + int initialCapacity = builder.Capacity; + builder.AppendRange(Enumerable.Repeat(true, initialCapacity)); // Fill to capacity. + + // Act + var actualReturnValue = builder.Append(true); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(initialCapacity + 1, builder.Length); + Assert.True(builder.Capacity >= initialCapacity + 1); + } + } + + public class AppendRange + { + [Theory] + [InlineData(new bool[] {}, new bool[] {}, 0, 0, 0)] + [InlineData(new bool[] {}, new[] { true, false }, 2, 1, 1)] + [InlineData(new[] { true, false }, new bool[] {}, 2, 1, 1)] + [InlineData(new[] { true, false }, new[] { true, false }, 4, 2, 2)] + public void IncreasesLength( + bool[] initialContents, + bool[] toAppend, + int expectedLength, + int expectedSetBitCount, + int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(initialContents); + + // Act + var actualReturnValue = builder.AppendRange(toAppend); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + Assert.True(builder.Capacity >= expectedLength); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + } + } + + public class Build + { + [Theory] + [InlineData(new bool[] { }, new byte[] { })] + [InlineData(new[] { true, false, true, false }, new byte[] { 0b00000101 })] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false, true, false }, + new byte[] { 0b01010101, 0b00000101 })] + public void AppendedRangeBitPacks(bool[] contents, byte[] expectedBytes) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(contents); + + // Act + var buf = builder.Build(); + + // Assert + AssertBuffer(expectedBytes, buf); 
+ } + } + + public class Clear + { + [Theory] + [InlineData(10)] + [InlineData(100)] + public void ClearingSetsBitCountToZero(int numBitsBeforeClear) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + var data = Enumerable.Repeat(true, numBitsBeforeClear).Select(x => x).ToArray(); + builder.AppendRange(data); + + // Act + var actualReturnValue = builder.Clear(); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(0, builder.Length); + } + } + + public class Resize + { + [Theory] + [InlineData(new bool[] {}, 256, 0, 256)] + [InlineData(new[] { true, true, true, true}, 256, 4, 252)] + [InlineData(new[] { false, false, false, false}, 256, 0, 256)] + [InlineData(new[] { true, true, true, true}, 2, 2, 0)] + [InlineData(new[] { true, true, true, true}, 0, 0, 0)] + public void LengthHasExpectedValueAfterResize( + bool[] bits, int newSize, int expectedSetBitCount, int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(bits); + + // Act + var actualReturnValue = builder.Resize(newSize); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.True(builder.Capacity >= newSize); + Assert.Equal(newSize, builder.Length); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + } + + [Fact] + public void NegativeLengthThrows() + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.Append(false); + builder.Append(true); + + // Act/Assert + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Resize(-1)); + } + } + + public class Reserve + { + [Theory] + [InlineData(0, 0, 0)] + [InlineData(0, 0, 8)] + [InlineData(8, 8, 8)] + [InlineData(8, 8, 16)] + public void CapacityIncreased(int initialCapacity, int numBitsToAppend, int additionalCapacity) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(initialCapacity); + builder.AppendRange(Enumerable.Repeat(true, numBitsToAppend)); 
+ + // Act + var actualReturnValue = builder.Reserve(additionalCapacity); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.True(builder.Capacity >= numBitsToAppend + additionalCapacity); + } + + [Fact] + public void NegtativeCapacityThrows() + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + + // Act/Assert + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Reserve(-1)); + } + } + + public class Set + { + [Theory] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 2, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 3, + new byte[] { 0b01011101, 0b00000001 }, + 6, 4)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 8, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 9, + new byte[] { 0b01010101, 0b00000011 }, + 6, 4)] + public void OverloadWithNoValueParameterSetsAsExpected( + bool[] bits, int indexToSet, byte[] expectedBytes, + int expectedSetBitCount, int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(bits); + + // Act + var actualReturnValue = builder.Set(indexToSet); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + var buf = builder.Build(); + AssertBuffer(expectedBytes, buf); + } + + [Theory] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 2, true, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 2, false, + new byte[] { 0b01010001, 0b00000001 }, + 4, 6)] + [InlineData( + new[] { true, false, true, false, true, false, 
true, false, true, false}, + 3, true, + new byte[] { 0b01011101, 0b00000001 }, + 6, 4)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 3, false, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 8, true, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 8, false, + new byte[] { 0b01010101, 0b00000000 }, + 4, 6)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 9, true, + new byte[] { 0b01010101, 0b00000011 }, + 6, 4)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 9, false, + new byte[] { 0b01010101, 0b00000001 }, + 5, 5)] + public void OverloadWithValueParameterSetsAsExpected( + bool[] bits, int indexToSet, bool valueToSet, byte[] expectedBytes, + int expectedSetBitCount, int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(bits); + + // Act + var actualReturnValue = builder.Set(indexToSet, valueToSet); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + var buf = builder.Build(); + AssertBuffer(expectedBytes, buf); + } + + [Theory] + [InlineData(0, -1)] + [InlineData(0, 0)] + [InlineData(1, 1)] + [InlineData(10, 10)] + [InlineData(10, 11)] + public void BadIndexThrows(int numBitsToAppend, int indexToSet) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + var bits = Enumerable.Repeat(true, numBitsToAppend); + builder.AppendRange(bits); + + // Act/Assert + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Set(indexToSet)); + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Set(indexToSet, true)); + } + } + + public class Swap + { + 
[Theory] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 0, 2, + new byte[] { 0b01010101, 0b00000001 })] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 0, 3, + new byte[] { 0b01011100, 0b00000001 })] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 4, 8, + new byte[] { 0b01010101, 0b00000001 })] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 4, 9, + new byte[] { 0b01000101, 0b00000011 })] + public void SwapsAsExpected(bool[] bits, int firstIndex, int secondIndex, byte[] expectedBytes) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(bits); + + // Act + var actualReturnValue = builder.Swap(firstIndex, secondIndex); + + // Assert + Assert.Equal(builder, actualReturnValue); + var buf = builder.Build(); + AssertBuffer(expectedBytes, buf); + } + + [Theory] + [InlineData(0, -1, 0)] + [InlineData(0, 0, -1)] + [InlineData(0, 0, 0)] + [InlineData(1, 0, 1)] + [InlineData(1, 1, 0)] + [InlineData(1, 0, -1)] + [InlineData(1, -1, 0)] + [InlineData(1, 1, 1)] + [InlineData(10, 10, 0)] + [InlineData(10, 0, 10)] + [InlineData(10, 10, 10)] + [InlineData(10, 11, 0)] + [InlineData(10, 0, 11)] + [InlineData(10, 11, 11)] + public void BadIndicesThrows(int numBitsToAppend, int firstIndex, int secondIndex) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + var bits = Enumerable.Repeat(true, numBitsToAppend); + builder.AppendRange(bits); + + // Act/Assert + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Swap(firstIndex, secondIndex)); + } + } + + public class Toggle + { + [Theory] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 2, + new byte[] { 0b01010001, 0b00000001 }, + 4, 6)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 3, + new byte[] { 0b01011101, 
0b00000001 }, + 6, 4)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 8, + new byte[] { 0b01010101, 0b00000000 }, + 4, 6)] + [InlineData( + new[] { true, false, true, false, true, false, true, false, true, false}, + 9, + new byte[] { 0b01010101, 0b00000011 }, + 6, 4)] + public void TogglesAsExpected( + bool[] bits, int indexToToggle, byte[] expectedBytes, + int expectedSetBitCount, int expectedUnsetBitCount) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + builder.AppendRange(bits); + + // Act + var actualReturnValue = builder.Toggle(indexToToggle); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedSetBitCount, builder.SetBitCount); + Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount); + var buf = builder.Build(); + AssertBuffer(expectedBytes, buf); + } + + [Theory] + [InlineData(0, -1)] + [InlineData(0, 0)] + [InlineData(1, 1)] + [InlineData(10, 10)] + [InlineData(10, 11)] + public void BadIndexThrows(int numBitsToAppend, int indexToToggle) + { + // Arrange + var builder = new ArrowBuffer.BitmapBuilder(); + var bits = Enumerable.Repeat(true, numBitsToAppend); + builder.AppendRange(bits); + + // Act/Assert + Assert.Throws<ArgumentOutOfRangeException>(() => builder.Toggle(indexToToggle)); + } + } + + private static void AssertBuffer(byte[] expectedBytes, ArrowBuffer buf) + { + Assert.True(buf.Length >= expectedBytes.Length); + for (int i = 0; i < expectedBytes.Length; i++) + { + Assert.Equal(expectedBytes[i], buf.Span[i]); + } + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs new file mode 100644 index 000000000..495fc2e06 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
// See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="ArrowBuffer.Builder{T}"/>. Tests are grouped into nested
    /// classes named after the builder member they exercise.
    /// </summary>
    public class ArrowBufferBuilderTests
    {
        /// <summary>
        /// Writing through the builder's span at an index beyond its contents must throw.
        /// </summary>
        [Fact]
        public void ThrowsWhenIndexOutOfBounds()
        {
            Assert.Throws<IndexOutOfRangeException>(() =>
            {
                var builder = new ArrowBuffer.Builder<int>();
                builder.Span[100] = 100;
            });
        }

        public class Append
        {
            [Fact]
            public void DoesNotThrowWithNullParameters()
            {
                var builder = new ArrowBuffer.Builder<int>();

                // Capture the outcome explicitly so the test states what it verifies
                // instead of relying on "no exception escaped".
                Exception exception = Record.Exception(() => builder.AppendRange(null));

                Assert.Null(exception);
            }

            [Fact]
            public void CapacityOnlyGrowsWhenLengthWillExceedCapacity()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var capacity = builder.Capacity;

                builder.Append(1);

                Assert.Equal(capacity, builder.Capacity);
            }

            [Fact]
            public void CapacityGrowsAfterAppendWhenLengthExceedsCapacity()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var capacity = builder.Capacity;

                builder.Append(1);
                builder.Append(2);

                Assert.True(builder.Capacity > capacity);
            }

            [Fact]
            public void CapacityGrowsAfterAppendSpan()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var capacity = builder.Capacity;
                int[] data = Enumerable.Range(0, 10).ToArray();

                builder.Append(data);

                Assert.True(builder.Capacity > capacity);
            }

            [Fact]
            public void LengthIncrementsAfterAppend()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var length = builder.Length;

                builder.Append(1);

                Assert.Equal(length + 1, builder.Length);
            }

            [Fact]
            public void LengthGrowsBySpanLength()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                int[] data = Enumerable.Range(0, 10).ToArray();

                builder.Append(data);

                Assert.Equal(10, builder.Length);
            }

            [Fact]
            public void BufferHasExpectedValues()
            {
                var builder = new ArrowBuffer.Builder<int>(1);

                builder.Append(10);
                builder.Append(20);

                var buffer = builder.Build();
                var span = buffer.Span.CastTo<int>();

                Assert.Equal(10, span[0]);
                Assert.Equal(20, span[1]);
                // The element after the appended values is expected to read as zero.
                Assert.Equal(0, span[2]);
            }
        }

        public class AppendRange
        {
            [Fact]
            public void CapacityGrowsAfterAppendEnumerable()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var capacity = builder.Capacity;
                // Deliberately left as a deferred sequence (not materialised into an array),
                // so the enumerable is what gets handed to AppendRange.
                var data = Enumerable.Range(0, 10).Select(x => x);

                builder.AppendRange(data);

                Assert.True(builder.Capacity > capacity);
            }

            [Fact]
            public void LengthGrowsByEnumerableCount()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                var length = builder.Length;
                int[] data = Enumerable.Range(0, 10).ToArray();
                var count = data.Length;

                builder.AppendRange(data);

                Assert.Equal(length + count, builder.Length);
            }

            [Fact]
            public void BufferHasExpectedValues()
            {
                var builder = new ArrowBuffer.Builder<int>(1);
                int[] data = Enumerable.Range(0, 10).ToArray();

                builder.AppendRange(data);

                var buffer = builder.Build();
                var span = buffer.Span.CastTo<int>();

                for (var i = 0; i < data.Length; i++)
                {
                    Assert.Equal(data[i], span[i]);
                }
            }
        }

        public class Clear
        {
            [Theory]
            [InlineData(10)]
            [InlineData(100)]
            public void SetsAllValuesToDefault(int sizeBeforeClear)
            {
                // Length, in bytes, the test expects for a buffer holding a single int.
                const int expectedBufferLength = 64;
                // Number of int elements that fit in a buffer of that length.
                const int expectedIntCount = expectedBufferLength / sizeof(int);

                var builder = new ArrowBuffer.Builder<int>(1);
                int[] data = Enumerable.Range(0, sizeBeforeClear).ToArray();

                builder.AppendRange(data);
                builder.Clear();
                builder.Append(0);

                var buffer = builder.Build();

                // No matter the sizeBeforeClear, we only appended a single 0,
                // so the buffer length should be the smallest possible.
                Assert.Equal(expectedBufferLength, buffer.Length);

                // Every int element of the buffer must be default (zero). Comparing the
                // arrays directly lets xUnit report the first differing element.
                var expected = new int[expectedIntCount];
                int[] actual = buffer.Span.CastTo<int>().Slice(0, expectedIntCount).ToArray();

                Assert.Equal(expected, actual);
            }
        }

        public class Resize
        {
            [Fact]
            public void LengthHasExpectedValueAfterResize()
            {
                var builder = new ArrowBuffer.Builder<int>();

                builder.Resize(8);

                Assert.True(builder.Capacity >= 8);
                Assert.Equal(8, builder.Length);
            }

            [Fact]
            public void NegativeLengthThrows()
            {
                // Arrange
                var builder = new ArrowBuffer.Builder<int>();
                builder.Append(10);
                builder.Append(20);

                // Act/Assert
                Assert.Throws<ArgumentOutOfRangeException>(() => builder.Resize(-1));
            }
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs new file mode 100644 index 000000000..e6fa5256a --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowBufferTests.cs @@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Tests.Fixtures;
using System;
using System.Runtime.InteropServices;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="ArrowBuffer"/>: padding and alignment of built buffers,
    /// and wrapping of externally owned memory.
    /// </summary>
    public class ArrowBufferTests
    {
        public class Allocate :
            IClassFixture<DefaultMemoryAllocatorFixture>
        {
            /// <summary>Multiple, in bytes, that buffer lengths and addresses are checked against.</summary>
            private const int Alignment = 64;

            private readonly DefaultMemoryAllocatorFixture _memoryPoolFixture;

            public Allocate(DefaultMemoryAllocatorFixture memoryPoolFixture)
            {
                _memoryPoolFixture = memoryPoolFixture;
            }

            /// <summary>
            /// Ensure Arrow buffers are allocated in multiples of 64 bytes.
            /// </summary>
            /// <param name="size">number of bytes to allocate</param>
            /// <param name="expectedCapacity">expected buffer capacity after allocation</param>
            [Theory]
            [InlineData(0, 0)]
            [InlineData(1, 64)]
            [InlineData(8, 64)]
            [InlineData(9, 64)]
            [InlineData(65, 128)]
            public void AllocatesWithExpectedPadding(int size, int expectedCapacity)
            {
                ArrowBuffer buffer = BuildZeroFilledBuffer(size);

                Assert.Equal(expectedCapacity, buffer.Length);
            }

            /// <summary>
            /// Ensure allocated buffers are aligned to multiples of 64.
            /// </summary>
            /// <param name="size">number of bytes to append before building the buffer</param>
            [Theory]
            [InlineData(1)]
            [InlineData(8)]
            [InlineData(64)]
            [InlineData(128)]
            public unsafe void AllocatesAlignedToMultipleOf64(int size)
            {
                ArrowBuffer buffer = BuildZeroFilledBuffer(size);

                fixed (byte* ptr = &buffer.Span.GetPinnableReference())
                {
                    long address = new IntPtr(ptr).ToInt64();

                    // Compare the remainder rather than a boolean so that a failure
                    // reports the actual misalignment.
                    Assert.Equal(0L, address % Alignment);
                }
            }

            /// <summary>
            /// Ensure padding in arrow buffers is initialized with zeroes.
            /// </summary>
            [Fact]
            public void HasZeroPadding()
            {
                var buffer = new ArrowBuffer.Builder<byte>(10).Append(0).Build();

                foreach (var b in buffer.Span)
                {
                    Assert.Equal(0, b);
                }
            }

            /// <summary>
            /// Builds a buffer from a byte builder created with capacity <paramref name="size"/>
            /// after appending <paramref name="size"/> zero bytes to it.
            /// </summary>
            /// <param name="size">initial builder capacity and number of zero bytes appended</param>
            /// <returns>the buffer produced by the builder</returns>
            private static ArrowBuffer BuildZeroFilledBuffer(int size)
            {
                var builder = new ArrowBuffer.Builder<byte>(size);
                for (int i = 0; i < size; i++)
                {
                    builder.Append(0);
                }

                return builder.Build();
            }
        }

        /// <summary>
        /// A buffer constructed over caller-owned memory must observe later writes made
        /// through the original memory, i.e. it wraps the memory rather than copying it.
        /// </summary>
        [Fact]
        public void TestExternalMemoryWrappedAsArrowBuffer()
        {
            Memory<byte> memory = new byte[sizeof(int) * 3];
            Span<byte> spanOfBytes = memory.Span;
            var span = spanOfBytes.CastTo<int>();
            span[0] = 0;
            span[1] = 1;
            span[2] = 2;

            ArrowBuffer buffer = new ArrowBuffer(memory);
            Assert.Equal(2, buffer.Span.CastTo<int>()[2]);

            // Mutate the source memory after wrapping; the buffer must see the new value.
            span[2] = 10;
            Assert.Equal(10, buffer.Span.CastTo<int>()[2]);
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs new file mode 100644 index 000000000..f0876c8b1 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs @@ -0,0 +1,159 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
using System;
using System.IO;
using System.Threading.Tasks;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="ArrowFileReader"/>: stream ownership on dispose, allocation
    /// through a supplied memory allocator, and sequential / random-access batch reads.
    /// </summary>
    public class ArrowFileReaderTests
    {
        [Fact]
        public void Ctor_LeaveOpenDefault_StreamClosedOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowFileReader(stream).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenFalse_StreamClosedOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowFileReader(stream, leaveOpen: false).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenTrue_StreamValidOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowFileReader(stream, leaveOpen: true).Dispose();
            Assert.Equal(0, stream.Position);
        }

        [Theory]
        [InlineData(true)]
        [InlineData(false)]
        public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            using (MemoryStream stream = new MemoryStream())
            {
                await WriteArrowFileAsync(stream, originalBatch);

                var memoryPool = new TestMemoryAllocator();
                ArrowFileReader reader = new ArrowFileReader(stream, memoryPool, leaveOpen: shouldLeaveOpen);
                try
                {
                    reader.ReadNextRecordBatch();

                    Assert.Equal(1, memoryPool.Statistics.Allocations);
                    Assert.True(memoryPool.Statistics.BytesAllocated > 0);
                }
                finally
                {
                    // Dispose even when an assertion above fails, so the reader is never leaked.
                    reader.Dispose();
                }

                if (shouldLeaveOpen)
                {
                    Assert.True(stream.Position > 0);
                }
                else
                {
                    Assert.Throws<ObjectDisposedException>(() => stream.Position);
                }
            }
        }

        [Fact]
        public async Task TestReadNextRecordBatch()
        {
            await TestReadRecordBatchHelper((reader, originalBatch) =>
            {
                ArrowReaderVerifier.VerifyReader(reader, originalBatch);
                return Task.CompletedTask;
            });
        }

        [Fact]
        public async Task TestReadNextRecordBatchAsync()
        {
            await TestReadRecordBatchHelper(ArrowReaderVerifier.VerifyReaderAsync);
        }

        [Fact]
        public async Task TestReadRecordBatchAsync()
        {
            await TestReadRecordBatchHelper(async (reader, originalBatch) =>
            {
                RecordBatch readBatch = await reader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(originalBatch, readBatch);

                // You should be able to read the same record batch again
                RecordBatch readBatch2 = await reader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(originalBatch, readBatch2);
            });
        }

        /// <summary>
        /// Writes one 100-row sample batch to an in-memory Arrow file, opens a reader
        /// over it and hands both to <paramref name="verificationFunc"/>.
        /// </summary>
        /// <param name="verificationFunc">
        /// callback receiving the reader and the batch that was written
        /// </param>
        private static async Task TestReadRecordBatchHelper(
            Func<ArrowFileReader, RecordBatch, Task> verificationFunc)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            using (MemoryStream stream = new MemoryStream())
            {
                await WriteArrowFileAsync(stream, originalBatch);

                using (ArrowFileReader reader = new ArrowFileReader(stream))
                {
                    await verificationFunc(reader, originalBatch);
                }
            }
        }

        [Fact]
        public async Task TestReadMultipleRecordBatchAsync()
        {
            RecordBatch originalBatch1 = TestData.CreateSampleRecordBatch(length: 100);
            RecordBatch originalBatch2 = TestData.CreateSampleRecordBatch(length: 50);

            using (MemoryStream stream = new MemoryStream())
            {
                await WriteArrowFileAsync(stream, originalBatch1, originalBatch2);

                using (ArrowFileReader reader = new ArrowFileReader(stream))
                {
                    RecordBatch readBatch1 = await reader.ReadRecordBatchAsync(0);
                    ArrowReaderVerifier.CompareBatches(originalBatch1, readBatch1);

                    RecordBatch readBatch2 = await reader.ReadRecordBatchAsync(1);
                    ArrowReaderVerifier.CompareBatches(originalBatch2, readBatch2);

                    // now read the first again, for random access
                    RecordBatch readBatch3 = await reader.ReadRecordBatchAsync(0);
                    ArrowReaderVerifier.CompareBatches(originalBatch1, readBatch3);
                }
            }
        }

        /// <summary>
        /// Writes <paramref name="batches"/> to <paramref name="stream"/> as an Arrow file
        /// using the schema of the first batch, then rewinds the stream to position 0.
        /// </summary>
        /// <param name="stream">destination stream; left open and rewound for reading</param>
        /// <param name="batches">batches to write, in order; must contain at least one</param>
        private static async Task WriteArrowFileAsync(MemoryStream stream, params RecordBatch[] batches)
        {
            // The writer is not disposed here: the writer tests show that, with the default
            // leaveOpen, disposing it closes the stream, which still has to be read back.
            ArrowFileWriter writer = new ArrowFileWriter(stream, batches[0].Schema);
            foreach (RecordBatch batch in batches)
            {
                await writer.WriteRecordBatchAsync(batch);
            }

            await writer.WriteEndAsync();
            stream.Position = 0;
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs new file mode 100644 index 000000000..a310a3609 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -0,0 +1,168 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Ipc;
using System;
using System.IO;
using System.Threading.Tasks;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="ArrowFileWriter"/>: stream ownership on dispose, block
    /// alignment when writing the legacy IPC format, and files without record batches.
    /// </summary>
    public class ArrowFileWriterTests
    {
        [Fact]
        public void Ctor_LeaveOpenDefault_StreamClosedOnDispose()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);
            var stream = new MemoryStream();
            new ArrowFileWriter(stream, originalBatch.Schema).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenFalse_StreamClosedOnDispose()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);
            var stream = new MemoryStream();
            new ArrowFileWriter(stream, originalBatch.Schema, leaveOpen: false).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenTrue_StreamValidOnDispose()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);
            var stream = new MemoryStream();
            new ArrowFileWriter(stream, originalBatch.Schema, leaveOpen: true).Dispose();
            Assert.Equal(0, stream.Position);
        }

        /// <summary>
        /// Tests that writing an arrow file will always align the Block lengths
        /// to 8 bytes. There are asserts in both the reader and writer which will fail
        /// if this isn't the case.
        /// </summary>
        [Fact]
        public async Task WritesFooterAlignedMulitpleOf8()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            var stream = new MemoryStream();
            using (ArrowFileWriter writer = CreateLegacyFormatWriter(stream, originalBatch.Schema))
            {
                writer.WriteRecordBatch(originalBatch);
                writer.WriteEnd();
            }

            stream.Position = 0;

            await ValidateRecordBatchFile(stream, originalBatch);
        }

        /// <summary>
        /// Tests that writing an arrow file will always align the Block lengths
        /// to 8 bytes. There are asserts in both the reader and writer which will fail
        /// if this isn't the case. This variant uses the asynchronous write methods.
        /// </summary>
        [Fact]
        public async Task WritesFooterAlignedMulitpleOf8Async()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            var stream = new MemoryStream();
            using (ArrowFileWriter writer = CreateLegacyFormatWriter(stream, originalBatch.Schema))
            {
                await writer.WriteRecordBatchAsync(originalBatch);
                await writer.WriteEndAsync();
            }

            stream.Position = 0;

            await ValidateRecordBatchFile(stream, originalBatch);
        }

        /// <summary>
        /// Creates a writer over <paramref name="stream"/> that leaves the stream open on
        /// dispose and writes the legacy IPC format.
        /// </summary>
        /// <param name="stream">stream the writer writes to</param>
        /// <param name="schema">schema of the batches that will be written</param>
        /// <returns>the configured writer; the caller disposes it</returns>
        private static ArrowFileWriter CreateLegacyFormatWriter(MemoryStream stream, Schema schema)
        {
            return new ArrowFileWriter(
                stream,
                schema,
                leaveOpen: true,
                // use WriteLegacyIpcFormat, which only uses a 4-byte length prefix
                // which causes the length prefix to not be 8-byte aligned by default
                new IpcOptions() { WriteLegacyIpcFormat = true });
        }

        /// <summary>
        /// Reads <paramref name="stream"/> as an Arrow file and asserts it holds exactly one
        /// record batch equal to <paramref name="recordBatch"/>. The reader is disposed
        /// before returning.
        /// </summary>
        /// <param name="stream">stream positioned at the start of the Arrow file</param>
        /// <param name="recordBatch">batch the file is expected to contain</param>
        private static async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch)
        {
            using (var reader = new ArrowFileReader(stream))
            {
                int count = await reader.RecordBatchCountAsync();
                Assert.Equal(1, count);
                RecordBatch readBatch = await reader.ReadRecordBatchAsync(0);
                ArrowReaderVerifier.CompareBatches(recordBatch, readBatch);
            }
        }

        /// <summary>
        /// Reads <paramref name="stream"/> as an Arrow file and asserts that it contains no
        /// record batches and carries <paramref name="expectedSchema"/>. The reader is
        /// disposed before returning.
        /// </summary>
        /// <param name="stream">stream positioned at the start of the Arrow file</param>
        /// <param name="expectedSchema">schema the file is expected to declare</param>
        private static async Task ValidateEmptyFile(Stream stream, Schema expectedSchema)
        {
            using (var reader = new ArrowFileReader(stream))
            {
                int count = await reader.RecordBatchCountAsync();
                Assert.Equal(0, count);
                RecordBatch readBatch = reader.ReadNextRecordBatch();
                Assert.Null(readBatch);
                SchemaComparer.Compare(expectedSchema, reader.Schema);
            }
        }

        /// <summary>
        /// Tests that writing an arrow file with no RecordBatches produces the correct
        /// file.
        /// </summary>
        [Fact]
        public async Task WritesEmptyFile()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1);

            var stream = new MemoryStream();
            // Not disposed on purpose: this writer uses the default leaveOpen, and the
            // constructor tests above show that disposing it would close the stream.
            var writer = new ArrowFileWriter(stream, originalBatch.Schema);

            writer.WriteStart();
            writer.WriteEnd();

            stream.Position = 0;

            await ValidateEmptyFile(stream, originalBatch.Schema);
        }

        /// <summary>
        /// Tests that writing an arrow file with no RecordBatches produces the correct
        /// file when using WriteStartAsync and WriteEndAsync.
        /// </summary>
        [Fact]
        public async Task WritesEmptyFileAsync()
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1);

            var stream = new MemoryStream();
            // Not disposed on purpose: this writer uses the default leaveOpen, and the
            // constructor tests above show that disposing it would close the stream.
            var writer = new ArrowFileWriter(stream, originalBatch.Schema);

            await writer.WriteStartAsync();
            await writer.WriteEndAsync();

            stream.Position = 0;

            await ValidateEmptyFile(stream, originalBatch.Schema);
        }
    }
}
diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs new file mode 100644 index 000000000..a2c9a9ef7 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -0,0 +1,302 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow.Ipc;
using Apache.Arrow.Types;
using System;
using System.Linq;
using System.Threading.Tasks;
using Apache.Arrow.Arrays;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// xUnit assertion helpers that check a record batch (or a single array) produced by a
    /// reader against the batch (or array) that was originally written.
    /// </summary>
    public static class ArrowReaderVerifier
    {
        /// <summary>
        /// Reads one batch synchronously from <paramref name="reader"/>, compares it with
        /// <paramref name="originalBatch"/>, and asserts that the reader then yields no more batches.
        /// </summary>
        /// <param name="reader">Reader positioned before its single record batch.</param>
        /// <param name="originalBatch">The batch the reader is expected to return.</param>
        public static void VerifyReader(ArrowStreamReader reader, RecordBatch originalBatch)
        {
            RecordBatch readBatch = reader.ReadNextRecordBatch();
            CompareBatches(originalBatch, readBatch);

            // There should only be one batch - calling ReadNextRecordBatch again should return null.
            // It is called twice on purpose: reading past the end must keep returning null.
            Assert.Null(reader.ReadNextRecordBatch());
            Assert.Null(reader.ReadNextRecordBatch());
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="VerifyReader"/>: reads one batch with
        /// <c>ReadNextRecordBatchAsync</c>, compares it, and asserts the reader is exhausted.
        /// </summary>
        /// <param name="reader">Reader positioned before its single record batch.</param>
        /// <param name="originalBatch">The batch the reader is expected to return.</param>
        public static async Task VerifyReaderAsync(ArrowStreamReader reader, RecordBatch originalBatch)
        {
            RecordBatch readBatch = await reader.ReadNextRecordBatchAsync();
            CompareBatches(originalBatch, readBatch);

            // There should only be one batch - calling ReadNextRecordBatchAsync again should return null.
            // It is called twice on purpose: reading past the end must keep returning null.
            Assert.Null(await reader.ReadNextRecordBatchAsync());
            Assert.Null(await reader.ReadNextRecordBatchAsync());
        }

        /// <summary>
        /// Asserts that two record batches have the same schema, length and column count, and
        /// that every column matches according to <see cref="CompareArrays"/>.
        /// </summary>
        /// <param name="expectedBatch">The reference batch.</param>
        /// <param name="actualBatch">The batch under test; must not be null.</param>
        /// <param name="strictCompare">
        /// When true the underlying buffers are compared; when false only the logical values
        /// (and, if the expected array has nulls, the validity bits) are compared.
        /// </param>
        public static void CompareBatches(RecordBatch expectedBatch, RecordBatch actualBatch, bool strictCompare = true)
        {
            // Fail with an assertion (not a NullReferenceException) when the reader produced nothing.
            Assert.NotNull(actualBatch);

            SchemaComparer.Compare(expectedBatch.Schema, actualBatch.Schema);
            Assert.Equal(expectedBatch.Length, actualBatch.Length);
            Assert.Equal(expectedBatch.ColumnCount, actualBatch.ColumnCount);

            for (int i = 0; i < expectedBatch.ColumnCount; i++)
            {
                IArrowArray expectedArray = expectedBatch.Arrays.ElementAt(i);
                IArrowArray actualArray = actualBatch.Arrays.ElementAt(i);

                CompareArrays(expectedArray, actualArray, strictCompare);
            }
        }

        /// <summary>
        /// Asserts that <paramref name="actualArray"/> matches <paramref name="expectedArray"/>.
        /// The comparison is dispatched on the runtime type of the actual array.
        /// </summary>
        /// <param name="expectedArray">The reference array.</param>
        /// <param name="actualArray">The array under test; must not be null.</param>
        /// <param name="strictCompare">See <see cref="CompareBatches"/>.</param>
        public static void CompareArrays(IArrowArray expectedArray, IArrowArray actualArray, bool strictCompare = true)
        {
            Assert.NotNull(actualArray);
            actualArray.Accept(new ArrayComparer(expectedArray, strictCompare));
        }

        /// <summary>
        /// Visitor that is handed the actual array and asserts it against the expected array
        /// captured at construction time.
        /// </summary>
        private class ArrayComparer :
            IArrowArrayVisitor<Int8Array>,
            IArrowArrayVisitor<Int16Array>,
            IArrowArrayVisitor<Int32Array>,
            IArrowArrayVisitor<Int64Array>,
            IArrowArrayVisitor<UInt8Array>,
            IArrowArrayVisitor<UInt16Array>,
            IArrowArrayVisitor<UInt32Array>,
            IArrowArrayVisitor<UInt64Array>,
            IArrowArrayVisitor<FloatArray>,
            IArrowArrayVisitor<DoubleArray>,
            IArrowArrayVisitor<BooleanArray>,
            IArrowArrayVisitor<TimestampArray>,
            IArrowArrayVisitor<Date32Array>,
            IArrowArrayVisitor<Date64Array>,
            IArrowArrayVisitor<ListArray>,
            IArrowArrayVisitor<StringArray>,
            IArrowArrayVisitor<FixedSizeBinaryArray>,
            IArrowArrayVisitor<BinaryArray>,
            IArrowArrayVisitor<StructArray>,
            IArrowArrayVisitor<Decimal128Array>,
            IArrowArrayVisitor<Decimal256Array>,
            IArrowArrayVisitor<DictionaryArray>
        {
            private readonly IArrowArray _expectedArray;
            private readonly ArrayTypeComparer _arrayTypeComparer;
            private readonly bool _strictCompare;

            public ArrayComparer(IArrowArray expectedArray, bool strictCompare)
            {
                _expectedArray = expectedArray;
                _arrayTypeComparer = new ArrayTypeComparer(expectedArray.Data.DataType);
                _strictCompare = strictCompare;
            }

            public void Visit(Int8Array array) => CompareArrays(array);
            public void Visit(Int16Array array) => CompareArrays(array);
            public void Visit(Int32Array array) => CompareArrays(array);
            public void Visit(Int64Array array) => CompareArrays(array);
            public void Visit(UInt8Array array) => CompareArrays(array);
            public void Visit(UInt16Array array) => CompareArrays(array);
            public void Visit(UInt32Array array) => CompareArrays(array);
            public void Visit(UInt64Array array) => CompareArrays(array);
            public void Visit(FloatArray array) => CompareArrays(array);
            public void Visit(DoubleArray array) => CompareArrays(array);
            public void Visit(BooleanArray array) => CompareArrays(array);
            public void Visit(TimestampArray array) => CompareArrays(array);
            public void Visit(Date32Array array) => CompareArrays(array);
            public void Visit(Date64Array array) => CompareArrays(array);
            public void Visit(ListArray array) => CompareArrays(array);
            public void Visit(FixedSizeBinaryArray array) => CompareArrays(array);
            public void Visit(Decimal128Array array) => CompareArrays(array);
            public void Visit(Decimal256Array array) => CompareArrays(array);
            public void Visit(StringArray array) => CompareBinaryArrays<StringArray>(array);
            public void Visit(BinaryArray array) => CompareBinaryArrays<BinaryArray>(array);

            /// <summary>
            /// Compares the struct-level length, null count, offset and child counts, then
            /// compares each child field with a nested comparer.
            /// </summary>
            public void Visit(StructArray array)
            {
                Assert.IsAssignableFrom<StructArray>(_expectedArray);
                StructArray expectedArray = (StructArray)_expectedArray;

                Assert.Equal(expectedArray.Length, array.Length);
                Assert.Equal(expectedArray.NullCount, array.NullCount);
                Assert.Equal(expectedArray.Offset, array.Offset);
                Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length);
                Assert.Equal(expectedArray.Fields.Count, array.Fields.Count);

                for (int i = 0; i < array.Fields.Count; i++)
                {
                    array.Fields[i].Accept(new ArrayComparer(expectedArray.Fields[i], _strictCompare));
                }
            }

            /// <summary>
            /// Compares a dictionary-encoded array by comparing its indices and its dictionary
            /// values independently.
            /// </summary>
            public void Visit(DictionaryArray array)
            {
                Assert.IsAssignableFrom<DictionaryArray>(_expectedArray);
                DictionaryArray expectedArray = (DictionaryArray)_expectedArray;
                var indicesComparer = new ArrayComparer(expectedArray.Indices, _strictCompare);
                var dictionaryComparer = new ArrayComparer(expectedArray.Dictionary, _strictCompare);
                array.Indices.Accept(indicesComparer);
                array.Dictionary.Accept(dictionaryComparer);
            }

            /// <summary>Fallback for array types this comparer has no specific overload for.</summary>
            public void Visit(IArrowArray array) =>
                throw new NotImplementedException(
                    $"{nameof(ArrayComparer)} does not support arrays of type {array?.GetType().Name ?? "null"}.");

            /// <summary>
            /// Compares variable-length binary arrays (including strings). <typeparamref name="T"/>
            /// is the concrete array type both sides are required to be assignable to.
            /// </summary>
            private void CompareBinaryArrays<T>(BinaryArray actualArray)
                where T : IArrowArray
            {
                Assert.IsAssignableFrom<T>(_expectedArray);
                Assert.IsAssignableFrom<T>(actualArray);

                var expectedArray = (BinaryArray)_expectedArray;

                actualArray.Data.DataType.Accept(_arrayTypeComparer);

                Assert.Equal(expectedArray.Length, actualArray.Length);
                Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
                Assert.Equal(expectedArray.Offset, actualArray.Offset);

                CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);

                if (_strictCompare)
                {
                    Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span));
                }

                // The value bytes are compared element by element in both modes. Slicing the raw
                // value buffer by the element count (as the strict path used to) treats a number
                // of elements as a number of bytes: it ignores most of the data when values are
                // longer than one byte and throws when the values total fewer bytes than elements.
                for (int i = 0; i < expectedArray.Length; i++)
                {
                    Assert.True(
                        expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)),
                        $"BinaryArray values do not match at index {i}.");
                }
            }

            /// <summary>
            /// Compares fixed-width binary arrays; the decimal array visitors also route here.
            /// </summary>
            private void CompareArrays(FixedSizeBinaryArray actualArray)
            {
                Assert.IsAssignableFrom<FixedSizeBinaryArray>(_expectedArray);
                Assert.IsAssignableFrom<FixedSizeBinaryArray>(actualArray);

                var expectedArray = (FixedSizeBinaryArray)_expectedArray;

                actualArray.Data.DataType.Accept(_arrayTypeComparer);

                Assert.Equal(expectedArray.Length, actualArray.Length);
                Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
                Assert.Equal(expectedArray.Offset, actualArray.Offset);

                CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);

                if (_strictCompare)
                {
                    // Each slot occupies ByteWidth bytes, so the addressed region of the value
                    // buffer is (Offset + Length) * ByteWidth bytes, not Length bytes.
                    int byteWidth = ((FixedSizeBinaryType)expectedArray.Data.DataType).ByteWidth;
                    int usedByteCount = (expectedArray.Offset + expectedArray.Length) * byteWidth;

                    ReadOnlySpan<byte> expectedBytes = expectedArray.ValueBuffer.Span;
                    ReadOnlySpan<byte> actualBytes = actualArray.ValueBuffer.Span;

                    // Guard the slices so a short buffer fails as an assertion rather than an
                    // ArgumentOutOfRangeException.
                    Assert.True(expectedBytes.Length >= usedByteCount, "Expected FixedSizeBinaryArray value buffer is too short.");
                    Assert.True(actualBytes.Length >= usedByteCount, "Actual FixedSizeBinaryArray value buffer is too short.");

                    Assert.True(
                        expectedBytes.Slice(0, usedByteCount).SequenceEqual(actualBytes.Slice(0, usedByteCount)),
                        "FixedSizeBinaryArray value buffers do not match.");
                }
                else
                {
                    for (int i = 0; i < expectedArray.Length; i++)
                    {
                        Assert.True(
                            expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)),
                            $"FixedSizeBinaryArray values do not match at index {i}.");
                    }
                }
            }

            /// <summary>
            /// Compares primitive arrays: either the first <c>Length</c> entries of <c>Values</c>
            /// as a whole (strict) or each <c>GetValue(i)</c> individually (non-strict).
            /// </summary>
            private void CompareArrays<T>(PrimitiveArray<T> actualArray)
                where T : struct, IEquatable<T>
            {
                Assert.IsAssignableFrom<PrimitiveArray<T>>(_expectedArray);
                PrimitiveArray<T> expectedArray = (PrimitiveArray<T>)_expectedArray;

                actualArray.Data.DataType.Accept(_arrayTypeComparer);

                Assert.Equal(expectedArray.Length, actualArray.Length);
                Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
                Assert.Equal(expectedArray.Offset, actualArray.Offset);

                CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);

                if (_strictCompare)
                {
                    Assert.True(expectedArray.Values.Slice(0, expectedArray.Length).SequenceEqual(actualArray.Values.Slice(0, actualArray.Length)));
                }
                else
                {
                    for (int i = 0; i < expectedArray.Length; i++)
                    {
                        Assert.Equal(expectedArray.GetValue(i), actualArray.GetValue(i));
                    }
                }
            }

            /// <summary>
            /// Compares boolean arrays. In strict mode the packed value bytes needed for
            /// <c>Length</c> bits are compared; otherwise each value is compared individually.
            /// </summary>
            private void CompareArrays(BooleanArray actualArray)
            {
                Assert.IsAssignableFrom<BooleanArray>(_expectedArray);
                BooleanArray expectedArray = (BooleanArray)_expectedArray;

                actualArray.Data.DataType.Accept(_arrayTypeComparer);

                Assert.Equal(expectedArray.Length, actualArray.Length);
                Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
                Assert.Equal(expectedArray.Offset, actualArray.Offset);

                CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);

                if (_strictCompare)
                {
                    int booleanByteCount = BitUtility.ByteCount(expectedArray.Length);
                    Assert.True(expectedArray.Values.Slice(0, booleanByteCount).SequenceEqual(actualArray.Values.Slice(0, booleanByteCount)));
                }
                else
                {
                    for (int i = 0; i < expectedArray.Length; i++)
                    {
                        Assert.Equal(expectedArray.GetValue(i), actualArray.GetValue(i));
                    }
                }
            }

            /// <summary>
            /// Compares list arrays: header fields, validity, the offsets buffer, and then the
            /// child values array with a nested comparer.
            /// </summary>
            private void CompareArrays(ListArray actualArray)
            {
                Assert.IsAssignableFrom<ListArray>(_expectedArray);
                ListArray expectedArray = (ListArray)_expectedArray;

                actualArray.Data.DataType.Accept(_arrayTypeComparer);

                Assert.Equal(expectedArray.Length, actualArray.Length);
                Assert.Equal(expectedArray.NullCount, actualArray.NullCount);
                Assert.Equal(expectedArray.Offset, actualArray.Offset);

                CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer);
                Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span));

                actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare));
            }

            /// <summary>
            /// Compares validity bitmaps. Strict mode requires the whole buffers to be equal.
            /// Non-strict mode skips the check when the expected array has no nulls, and
            /// otherwise compares only the leading bytes that cover <paramref name="arrayLength"/> bits.
            /// </summary>
            private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer expectedValidityBuffer, ArrowBuffer actualValidityBuffer)
            {
                if (_strictCompare)
                {
                    Assert.True(expectedValidityBuffer.Span.SequenceEqual(actualValidityBuffer.Span));
                }
                else if (nullCount != 0)
                {
                    int validityBitmapByteCount = BitUtility.ByteCount(arrayLength);
                    Assert.True(
                        expectedValidityBuffer.Span.Slice(0, validityBitmapByteCount).SequenceEqual(actualValidityBuffer.Span.Slice(0, validityBitmapByteCount)),
                        "Validity buffers do not match.");
                }
            }
        }
    }
}
b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs new file mode 100644 index 000000000..973fc6a0a --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="ArrowStreamReader"/>: stream ownership on dispose, allocation from a
    /// supplied memory pool, and reading a written batch back from memory, from a stream, and
    /// from a stream that only returns part of the requested data per read.
    /// </summary>
    public class ArrowStreamReaderTests
    {
        [Fact]
        public void Ctor_LeaveOpenDefault_StreamClosedOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowStreamReader(stream).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenFalse_StreamClosedOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowStreamReader(stream, leaveOpen: false).Dispose();
            Assert.Throws<ObjectDisposedException>(() => stream.Position);
        }

        [Fact]
        public void Ctor_LeaveOpenTrue_StreamValidOnDispose()
        {
            var stream = new MemoryStream();
            new ArrowStreamReader(stream, leaveOpen: true).Dispose();
            Assert.Equal(0, stream.Position);
        }

        [Theory]
        [InlineData(true, true, 2)]
        [InlineData(true, false, 1)]
        [InlineData(false, true, 2)]
        [InlineData(false, false, 1)]
        public async Task Ctor_MemoryPool_AllocatesFromPool(bool shouldLeaveOpen, bool createDictionaryArray, int expectedAllocations)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            using (MemoryStream stream = new MemoryStream())
            {
                await WriteBatchAsync(stream, originalBatch, writeEnd: true);

                stream.Position = 0;

                var memoryPool = new TestMemoryAllocator();
                ArrowStreamReader reader = new ArrowStreamReader(stream, memoryPool, shouldLeaveOpen);
                reader.ReadNextRecordBatch();

                Assert.Equal(expectedAllocations, memoryPool.Statistics.Allocations);
                Assert.True(memoryPool.Statistics.BytesAllocated > 0);

                // Disposed explicitly (not via using) because the stream state right after
                // Dispose is what the remaining assertions check.
                reader.Dispose();

                if (shouldLeaveOpen)
                {
                    Assert.True(stream.Position > 0);
                }
                else
                {
                    Assert.Throws<ObjectDisposedException>(() => stream.Position);
                }
            }
        }

        [Theory]
        [InlineData(true)]
        [InlineData(false)]
        public async Task ReadRecordBatch_Memory(bool writeEnd)
        {
            await TestReaderFromMemory(VerifyReaderSync, writeEnd);
        }

        [Theory]
        [InlineData(true)]
        [InlineData(false)]
        public async Task ReadRecordBatchAsync_Memory(bool writeEnd)
        {
            await TestReaderFromMemory(ArrowReaderVerifier.VerifyReaderAsync, writeEnd);
        }

        /// <summary>
        /// Writes a sample batch to a memory stream, then runs <paramref name="verificationFunc"/>
        /// against a reader constructed over the resulting byte buffer.
        /// </summary>
        private static async Task TestReaderFromMemory(
            Func<ArrowStreamReader, RecordBatch, Task> verificationFunc,
            bool writeEnd)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100);

            byte[] buffer;
            using (MemoryStream stream = new MemoryStream())
            {
                await WriteBatchAsync(stream, originalBatch, writeEnd);

                // NOTE(review): GetBuffer() returns the stream's whole backing array, which can be
                // longer than the bytes actually written; ToArray() would return only the written
                // bytes. Left unchanged because the writeEnd == false case may depend on the
                // trailing unused bytes - confirm before switching.
                buffer = stream.GetBuffer();
            }

            using (ArrowStreamReader reader = new ArrowStreamReader(buffer))
            {
                await verificationFunc(reader, originalBatch);
            }
        }

        [Theory]
        [InlineData(true, true)]
        [InlineData(true, false)]
        [InlineData(false, true)]
        [InlineData(false, false)]
        public async Task ReadRecordBatch_Stream(bool writeEnd, bool createDictionaryArray)
        {
            await TestReaderFromStream(VerifyReaderSync, writeEnd, createDictionaryArray);
        }

        [Theory]
        [InlineData(true, true)]
        [InlineData(true, false)]
        [InlineData(false, true)]
        [InlineData(false, false)]
        public async Task ReadRecordBatchAsync_Stream(bool writeEnd, bool createDictionaryArray)
        {
            await TestReaderFromStream(ArrowReaderVerifier.VerifyReaderAsync, writeEnd, createDictionaryArray);
        }

        /// <summary>
        /// Writes a sample batch to a memory stream, rewinds it, and runs
        /// <paramref name="verificationFunc"/> against a reader over that stream.
        /// </summary>
        private static async Task TestReaderFromStream(
            Func<ArrowStreamReader, RecordBatch, Task> verificationFunc,
            bool writeEnd, bool createDictionaryArray)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            using (MemoryStream stream = new MemoryStream())
            {
                await WriteBatchAsync(stream, originalBatch, writeEnd);

                stream.Position = 0;

                using (ArrowStreamReader reader = new ArrowStreamReader(stream))
                {
                    await verificationFunc(reader, originalBatch);
                }
            }
        }

        [Theory]
        [InlineData(true)]
        [InlineData(false)]
        public async Task ReadRecordBatch_PartialReadStream(bool createDictionaryArray)
        {
            await TestReaderFromPartialReadStream(VerifyReaderSync, createDictionaryArray);
        }

        [Theory]
        [InlineData(true)]
        [InlineData(false)]
        public async Task ReadRecordBatchAsync_PartialReadStream(bool createDictionaryArray)
        {
            await TestReaderFromPartialReadStream(ArrowReaderVerifier.VerifyReaderAsync, createDictionaryArray);
        }

        /// <summary>
        /// Verifies that the stream reader reads multiple times when a stream
        /// only returns a subset of the data from each Read.
        /// </summary>
        private static async Task TestReaderFromPartialReadStream(Func<ArrowStreamReader, RecordBatch, Task> verificationFunc, bool createDictionaryArray)
        {
            RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray);

            using (PartialReadStream stream = new PartialReadStream())
            {
                await WriteBatchAsync(stream, originalBatch, writeEnd: true);

                stream.Position = 0;

                using (ArrowStreamReader reader = new ArrowStreamReader(stream))
                {
                    await verificationFunc(reader, originalBatch);
                }
            }
        }

        /// <summary>
        /// Adapts the synchronous <see cref="ArrowReaderVerifier.VerifyReader"/> to the
        /// task-returning delegate shape the TestReaderFrom* helpers accept.
        /// </summary>
        private static Task VerifyReaderSync(ArrowStreamReader reader, RecordBatch originalBatch)
        {
            ArrowReaderVerifier.VerifyReader(reader, originalBatch);
            return Task.CompletedTask;
        }

        /// <summary>
        /// Writes <paramref name="batch"/> to <paramref name="stream"/> and, when
        /// <paramref name="writeEnd"/> is true, the end-of-stream marker as well.
        /// </summary>
        private static async Task WriteBatchAsync(Stream stream, RecordBatch batch, bool writeEnd)
        {
            // leaveOpen: true so that disposing the writer does not close the stream the
            // reader under test still has to consume.
            using (var writer = new ArrowStreamWriter(stream, batch.Schema, leaveOpen: true))
            {
                await writer.WriteRecordBatchAsync(batch);
                if (writeEnd)
                {
                    await writer.WriteEndAsync();
                }
            }
        }

        /// <summary>
        /// A stream class that only returns a part of the data at a time.
        /// </summary>
        private class PartialReadStream : MemoryStream
        {
            // by default return 20 bytes at a time
            public int PartialReadLength { get; set; } = 20;

            // Only the Span/Memory read overloads are throttled; the byte[] overloads are
            // inherited unchanged from MemoryStream.
            public override int Read(Span<byte> destination)
            {
                if (destination.Length > PartialReadLength)
                {
                    destination = destination.Slice(0, PartialReadLength);
                }

                return base.Read(destination);
            }

            public override ValueTask<int> ReadAsync(Memory<byte> destination, CancellationToken cancellationToken = default)
            {
                if (destination.Length > PartialReadLength)
                {
                    destination = destination.Slice(0, PartialReadLength);
                }

                return base.ReadAsync(destination, cancellationToken);
            }
        }
    }
}
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Sockets; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class ArrowStreamWriterTests + { + [Fact] + public void Ctor_LeaveOpenDefault_StreamClosedOnDispose() + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100); + var stream = new MemoryStream(); + new ArrowStreamWriter(stream, originalBatch.Schema).Dispose(); + Assert.Throws<ObjectDisposedException>(() => stream.Position); + } + + [Fact] + public void Ctor_LeaveOpenFalse_StreamClosedOnDispose() + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100); + var stream = new MemoryStream(); + new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: false).Dispose(); + Assert.Throws<ObjectDisposedException>(() => stream.Position); + } + + [Fact] + public void Ctor_LeaveOpenTrue_StreamValidOnDispose() + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100); + var stream = new MemoryStream(); + new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true).Dispose(); + Assert.Equal(0, stream.Position); + } + + [Theory] + [InlineData(true, 32153)] + [InlineData(false, 32154)] + public void 
CanWriteToNetworkStream(bool createDictionaryArray, int port) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + + TcpListener listener = new TcpListener(IPAddress.Loopback, port); + listener.Start(); + + using (TcpClient sender = new TcpClient()) + { + sender.Connect(IPAddress.Loopback, port); + NetworkStream stream = sender.GetStream(); + + using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema)) + { + writer.WriteRecordBatch(originalBatch); + writer.WriteEnd(); + + stream.Flush(); + } + } + + using (TcpClient receiver = listener.AcceptTcpClient()) + { + NetworkStream stream = receiver.GetStream(); + using (var reader = new ArrowStreamReader(stream)) + { + RecordBatch newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + + [Theory] + [InlineData(true, 32155)] + [InlineData(false, 32156)] + public async Task CanWriteToNetworkStreamAsync(bool createDictionaryArray, int port) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + + TcpListener listener = new TcpListener(IPAddress.Loopback, port); + listener.Start(); + + using (TcpClient sender = new TcpClient()) + { + sender.Connect(IPAddress.Loopback, port); + NetworkStream stream = sender.GetStream(); + + using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema)) + { + await writer.WriteRecordBatchAsync(originalBatch); + await writer.WriteEndAsync(); + + stream.Flush(); + } + } + + using (TcpClient receiver = listener.AcceptTcpClient()) + { + NetworkStream stream = receiver.GetStream(); + using (var reader = new ArrowStreamReader(stream)) + { + RecordBatch newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void WriteEmptyBatch(bool 
createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 0, createDictionaryArray: createDictionaryArray); + + TestRoundTripRecordBatch(originalBatch); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task WriteEmptyBatchAsync(bool createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 0, createDictionaryArray: createDictionaryArray); + + await TestRoundTripRecordBatchAsync(originalBatch); + } + + [Fact] + public void WriteBatchWithNulls() + { + RecordBatch originalBatch = new RecordBatch.Builder() + .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))) + .Append("Column2", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(), + length: 10, + nullCount: 2, + offset: 0)) + .Append("Column3", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(), + length: 10, + nullCount: 10, + offset: 0)) + .Append("NullableBooleanColumn", true, new BooleanArray( + valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(), + length: 10, + nullCount: 3, + offset: 0)) + .Build(); + + TestRoundTripRecordBatch(originalBatch); + } + + [Fact] + public async Task WriteBatchWithNullsAsync() + { + RecordBatch originalBatch = new RecordBatch.Builder() + .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))) + .Append("Column2", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new 
ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(), + length: 10, + nullCount: 2, + offset: 0)) + .Append("Column3", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(), + length: 10, + nullCount: 10, + offset: 0)) + .Append("NullableBooleanColumn", true, new BooleanArray( + valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(), + length: 10, + nullCount: 3, + offset: 0)) + .Build(); + + await TestRoundTripRecordBatchAsync(originalBatch); + } + + private static void TestRoundTripRecordBatches(List<RecordBatch> originalBatches, IpcOptions options = null) + { + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (RecordBatch originalBatch in originalBatches) + { + writer.WriteRecordBatch(originalBatch); + } + writer.WriteEnd(); + } + + stream.Position = 0; + + using (var reader = new ArrowStreamReader(stream)) + { + foreach (RecordBatch originalBatch in originalBatches) + { + RecordBatch newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + } + + private static async Task TestRoundTripRecordBatchesAsync(List<RecordBatch> originalBatches, IpcOptions options = null) + { + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (RecordBatch originalBatch in originalBatches) + { + await writer.WriteRecordBatchAsync(originalBatch); + } + await writer.WriteEndAsync(); + } + + stream.Position = 0; + + using (var reader = new ArrowStreamReader(stream)) + { + foreach (RecordBatch originalBatch in originalBatches) 
+ { + RecordBatch newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + } + + private static void TestRoundTripRecordBatch(RecordBatch originalBatch, IpcOptions options = null) + { + TestRoundTripRecordBatches(new List<RecordBatch> { originalBatch }, options); + } + + private static async Task TestRoundTripRecordBatchAsync(RecordBatch originalBatch, IpcOptions options = null) + { + await TestRoundTripRecordBatchesAsync(new List<RecordBatch> { originalBatch }, options); + } + + [Fact] + public void WriteBatchWithCorrectPadding() + { + byte value1 = 0x04; + byte value2 = 0x14; + var batch = new RecordBatch( + new Schema.Builder() + .Field(f => f.Name("age").DataType(Int32Type.Default)) + .Field(f => f.Name("characterCount").DataType(Int32Type.Default)) + .Build(), + new IArrowArray[] + { + new Int32Array( + new ArrowBuffer(new byte[] { value1, value1, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0), + new Int32Array( + new ArrowBuffer(new byte[] { value2, value2, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0) + }, + length: 1); + + TestRoundTripRecordBatch(batch); + + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, batch.Schema, leaveOpen: true)) + { + writer.WriteRecordBatch(batch); + writer.WriteEnd(); + } + + byte[] writtenBytes = stream.ToArray(); + + // ensure that the data buffers at the end are 8-byte aligned + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 24]); + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 23]); + for (int i = 22; i > 16; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 16]); + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 15]); + for (int i = 14; i > 8; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + + // verify the EOS is 
written correctly + for (int i = 8; i > 4; i--) + { + Assert.Equal(0xFF, writtenBytes[writtenBytes.Length - i]); + } + for (int i = 4; i > 0; i--) + { + Assert.Equal(0x00, writtenBytes[writtenBytes.Length - i]); + } + } + } + + [Fact] + public async Task WriteBatchWithCorrectPaddingAsync() + { + byte value1 = 0x04; + byte value2 = 0x14; + var batch = new RecordBatch( + new Schema.Builder() + .Field(f => f.Name("age").DataType(Int32Type.Default)) + .Field(f => f.Name("characterCount").DataType(Int32Type.Default)) + .Build(), + new IArrowArray[] + { + new Int32Array( + new ArrowBuffer(new byte[] { value1, value1, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0), + new Int32Array( + new ArrowBuffer(new byte[] { value2, value2, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0) + }, + length: 1); + + await TestRoundTripRecordBatchAsync(batch); + + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, batch.Schema, leaveOpen: true)) + { + await writer.WriteRecordBatchAsync(batch); + await writer.WriteEndAsync(); + } + + byte[] writtenBytes = stream.ToArray(); + + // ensure that the data buffers at the end are 8-byte aligned + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 24]); + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 23]); + for (int i = 22; i > 16; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 16]); + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 15]); + for (int i = 14; i > 8; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + + // verify the EOS is written correctly + for (int i = 8; i > 4; i--) + { + Assert.Equal(0xFF, writtenBytes[writtenBytes.Length - i]); + } + for (int i = 4; i > 0; i--) + { + Assert.Equal(0x00, writtenBytes[writtenBytes.Length - i]); + } + } + } + + [Theory] + [InlineData(true)] + 
[InlineData(false)] + public void LegacyIpcFormatRoundTrips(bool createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + TestRoundTripRecordBatch(originalBatch, new IpcOptions() { WriteLegacyIpcFormat = true }); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task LegacyIpcFormatRoundTripsAsync(bool createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + await TestRoundTripRecordBatchAsync(originalBatch, new IpcOptions() { WriteLegacyIpcFormat = true }); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public void WriteLegacyIpcFormat(bool writeLegacyIpcFormat, bool createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + var options = new IpcOptions() { WriteLegacyIpcFormat = writeLegacyIpcFormat }; + + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true, options)) + { + writer.WriteRecordBatch(originalBatch); + writer.WriteEnd(); + } + + stream.Position = 0; + + // ensure the continuation is written correctly + byte[] buffer = stream.ToArray(); + int messageLength = BinaryPrimitives.ReadInt32LittleEndian(buffer); + int endOfBuffer1 = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(buffer.Length - 8)); + int endOfBuffer2 = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(buffer.Length - 4)); + if (writeLegacyIpcFormat) + { + // the legacy IPC format doesn't have a continuation token at the start + Assert.NotEqual(-1, messageLength); + Assert.NotEqual(-1, endOfBuffer1); + } + else + { + // the latest IPC format has a continuation token at the start + Assert.Equal(-1, 
messageLength); + Assert.Equal(-1, endOfBuffer1); + } + + Assert.Equal(0, endOfBuffer2); + } + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task WriteLegacyIpcFormatAsync(bool writeLegacyIpcFormat, bool createDictionaryArray) + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); + var options = new IpcOptions() { WriteLegacyIpcFormat = writeLegacyIpcFormat }; + + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true, options)) + { + await writer.WriteRecordBatchAsync(originalBatch); + await writer.WriteEndAsync(); + } + + stream.Position = 0; + + // ensure the continuation is written correctly + byte[] buffer = stream.ToArray(); + int messageLength = BinaryPrimitives.ReadInt32LittleEndian(buffer); + int endOfBuffer1 = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(buffer.Length - 8)); + int endOfBuffer2 = BinaryPrimitives.ReadInt32LittleEndian(buffer.AsSpan(buffer.Length - 4)); + if (writeLegacyIpcFormat) + { + // the legacy IPC format doesn't have a continuation token at the start + Assert.NotEqual(-1, messageLength); + Assert.NotEqual(-1, endOfBuffer1); + } + else + { + // the latest IPC format has a continuation token at the start + Assert.Equal(-1, messageLength); + Assert.Equal(-1, endOfBuffer1); + } + + Assert.Equal(0, endOfBuffer2); + } + } + + [Fact] + public void WritesMetadataCorrectly() + { + Schema.Builder schemaBuilder = new Schema.Builder() + .Metadata("index", "1, 2, 3, 4, 5") + .Metadata("reverseIndex", "5, 4, 3, 2, 1") + .Field(f => f + .Name("IntCol") + .DataType(UInt32Type.Default) + .Metadata("custom1", "false") + .Metadata("custom2", "true")) + .Field(f => f + .Name("StringCol") + .DataType(StringType.Default) + .Metadata("custom2", "false") + .Metadata("custom3", "4")) + .Field(f => 
f + .Name("StructCol") + .DataType(new StructType(new[] { + new Field("Inner1", FloatType.Default, nullable: false), + new Field("Inner2", DoubleType.Default, nullable: true, new Dictionary<string, string>() { { "customInner", "1" }, { "customInner2", "3" } }) + })) + .Metadata("custom4", "6.4") + .Metadata("custom1", "true")); + + var schema = schemaBuilder.Build(); + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(schema, length: 10); + + TestRoundTripRecordBatch(originalBatch); + } + + [Fact] + public async Task WriteMultipleDictionaryArraysAsync() + { + List<RecordBatch> originalRecordBatches = CreateMultipleDictionaryArraysTestData(); + await TestRoundTripRecordBatchesAsync(originalRecordBatches); + } + + [Fact] + public void WriteMultipleDictionaryArrays() + { + List<RecordBatch> originalRecordBatches = CreateMultipleDictionaryArraysTestData(); + TestRoundTripRecordBatches(originalRecordBatches); + } + + private List<RecordBatch> CreateMultipleDictionaryArraysTestData() + { + var dictionaryData = new List<string> { "a", "b", "c" }; + int length = dictionaryData.Count; + + var schemaForSimpleCase = new Schema(new List<Field> { + new Field("int8", Int8Type.Default, true), + new Field("uint8", UInt8Type.Default, true), + new Field("int16", Int16Type.Default, true), + new Field("uint16", UInt16Type.Default, true), + new Field("int32", Int32Type.Default, true), + new Field("uint32", UInt32Type.Default, true), + new Field("int64", Int64Type.Default, true), + new Field("uint64", UInt64Type.Default, true) + }, null); + + StringArray dictionary = new StringArray.Builder().AppendRange(dictionaryData).Build(); + IEnumerable<IArrowArray> indicesArraysForSimpleCase = TestData.CreateArrays(schemaForSimpleCase, length); + + var fields = new List<Field>(capacity: length + 1); + var testTargetArrays = new List<IArrowArray>(capacity: length + 1); + + foreach (IArrowArray indices in indicesArraysForSimpleCase) + { + var dictionaryArray = new DictionaryArray( + new 
DictionaryType(indices.Data.DataType, StringType.Default, false), + indices, dictionary); + testTargetArrays.Add(dictionaryArray); + fields.Add(new Field($"dictionaryField_{indices.Data.DataType.Name}", dictionaryArray.Data.DataType, false)); + } + + (Field dictionaryTypeListArrayField, ListArray dictionaryTypeListArray) = CreateDictionaryTypeListArrayTestData(dictionary); + + fields.Add(dictionaryTypeListArrayField); + testTargetArrays.Add(dictionaryTypeListArray); + + (Field listTypeDictionaryArrayField, DictionaryArray listTypeDictionaryArray) = CreateListTypeDictionaryArrayTestData(dictionaryData); + + fields.Add(listTypeDictionaryArrayField); + testTargetArrays.Add(listTypeDictionaryArray); + + var schema = new Schema(fields, null); + + return new List<RecordBatch> { + new RecordBatch(schema, testTargetArrays, length), + new RecordBatch(schema, testTargetArrays, length), + }; + } + + private Tuple<Field, ListArray> CreateDictionaryTypeListArrayTestData(StringArray dictionary) + { + Int32Array indiceArray = new Int32Array.Builder().AppendRange(Enumerable.Range(0, dictionary.Length)).Build(); + + //DictionaryArray has no Builder for now, so creating ListArray directly. 
+ var dictionaryType = new DictionaryType(Int32Type.Default, StringType.Default, false); + var dictionaryArray = new DictionaryArray(dictionaryType, indiceArray, dictionary); + + var valueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>(); + var validityBufferBuilder = new ArrowBuffer.BitmapBuilder(); + + foreach (int i in Enumerable.Range(0, dictionary.Length + 1)) + { + valueOffsetsBufferBuilder.Append(i); + validityBufferBuilder.Append(true); + } + + var dictionaryField = new Field("dictionaryField_list", dictionaryType, false); + var listType = new ListType(dictionaryField); + var listArray = new ListArray(listType, valueOffsetsBufferBuilder.Length - 1, valueOffsetsBufferBuilder.Build(), dictionaryArray, valueOffsetsBufferBuilder.Build()); + + return Tuple.Create(new Field($"listField_{listType.ValueDataType.Name}", listType, false), listArray); + } + + private Tuple<Field, DictionaryArray> CreateListTypeDictionaryArrayTestData(List<string> dictionaryDataBase) + { + var listBuilder = new ListArray.Builder(StringType.Default); + var valueBuilder = listBuilder.ValueBuilder as StringArray.Builder; + + foreach(string data in dictionaryDataBase) { + listBuilder.Append(); + valueBuilder.Append(data); + } + + ListArray dictionary = listBuilder.Build(); + Int32Array indiceArray = new Int32Array.Builder().AppendRange(Enumerable.Range(0, dictionary.Length)).Build(); + var dictionaryArrayType = new DictionaryType(Int32Type.Default, dictionary.Data.DataType, false); + var dictionaryArray = new DictionaryArray(dictionaryArrayType, indiceArray, dictionary); + + return Tuple.Create(new Field($"dictionaryField_{dictionaryArray.Data.DataType.Name}", dictionaryArrayType, false), dictionaryArray); + } + + /// <summary> + /// Tests that writing an arrow stream with no RecordBatches produces the correct result. 
+ /// </summary> + [Fact] + public void WritesEmptyFile() + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1); + + var stream = new MemoryStream(); + var writer = new ArrowStreamWriter(stream, originalBatch.Schema); + + writer.WriteStart(); + writer.WriteEnd(); + + stream.Position = 0; + + var reader = new ArrowStreamReader(stream); + RecordBatch readBatch = reader.ReadNextRecordBatch(); + Assert.Null(readBatch); + SchemaComparer.Compare(originalBatch.Schema, reader.Schema); + } + + /// <summary> + /// Tests that writing an arrow stream with no RecordBatches produces the correct + /// result when using WriteStartAsync and WriteEndAsync. + /// </summary> + [Fact] + public async Task WritesEmptyFileAsync() + { + RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 1); + + var stream = new MemoryStream(); + var writer = new ArrowStreamWriter(stream, originalBatch.Schema); + + await writer.WriteStartAsync(); + await writer.WriteEndAsync(); + + stream.Position = 0; + + var reader = new ArrowStreamReader(stream); + RecordBatch readBatch = reader.ReadNextRecordBatch(); + Assert.Null(readBatch); + SchemaComparer.Compare(originalBatch.Schema, reader.Schema); + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs new file mode 100644 index 000000000..7f45ce857 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs @@ -0,0 +1,489 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using System.Linq; +using Apache.Arrow.Memory; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class BinaryArrayBuilderTests + { + private static readonly MemoryAllocator _allocator = new NativeMemoryAllocator(); + + // Various example byte arrays for use in testing. + private static readonly byte[] _exampleNull = null; + private static readonly byte[] _exampleEmpty = { }; + private static readonly byte[] _exampleNonEmpty1 = { 10, 20, 30, 40 }; + private static readonly byte[] _exampleNonEmpty2 = { 50, 60, 70, 80 }; + private static readonly byte[] _exampleNonEmpty3 = { 90 }; + + // Base set of single bytes that may be used to append to a builder in testing. + private static readonly byte[] _singleBytesToAppend = { 0, 123, 127, 255 }; + + // Base set of byte arrays that may be used to append to a builder in testing. + private static readonly byte[][] _byteArraysToAppend = + { + _exampleNull, + _exampleEmpty, + _exampleNonEmpty2, + _exampleNonEmpty3, + }; + + // Base set of multiple byte arrays that may be used to append to a builder in testing. + private static readonly byte[][][] _byteArrayArraysToAppend = + { + new byte[][] { }, + new[] { _exampleNull }, + new[] { _exampleEmpty }, + new[] { _exampleNonEmpty2 }, + new[] { _exampleNonEmpty2, _exampleNonEmpty3 }, + new[] { _exampleNonEmpty2, _exampleEmpty, _exampleNull }, + }; + + // Base set of byte arrays that can be used as "initial contents" of any builder under test. 
+ private static readonly byte[][][] _initialContentsSet = + { + new byte[][] { }, + new[] { _exampleNull }, + new[] { _exampleEmpty }, + new[] { _exampleNonEmpty1 }, + new[] { _exampleNonEmpty1, _exampleNonEmpty3 }, + new[] { _exampleNonEmpty1, _exampleEmpty, _exampleNull }, + }; + + public class Append + { + public static IEnumerable<object[]> _appendSingleByteTestData = + from initialContents in _initialContentsSet + from singleByte in _singleBytesToAppend + select new object[] { initialContents, singleByte }; + + [Theory] + [MemberData(nameof(_appendSingleByteTestData))] + public void AppendSingleByte(byte[][] initialContents, byte singleByte) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + int expectedLength = initialLength + 1; + var expectedArrayContents = initialContents.Append(new[] { singleByte }); + + // Act + var actualReturnValue = builder.Append(singleByte); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + [Theory] + [MemberData(nameof(_appendSingleByteTestData))] + public void AppendSingleByteAfterClear(byte[][] initialContents, byte singleByte) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var expectedArrayContents = new[] { new[] { singleByte } }; + + // Act + var actualReturnValue = builder.Append(singleByte); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(1, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + public static readonly IEnumerable<object[]> _appendNullTestData = + from initialContents in _initialContentsSet + select new object[] { 
initialContents }; + + [Theory] + [MemberData(nameof(_appendNullTestData))] + public void AppendNull(byte[][] initialContents) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + int expectedLength = initialLength + 1; + var expectedArrayContents = initialContents.Append(null); + + // Act + var actualReturnValue = builder.AppendNull(); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + [Theory] + [MemberData(nameof(_appendNullTestData))] + public void AppendNullAfterClear(byte[][] initialContents) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var expectedArrayContents = new byte[][] { null }; + + // Act + var actualReturnValue = builder.AppendNull(); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(1, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + public static readonly IEnumerable<object[]> _appendNonNullByteArrayTestData = + from initialContents in _initialContentsSet + from bytes in _byteArraysToAppend + where bytes != null + select new object[] { initialContents, bytes }; + + [Theory] + [MemberData(nameof(_appendNonNullByteArrayTestData))] + public void AppendReadOnlySpan(byte[][] initialContents, byte[] bytes) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + var span = (ReadOnlySpan<byte>)bytes; + int expectedLength = initialLength + 1; + var expectedArrayContents = initialContents.Append(bytes); + + // Act + var actualReturnValue = 
builder.Append(span); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + [Theory] + [MemberData(nameof(_appendNonNullByteArrayTestData))] + public void AppendReadOnlySpanAfterClear(byte[][] initialContents, byte[] bytes) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var span = (ReadOnlySpan<byte>)bytes; + var expectedArrayContents = new[] { bytes }; + + // Act + var actualReturnValue = builder.Append(span); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(1, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + public static readonly IEnumerable<object[]> _appendByteArrayTestData = + from initialContents in _initialContentsSet + from bytes in _byteArraysToAppend + select new object[] { initialContents, bytes }; + + [Theory] + [MemberData(nameof(_appendByteArrayTestData))] + public void AppendEnumerable(byte[][] initialContents, byte[] bytes) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + int expectedLength = initialLength + 1; + var enumerable = (IEnumerable<byte>)bytes; + var expectedArrayContents = initialContents.Append(bytes); + + // Act + var actualReturnValue = builder.Append(enumerable); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + [Theory] + [MemberData(nameof(_appendByteArrayTestData))] + public void AppendEnumerableAfterClear(byte[][] initialContents, byte[] bytes) + { + // Arrange + var 
builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var enumerable = (IEnumerable<byte>)bytes; + var expectedArrayContents = new[] { bytes }; + + // Act + var actualReturnValue = builder.Append(enumerable); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(1, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + } + + public class AppendRange + { + public static readonly IEnumerable<object[]> _appendRangeSingleBytesTestData = + from initialContents in _initialContentsSet + select new object[] { initialContents, _singleBytesToAppend }; + + [Theory] + [MemberData(nameof(_appendRangeSingleBytesTestData))] + public void AppendRangeSingleBytes(byte[][] initialContents, byte[] singleBytes) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + int expectedNewLength = initialLength + singleBytes.Length; + var expectedArrayContents = initialContents.Concat(singleBytes.Select(b => new[] { b })); + + // Act + var actualReturnValue = builder.AppendRange(singleBytes); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedNewLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + + } + + [Theory] + [MemberData(nameof(_appendRangeSingleBytesTestData))] + public void AppendRangeSingleBytesAfterClear(byte[][] initialContents, byte[] singleBytes) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var expectedArrayContents = singleBytes.Select(b => new[] { b }); + + // Act + var actualReturnValue = builder.AppendRange(singleBytes); + + // Assert + Assert.Equal(builder, 
actualReturnValue); + Assert.Equal(singleBytes.Length, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + public static readonly IEnumerable<object[]> _appendRangeByteArraysTestData = + from initialContents in _initialContentsSet + from byteArrays in _byteArrayArraysToAppend + select new object[] { initialContents, byteArrays }; + + [Theory] + [MemberData(nameof(_appendRangeByteArraysTestData))] + public void AppendRangeArrays(byte[][] initialContents, byte[][] byteArrays) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + int initialLength = builder.Length; + int expectedNewLength = initialLength + byteArrays.Length; + var expectedArrayContents = initialContents.Concat(byteArrays); + + // Act + var actualReturnValue = builder.AppendRange(byteArrays); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(expectedNewLength, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + + [Theory] + [MemberData(nameof(_appendRangeByteArraysTestData))] + public void AppendRangeArraysAfterClear(byte[][] initialContents, byte[][] byteArrays) + { + // Arrange + var builder = new BinaryArray.Builder(); + if (initialContents.Length > 0) + builder.AppendRange(initialContents); + builder.Clear(); + var expectedArrayContents = byteArrays; + + // Act + var actualReturnValue = builder.AppendRange(byteArrays); + + // Assert + Assert.Equal(builder, actualReturnValue); + Assert.Equal(byteArrays.Length, builder.Length); + var actualArray = builder.Build(_allocator); + AssertArrayContents(expectedArrayContents, actualArray); + } + } + + public class Clear + { + [Fact] + public void ClearEmpty() + { + // Arrange + var builder = new BinaryArray.Builder(); + + // Act + var actualReturnValue = builder.Clear(); + + // Assert + 
Assert.NotNull(actualReturnValue); + Assert.Equal(builder, actualReturnValue); + Assert.Equal(0, builder.Length); + var array = builder.Build(_allocator); + Assert.Equal(0, array.Length); + } + + public static readonly IEnumerable<object[]> _testData = + from byteArrays in _byteArrayArraysToAppend + select new object[] { byteArrays }; + + [Theory] + [MemberData(nameof(_testData))] + public void ClearNonEmpty(byte[][] byteArrays) + { + // Arrange + var builder = new BinaryArray.Builder(); + builder.AppendRange(byteArrays); + + // Act + var actualReturnValue = builder.Clear(); + + // Assert + Assert.NotNull(actualReturnValue); + Assert.Equal(builder, actualReturnValue); + Assert.Equal(0, builder.Length); + var array = builder.Build(_allocator); + Assert.Equal(0, array.Length); + } + } + + public class Build + { + [Fact] + public void BuildImmediately() + { + // Arrange + var builder = new BinaryArray.Builder(); + + // Act + var array = builder.Build(_allocator); + + // Assert + Assert.Equal(0, array.Length); + } + + public static readonly IEnumerable<object[]> _testData = + from ba1 in _initialContentsSet + from ba2 in _byteArrayArraysToAppend + select new object[] { ba1.Concat(ba2) }; + + [Theory] + [MemberData(nameof(_testData))] + public void AppendThenBuild(byte[][] byteArrays) + { + // Arrange + var builder = new BinaryArray.Builder(); + foreach (var byteArray in byteArrays) + { + // Test the type of byte array to ensure each Append() overload is exercised. 
+ if (byteArray == null) + { + builder.AppendNull(); + } + else if (byteArray.Length == 1) + { + builder.Append(byteArray[0]); + } + else + { + builder.Append((ReadOnlySpan<byte>)byteArray); + } + } + + // Act + var array = builder.Build(_allocator); + + // Assert + AssertArrayContents(byteArrays, array); + } + + [Theory] + [MemberData(nameof(_testData))] + public void BuildMultipleTimes(byte[][] byteArrays) + { + // Arrange + var builder = new BinaryArray.Builder(); + builder.AppendRange(byteArrays); + builder.Build(_allocator); + + // Act + var array = builder.Build(_allocator); + + // Assert + AssertArrayContents(byteArrays, array); + } + } + + private static void AssertArrayContents(IEnumerable<byte[]> expectedContents, BinaryArray array) + { + var expectedContentsArr = expectedContents.ToArray(); + Assert.Equal(expectedContentsArr.Length, array.Length); + for (int i = 0; i < array.Length; i++) + { + var expectedArray = expectedContentsArr[i]; + var actualArray = array.IsNull(i) ? null : array.GetBytes(i).ToArray(); + Assert.Equal(expectedArray, actualArray); + } + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs new file mode 100644 index 000000000..5e18716a0 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs @@ -0,0 +1,171 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class BitUtilityTests + { + public class ByteCount + { + [Theory] + [InlineData(0, 0)] + [InlineData(1, 1)] + [InlineData(8, 1)] + [InlineData(9, 2)] + [InlineData(32, 4)] + public void HasExpectedResult(int n, int expected) + { + var count = BitUtility.ByteCount(n); + Assert.Equal(expected, count); + } + } + + public class CountBits + { + [Theory] + [InlineData(new byte[] { 0b00000000 }, 0)] + [InlineData(new byte[] { 0b00000001 }, 1)] + [InlineData(new byte[] { 0b11111111 }, 8)] + [InlineData(new byte[] { 0b01001001, 0b01010010 }, 6)] + public void CountsAllOneBits(byte[] data, int expectedCount) + { + Assert.Equal(expectedCount, + BitUtility.CountBits(data)); + } + + [Theory] + [InlineData(new byte[] { 0b11111111 }, 0, 8)] + [InlineData(new byte[] { 0b11111111 }, 3, 5)] + [InlineData(new byte[] { 0b11111111, 0b11111111 }, 9, 7)] + [InlineData(new byte[] { 0b11111111 }, -1, 0)] + public void CountsAllOneBitsFromAnOffset(byte[] data, int offset, int expectedCount) + { + Assert.Equal(expectedCount, + BitUtility.CountBits(data, offset)); + } + + [Theory] + [InlineData(new byte[] { 0b11111111 }, 0, 8, 8)] + [InlineData(new byte[] { 0b11111111 }, 0, 4, 4)] + [InlineData(new byte[] { 0b11111111 }, 3, 2, 2)] + [InlineData(new byte[] { 0b11111111 }, 3, 5, 5)] + [InlineData(new byte[] { 0b11111111, 0b11111111 }, 9, 7, 7)] + [InlineData(new byte[] { 0b11111111, 0b11111111 }, 7, 2, 2)] + [InlineData(new byte[] { 0b11111111, 0b11111111, 0b11111111 }, 0, 24, 24)] + [InlineData(new byte[] { 
0b11111111, 0b11111111, 0b11111111 }, 8, 16, 16)] + [InlineData(new byte[] { 0b11111111, 0b11111111, 0b11111111 }, 0, 16, 16)] + [InlineData(new byte[] { 0b11111111, 0b11111111, 0b11111111 }, 3, 18, 18)] + [InlineData(new byte[] { 0b11111111 }, -1, 0, 0)] + public void CountsAllOneBitsFromOffsetWithinLength(byte[] data, int offset, int length, int expectedCount) + { + var actualCount = BitUtility.CountBits(data, offset, length); + Assert.Equal(expectedCount, actualCount); + } + + [Fact] + public void CountsZeroBitsWhenDataIsEmpty() + { + Assert.Equal(0, + BitUtility.CountBits(null)); + } + } + + public class GetBit + { + [Theory] + [InlineData(new byte[] { 0b01001001 }, 0, true)] + [InlineData(new byte[] { 0b01001001 }, 1, false)] + [InlineData(new byte[] { 0b01001001 }, 2, false)] + [InlineData(new byte[] { 0b01001001 }, 3, true)] + [InlineData(new byte[] { 0b01001001 }, 4, false)] + [InlineData(new byte[] { 0b01001001 }, 5, false)] + [InlineData(new byte[] { 0b01001001 }, 6, true)] + [InlineData(new byte[] { 0b01001001 }, 7, false)] + [InlineData(new byte[] { 0b01001001, 0b01010010 }, 8, false)] + [InlineData(new byte[] { 0b01001001, 0b01010010 }, 14, true)] + public void GetsCorrectBitForIndex(byte[] data, int index, bool expectedValue) + { + Assert.Equal(expectedValue, + BitUtility.GetBit(data, index)); + } + + [Theory] + [InlineData(null, 0)] + [InlineData(new byte[] { 0b00000000 }, -1)] + public void ThrowsWhenBitIndexOutOfRange(byte[] data, int index) + { + Assert.Throws<IndexOutOfRangeException>(() => + BitUtility.GetBit(data, index)); + } + } + + public class SetBit + { + [Theory] + [InlineData(new byte[] { 0b00000000 }, 0, new byte[] { 0b00000001 })] + [InlineData(new byte[] { 0b00000000 }, 2, new byte[] { 0b00000100 })] + [InlineData(new byte[] { 0b00000000 }, 7, new byte[] { 0b10000000 })] + [InlineData(new byte[] { 0b00000000, 0b00000000 }, 8, new byte[] { 0b00000000, 0b00000001 })] + [InlineData(new byte[] { 0b00000000, 0b00000000 }, 15, new byte[] { 
0b00000000, 0b10000000 })] + public void SetsBitAtIndex(byte[] data, int index, byte[] expectedValue) + { + BitUtility.SetBit(data, index); + Assert.Equal(expectedValue, data); + } + } + + public class ClearBit + { + [Theory] + [InlineData(new byte[] { 0b00000001 }, 0, new byte[] { 0b00000000 })] + [InlineData(new byte[] { 0b00000010 }, 1, new byte[] { 0b00000000 })] + [InlineData(new byte[] { 0b10000001 }, 7, new byte[] { 0b00000001 })] + [InlineData(new byte[] { 0b11111111, 0b11111111 }, 15, new byte[] { 0b11111111, 0b01111111 })] + public void ClearsBitAtIndex(byte[] data, int index, byte[] expectedValue) + { + BitUtility.ClearBit(data, index); + Assert.Equal(expectedValue, data); + } + } + + public class RoundUpToMultipleOf64 + { + [Theory] + [InlineData(0, 0)] + [InlineData(1, 64)] + [InlineData(63, 64)] + [InlineData(64, 64)] + [InlineData(65, 128)] + [InlineData(129, 192)] + public void ReturnsNextMultiple(int size, int expectedSize) + { + Assert.Equal(expectedSize, + BitUtility.RoundUpToMultipleOf64(size)); + } + + [Theory] + [InlineData(0)] + [InlineData(-1)] + public void ReturnsZeroWhenSizeIsLessThanOrEqualToZero(int size) + { + Assert.Equal(0, + BitUtility.RoundUpToMultipleOf64(size)); + } + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/BooleanArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/BooleanArrayTests.cs new file mode 100644 index 000000000..efac07dba --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/BooleanArrayTests.cs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Linq; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class BooleanArrayTests + { + public class Builder + { + public class Append + { + [Theory] + [InlineData(1)] + [InlineData(3)] + public void IncrementsLength(int count) + { + var builder = new BooleanArray.Builder(); + + for (var i = 0; i < count; i++) + { + builder.Append(true); + } + + var array = builder.Build(); + + Assert.Equal(count, array.Length); + } + + [Fact] + public void AppendsExpectedBit() + { + var array1 = new BooleanArray.Builder() + .Append(false) + .Build(); + + Assert.False(array1.GetValue(0).Value); + + var array2 = new BooleanArray.Builder() + .Append(true) + .Build(); + + Assert.True(array2.GetValue(0).Value); + } + } + + public class Clear + { + [Fact] + public void SetsAllBitsToDefault() + { + var array = new BooleanArray.Builder() + .Resize(8) + .Set(0, true) + .Set(7, true) + .Clear() + .Build(); + + for (var i = 0; i < array.Length; i++) + { + Assert.False(array.GetValue(i).Value); + } + } + } + + public class Toggle + { + [Theory] + [InlineData(8, 1)] + [InlineData(16, 13)] + public void TogglesExpectedBitToFalse(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, true) + .Toggle(index) + .Build(); + + Assert.False(array.GetValue(index).Value); + } + + [Theory] + [InlineData(8, 1)] + [InlineData(16, 13)] + public void TogglesExpectedBitToTreu(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, false) + .Toggle(index) + .Build(); + + 
Assert.True(array.GetValue(index).Value); + } + + [Fact] + public void ThrowsWhenIndexOutOfRange() + { + Assert.Throws<ArgumentOutOfRangeException>(() => + { + var builder = new BooleanArray.Builder(); + builder.Toggle(8); + }); + } + } + + public class Swap + { + [Fact] + public void SwapsExpectedBits() + { + var array = new BooleanArray.Builder() + .AppendRange(Enumerable.Repeat(false, 8)) + .Set(0, true) + .Swap(0, 7) + .Build(); + + Assert.True(array.GetValue(0).HasValue); + Assert.False(array.GetValue(0).Value); + Assert.True(array.GetValue(7).HasValue); + Assert.True(array.GetValue(7).Value); + #pragma warning disable CS0618 + Assert.False(array.GetBoolean(0)); + Assert.True(array.GetBoolean(7)); + #pragma warning restore CS0618 + } + + [Fact] + public void ThrowsWhenIndexOutOfRange() + { + Assert.Throws<ArgumentOutOfRangeException>(() => + { + var builder = new BooleanArray.Builder(); + builder.Swap(0, 1); + }); + } + } + + public class Set + { + [Theory] + [InlineData(8, 0)] + [InlineData(8, 4)] + [InlineData(8, 7)] + [InlineData(16, 8)] + [InlineData(16, 15)] + public void SetsExpectedBitToTrue(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, true) + .Build(); + + Assert.True(array.GetValue(index).Value); + } + + [Theory] + [InlineData(8, 0)] + [InlineData(8, 4)] + [InlineData(8, 7)] + [InlineData(16, 8)] + [InlineData(16, 15)] + public void SetsExpectedBitsToFalse(int length, int index) + { + var array = new BooleanArray.Builder() + .Resize(length) + .Set(index, false) + .Build(); + + Assert.False(array.GetValue(index).Value); + } + + [Theory] + [InlineData(4)] + public void UnsetBitsAreUnchanged(int index) + { + var array = new BooleanArray.Builder() + .AppendRange(Enumerable.Repeat(false, 8)) + .Set(index, true) + .Build(); + + for (var i = 0; i < 8; i++) + { + if (i != index) + { + Assert.True(array.GetValue(i).HasValue); + Assert.False(array.GetValue(i).Value); + #pragma warning disable CS0618 + 
Assert.False(array.GetBoolean(i)); + #pragma warning restore CS0618 + } + } + } + + [Fact] + public void ThrowsWhenIndexOutOfRange() + { + Assert.Throws<ArgumentOutOfRangeException>(() => + { + var builder = new BooleanArray.Builder(); + builder.Set(builder.Length, false); + }); + } + } + } + } +} diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/ColumnTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/ColumnTests.cs new file mode 100644 index 000000000..b90c68162 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/ColumnTests.cs @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
using System.Linq;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Column"/> built from several chunks of Int32 data.
    /// </summary>
    public class ColumnTests
    {
        /// <summary>
        /// Builds an Int32 array holding the values 0 .. <paramref name="length"/> - 1.
        /// </summary>
        /// <param name="length">Number of elements to generate.</param>
        /// <returns>The array produced by <see cref="ArrowArrayFactory.BuildArray"/>.</returns>
        public static Array MakeIntArray(int length)
        {
            // The following should be improved once the ArrayBuilder PR goes in
            var intBuilder = new ArrowBuffer.Builder<int>();
            intBuilder.AppendRange(Enumerable.Range(0, length));
            ArrowBuffer buffer = intBuilder.Build();

            // First buffer slot is left empty, the second carries the values.
            ArrayData intData = new ArrayData(Int32Type.Default, length, 0, 0, new[] { ArrowBuffer.Empty, buffer });

            // Direct cast: an unexpected factory result fails here with a clear
            // InvalidCastException instead of propagating a null to the caller.
            return (Array)ArrowArrayFactory.BuildArray(intData);
        }

        /// <summary>
        /// A column made of two 10-element chunks reports the combined length and
        /// slices across the chunk boundary, clamping over-long slice requests.
        /// </summary>
        [Fact]
        public void TestColumn()
        {
            Array intArray = MakeIntArray(10);
            Array intArrayCopy = MakeIntArray(10);

            Field field = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Column column = new Column(field, new[] { intArray, intArrayCopy });

            Assert.Equal(field.Name, column.Name);
            Assert.True(column.Field == field);
            Assert.Equal(20, column.Length);
            Assert.Equal(0, column.NullCount);
            Assert.Equal(field.DataType, column.Type);

            Column slice5 = column.Slice(0, 5);
            Assert.Equal(5, slice5.Length);
            Column sliceFull = column.Slice(2);
            Assert.Equal(18, sliceFull.Length);
            Column sliceMore = column.Slice(0, 25);
            Assert.Equal(20, sliceMore.Length);
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs new file mode 100644 index 000000000..0d6aad96e --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs @@ -0,0 +1,125 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Round-trip tests for <see cref="Date32Array.Builder"/>: a single value is
    /// appended and then read back through every accessor of the built array.
    /// </summary>
    public class Date32ArrayTests
    {
        // Reference date the expected raw values are counted from.
        private static readonly DateTime s_epoch = new DateTime(1970, 1, 1);

        /// <summary>Whole days between the epoch and <paramref name="value"/>.</summary>
        private static int DaysSinceEpoch(DateTime value)
        {
            return (int)value.Subtract(s_epoch).TotalDays;
        }

        /// <summary>Theory data: one example date per row.</summary>
        public static IEnumerable<object[]> GetDatesData()
        {
            return TestDateAndTimeData.ExampleDates.Select(example => new object[] { example });
        }

        /// <summary>Theory data: one example date-time per row.</summary>
        public static IEnumerable<object[]> GetDateTimesData()
        {
            return TestDateAndTimeData.ExampleDateTimes.Select(example => new object[] { example });
        }

        /// <summary>Theory data: one example date-time-offset per row.</summary>
        public static IEnumerable<object[]> GetDateTimeOffsetsData()
        {
            return TestDateAndTimeData.ExampleDateTimeOffsets.Select(example => new object[] { example });
        }

        public class AppendNull
        {
            /// <summary>A null slot reads back as null from all three getters.</summary>
            [Fact]
            public void AppendThenGetGivesNull()
            {
                // Arrange + Act
                var array = new Date32Array.Builder()
                    .AppendNull()
                    .Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Null(array.GetDateTime(0));
                Assert.Null(array.GetDateTimeOffset(0));
                Assert.Null(array.GetValue(0));
            }
        }

        public class AppendDateTime
        {
            /// <summary>A pure date is returned unchanged.</summary>
            [Theory]
            [MemberData(nameof(GetDatesData), MemberType = typeof(Date32ArrayTests))]
            public void AppendDateGivesSameDate(DateTime date)
            {
                // Arrange
                DateTime wantDateTime = date;
                DateTime unspecified = DateTime.SpecifyKind(date, DateTimeKind.Unspecified);
                var wantOffset = new DateTimeOffset(unspecified, TimeSpan.Zero);
                int wantDays = DaysSinceEpoch(date);

                // Act
                var array = new Date32Array.Builder().Append(date).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantDays, array.GetValue(0));
            }

            /// <summary>The time-of-day component is dropped on append.</summary>
            [Theory]
            [MemberData(nameof(GetDateTimesData), MemberType = typeof(Date32ArrayTests))]
            public void AppendWithTimeGivesSameWithTimeIgnored(DateTime dateTime)
            {
                // Arrange
                DateTime wantDateTime = dateTime.Date;
                DateTime unspecified = DateTime.SpecifyKind(dateTime.Date, DateTimeKind.Unspecified);
                var wantOffset = new DateTimeOffset(unspecified, TimeSpan.Zero);
                int wantDays = DaysSinceEpoch(dateTime.Date);

                // Act
                var array = new Date32Array.Builder().Append(dateTime).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantDays, array.GetValue(0));
            }
        }

        public class AppendDateTimeOffset
        {
            /// <summary>An offset value is stored as the date of its UTC instant.</summary>
            [Theory]
            [MemberData(nameof(GetDateTimeOffsetsData), MemberType = typeof(Date32ArrayTests))]
            public void AppendGivesUtcDate(DateTimeOffset dateTimeOffset)
            {
                // Arrange
                DateTime utcDate = dateTimeOffset.UtcDateTime.Date;
                DateTime wantDateTime = utcDate;
                var wantOffset = new DateTimeOffset(utcDate, TimeSpan.Zero);
                int wantDays = DaysSinceEpoch(utcDate);

                // Act
                var array = new Date32Array.Builder().Append(dateTimeOffset).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantDays, array.GetValue(0));
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Date64ArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Date64ArrayTests.cs new file mode 100644 index 000000000..65cffc84e --- /dev/null +++
// (patch boundary, kept verbatim) b/src/arrow/csharp/test/Apache.Arrow.Tests/Date64ArrayTests.cs @@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Round-trip tests for <see cref="Date64Array.Builder"/>: a single value is
    /// appended and then read back through every accessor of the built array.
    /// Each non-null test also checks the raw value is a whole number of days.
    /// </summary>
    public class Date64ArrayTests
    {
        private const long MillisecondsPerDay = 86400000;

        // Reference date the expected raw values are counted from.
        private static readonly DateTime s_epoch = new DateTime(1970, 1, 1);

        /// <summary>
        /// Whole days between the epoch and <paramref name="value"/>, expressed in milliseconds.
        /// </summary>
        private static long WholeDayMilliseconds(DateTime value)
        {
            long wholeDays = (long)value.Subtract(s_epoch).TotalDays;
            return wholeDays * MillisecondsPerDay;
        }

        /// <summary>Theory data: one example date per row.</summary>
        public static IEnumerable<object[]> GetDatesData()
        {
            return TestDateAndTimeData.ExampleDates.Select(example => new object[] { example });
        }

        /// <summary>Theory data: one example date-time per row.</summary>
        public static IEnumerable<object[]> GetDateTimesData()
        {
            return TestDateAndTimeData.ExampleDateTimes.Select(example => new object[] { example });
        }

        /// <summary>Theory data: one example date-time-offset per row.</summary>
        public static IEnumerable<object[]> GetDateTimeOffsetsData()
        {
            return TestDateAndTimeData.ExampleDateTimeOffsets.Select(example => new object[] { example });
        }

        public class AppendNull
        {
            /// <summary>A null slot reads back as null from all three getters.</summary>
            [Fact]
            public void AppendThenGetGivesNull()
            {
                // Arrange + Act
                var array = new Date64Array.Builder()
                    .AppendNull()
                    .Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Null(array.GetDateTime(0));
                Assert.Null(array.GetDateTimeOffset(0));
                Assert.Null(array.GetValue(0));
            }
        }

        public class AppendDateTime
        {
            /// <summary>A pure date is returned unchanged.</summary>
            [Theory]
            [MemberData(nameof(GetDatesData), MemberType = typeof(Date64ArrayTests))]
            public void AppendDateGivesSameDate(DateTime date)
            {
                // Arrange
                DateTime wantDateTime = date;
                DateTime unspecified = DateTime.SpecifyKind(date, DateTimeKind.Unspecified);
                var wantOffset = new DateTimeOffset(unspecified, TimeSpan.Zero);
                long wantMillis = WholeDayMilliseconds(date);

                // Act
                var array = new Date64Array.Builder().Append(date).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantMillis, array.GetValue(0));
                Assert.Equal(0, array.GetValue(0).Value % MillisecondsPerDay);
            }

            /// <summary>The time-of-day component is dropped on append.</summary>
            [Theory]
            [MemberData(nameof(GetDateTimesData), MemberType = typeof(Date64ArrayTests))]
            public void AppendWithTimeGivesSameWithTimeIgnored(DateTime dateTime)
            {
                // Arrange
                DateTime wantDateTime = dateTime.Date;
                DateTime unspecified = DateTime.SpecifyKind(dateTime.Date, DateTimeKind.Unspecified);
                var wantOffset = new DateTimeOffset(unspecified, TimeSpan.Zero);
                long wantMillis = WholeDayMilliseconds(dateTime.Date);

                // Act
                var array = new Date64Array.Builder().Append(dateTime).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantMillis, array.GetValue(0));
                Assert.Equal(0, array.GetValue(0).Value % MillisecondsPerDay);
            }
        }

        public class AppendDateTimeOffset
        {
            /// <summary>An offset value is stored as the date of its UTC instant.</summary>
            [Theory]
            [MemberData(nameof(GetDateTimeOffsetsData), MemberType = typeof(Date64ArrayTests))]
            public void AppendGivesUtcDate(DateTimeOffset dateTimeOffset)
            {
                // Arrange
                DateTime utcDate = dateTimeOffset.UtcDateTime.Date;
                DateTime wantDateTime = utcDate;
                var wantOffset = new DateTimeOffset(utcDate, TimeSpan.Zero);
                long wantMillis = WholeDayMilliseconds(utcDate);

                // Act
                var array = new Date64Array.Builder().Append(dateTimeOffset).Build();

                // Assert
                Assert.Equal(1, array.Length);
                Assert.Equal(wantDateTime, array.GetDateTime(0));
                Assert.Equal(wantOffset, array.GetDateTimeOffset(0));
                Assert.Equal(wantMillis, array.GetValue(0));
                Assert.Equal(0, array.GetValue(0).Value % MillisecondsPerDay);
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal128ArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal128ArrayTests.cs new file mode 100644 index 000000000..68f8ee02b --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal128ArrayTests.cs @@ -0,0 +1,241 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Decimal128Array.Builder"/>: appending, setting and
    /// swapping decimal values and nulls, and rejecting values that do not fit
    /// the declared precision/scale.
    /// </summary>
    public class Decimal128ArrayTests
    {
        public class Builder
        {
            public class AppendNull
            {
                /// <summary>
                /// Three appended nulls read back as null, and the value buffer still
                /// reserves one <c>ByteWidth</c>-sized slot per element.
                /// </summary>
                [Fact]
                public void AppendThenGetGivesNull()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(8, 2));

                    // Act
                    builder = builder.AppendNull();
                    builder = builder.AppendNull();
                    builder = builder.AppendNull();

                    // Assert
                    var array = builder.Build();

                    Assert.Equal(3, array.Length);
                    // Expected value goes first so a failure message reads correctly.
                    Assert.Equal(array.ByteWidth * 3, array.Data.Buffers[1].Length);
                    Assert.Null(array.GetValue(0));
                    Assert.Null(array.GetValue(1));
                    Assert.Null(array.GetValue(2));
                }
            }

            public class Append
            {
                /// <summary>
                /// Appends <paramref name="count"/> pseudo-random decimals with one null
                /// at position <c>count - 2</c> and checks every slot round-trips.
                /// </summary>
                [Theory]
                [InlineData(200)]
                public void AppendDecimal(int count)
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(14, 10));

                    // One generator with a fixed seed: the data is the same on every run,
                    // so a failure can be reproduced.
                    var random = new Random(42);

                    // Act
                    decimal?[] testData = new decimal?[count];
                    for (int i = 0; i < count; i++)
                    {
                        if (i == count - 2)
                        {
                            builder.AppendNull();
                            testData[i] = null;
                            continue;
                        }

                        decimal rnd = i * (decimal)Math.Round(random.NextDouble(), 10);
                        testData[i] = rnd;
                        builder.Append(rnd);
                    }

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(count, array.Length);
                    for (int i = 0; i < count; i++)
                    {
                        Assert.Equal(testData[i], array.GetValue(i));
                    }
                }

                /// <summary>A 26-digit value and its negation round-trip exactly.</summary>
                [Fact]
                public void AppendLargeDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(26, 2));
                    decimal large = 999999999999909999999999.80M;

                    // Act
                    builder.Append(large);
                    builder.Append(-large);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(large, array.GetValue(0));
                    Assert.Equal(-large, array.GetValue(1));
                }

                /// <summary>A 20-decimal-place fraction and its negation round-trip exactly.</summary>
                [Fact]
                public void AppendFractionalDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(26, 20));
                    decimal fraction = 0.99999999999990999992M;

                    // Act
                    builder.Append(fraction);
                    builder.Append(-fraction);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(fraction, array.GetValue(0));
                    Assert.Equal(-fraction, array.GetValue(1));
                }

                /// <summary>A range of values followed by a null reads back in order.</summary>
                [Fact]
                public void AppendRangeDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8));
                    var range = new decimal[] { 2.123M, 1.5984M, -0.0000001M, 9878987987987987.1235407M };

                    // Act
                    builder.AppendRange(range);
                    builder.AppendNull();

                    // Assert
                    var array = builder.Build();
                    for (int i = 0; i < range.Length; i++)
                    {
                        Assert.Equal(range[i], array.GetValue(i));
                    }

                    Assert.Null(array.GetValue(range.Length));
                }

                /// <summary>After Clear, the next appended value lands at index 0.</summary>
                [Fact]
                public void AppendClearAppendDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8));

                    // Act
                    builder.Append(1);
                    builder.Clear();
                    builder.Append(10);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(10, array.GetValue(0));
                }

                /// <summary>
                /// With precision 2 / scale 1, too many integer or fractional digits throw
                /// <see cref="OverflowException"/>; values within range are accepted.
                /// </summary>
                [Fact]
                public void AppendInvalidPrecisionAndScaleDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(2, 1));

                    // Assert
                    Assert.Throws<OverflowException>(() => builder.Append(100));
                    Assert.Throws<OverflowException>(() => builder.Append(0.01M));
                    builder.Append(-9.9M);
                    builder.Append(0);
                    builder.Append(9.9M);
                }
            }

            public class Set
            {
                /// <summary>The last value written to a slot wins.</summary>
                [Fact]
                public void SetDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8))
                        .Resize(1);

                    // Act
                    builder.Set(0, 50.123456M);
                    builder.Set(0, 1.01M);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(1.01M, array.GetValue(0));
                }

                /// <summary>SetNull clears a slot that previously held a value.</summary>
                [Fact]
                public void SetNull()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8))
                        .Resize(1);

                    // Act
                    builder.Set(0, 50.123456M);
                    builder.SetNull(0);

                    // Assert
                    var array = builder.Build();
                    Assert.Null(array.GetValue(0));
                }
            }

            public class Swap
            {
                /// <summary>Swapping two value slots exchanges their contents.</summary>
                /// <remarks>
                /// NOTE(review): this test exercises Swap, not Set; the method name is kept
                /// so existing test identifiers stay stable.
                /// </remarks>
                [Fact]
                public void SetDecimal()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8));

                    // Act
                    builder.Append(123.45M);
                    builder.Append(678.9M);
                    builder.Swap(0, 1);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(678.9M, array.GetValue(0));
                    Assert.Equal(123.45M, array.GetValue(1));
                }

                /// <summary>Swapping a value slot with a null slot moves the null too.</summary>
                [Fact]
                public void SwapNull()
                {
                    // Arrange
                    var builder = new Decimal128Array.Builder(new Decimal128Type(24, 8));

                    // Act
                    builder.Append(123.456M);
                    builder.AppendNull();
                    builder.Swap(0, 1);

                    // Assert
                    var array = builder.Build();
                    Assert.Null(array.GetValue(0));
                    Assert.Equal(123.456M, array.GetValue(1));
                }
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs new file mode 100644 index 000000000..35b68823d --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs @@ -0,0 +1,241 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Decimal256Array.Builder"/>: appending, setting and
    /// swapping decimal values and nulls, and rejecting values that do not fit
    /// the declared precision/scale.
    /// </summary>
    public class Decimal256ArrayTests
    {
        public class Builder
        {
            public class AppendNull
            {
                /// <summary>
                /// Three appended nulls read back as null, and the value buffer still
                /// reserves one <c>ByteWidth</c>-sized slot per element.
                /// </summary>
                [Fact]
                public void AppendThenGetGivesNull()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(8, 2));

                    // Act
                    builder = builder.AppendNull();
                    builder = builder.AppendNull();
                    builder = builder.AppendNull();

                    // Assert
                    var array = builder.Build();

                    Assert.Equal(3, array.Length);
                    // Expected value goes first so a failure message reads correctly.
                    Assert.Equal(array.ByteWidth * 3, array.Data.Buffers[1].Length);
                    Assert.Null(array.GetValue(0));
                    Assert.Null(array.GetValue(1));
                    Assert.Null(array.GetValue(2));
                }
            }

            public class Append
            {
                /// <summary>
                /// Appends <paramref name="count"/> pseudo-random decimals with one null
                /// at position <c>count - 2</c> and checks every slot round-trips.
                /// </summary>
                [Theory]
                [InlineData(200)]
                public void AppendDecimal(int count)
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(14, 10));

                    // One generator with a fixed seed: the data is the same on every run,
                    // so a failure can be reproduced.
                    var random = new Random(42);

                    // Act
                    decimal?[] testData = new decimal?[count];
                    for (int i = 0; i < count; i++)
                    {
                        if (i == count - 2)
                        {
                            builder.AppendNull();
                            testData[i] = null;
                            continue;
                        }

                        decimal rnd = i * (decimal)Math.Round(random.NextDouble(), 10);
                        testData[i] = rnd;
                        builder.Append(rnd);
                    }

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(count, array.Length);
                    for (int i = 0; i < count; i++)
                    {
                        Assert.Equal(testData[i], array.GetValue(i));
                    }
                }

                /// <summary>A 26-digit value and its negation round-trip exactly.</summary>
                [Fact]
                public void AppendLargeDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(26, 2));
                    decimal large = 999999999999909999999999.80M;

                    // Act
                    builder.Append(large);
                    builder.Append(-large);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(large, array.GetValue(0));
                    Assert.Equal(-large, array.GetValue(1));
                }

                /// <summary>A 20-decimal-place fraction and its negation round-trip exactly.</summary>
                [Fact]
                public void AppendFractionalDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(26, 20));
                    decimal fraction = 0.99999999999990999992M;

                    // Act
                    builder.Append(fraction);
                    builder.Append(-fraction);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(fraction, array.GetValue(0));
                    Assert.Equal(-fraction, array.GetValue(1));
                }

                /// <summary>A range of values followed by a null reads back in order.</summary>
                [Fact]
                public void AppendRangeDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8));
                    var range = new decimal[] { 2.123M, 1.5984M, -0.0000001M, 9878987987987987.1235407M };

                    // Act
                    builder.AppendRange(range);
                    builder.AppendNull();

                    // Assert
                    var array = builder.Build();
                    for (int i = 0; i < range.Length; i++)
                    {
                        Assert.Equal(range[i], array.GetValue(i));
                    }

                    Assert.Null(array.GetValue(range.Length));
                }

                /// <summary>After Clear, the next appended value lands at index 0.</summary>
                [Fact]
                public void AppendClearAppendDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8));

                    // Act
                    builder.Append(1);
                    builder.Clear();
                    builder.Append(10);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(10, array.GetValue(0));
                }

                /// <summary>
                /// With precision 2 / scale 1, too many integer or fractional digits throw
                /// <see cref="OverflowException"/>; values within range are accepted.
                /// </summary>
                [Fact]
                public void AppendInvalidPrecisionAndScaleDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(2, 1));

                    // Assert
                    Assert.Throws<OverflowException>(() => builder.Append(100));
                    Assert.Throws<OverflowException>(() => builder.Append(0.01M));
                    builder.Append(-9.9M);
                    builder.Append(0);
                    builder.Append(9.9M);
                }
            }

            public class Set
            {
                /// <summary>The last value written to a slot wins.</summary>
                [Fact]
                public void SetDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8))
                        .Resize(1);

                    // Act
                    builder.Set(0, 50.123456M);
                    builder.Set(0, 1.01M);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(1.01M, array.GetValue(0));
                }

                /// <summary>SetNull clears a slot that previously held a value.</summary>
                [Fact]
                public void SetNull()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8))
                        .Resize(1);

                    // Act
                    builder.Set(0, 50.123456M);
                    builder.SetNull(0);

                    // Assert
                    var array = builder.Build();
                    Assert.Null(array.GetValue(0));
                }
            }

            public class Swap
            {
                /// <summary>Swapping two value slots exchanges their contents.</summary>
                /// <remarks>
                /// NOTE(review): this test exercises Swap, not Set; the method name is kept
                /// so existing test identifiers stay stable.
                /// </remarks>
                [Fact]
                public void SetDecimal()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8));

                    // Act
                    builder.Append(123.45M);
                    builder.Append(678.9M);
                    builder.Swap(0, 1);

                    // Assert
                    var array = builder.Build();
                    Assert.Equal(678.9M, array.GetValue(0));
                    Assert.Equal(123.45M, array.GetValue(1));
                }

                /// <summary>Swapping a value slot with a null slot moves the null too.</summary>
                [Fact]
                public void SwapNull()
                {
                    // Arrange
                    var builder = new Decimal256Array.Builder(new Decimal256Type(24, 8));

                    // Act
                    builder.Append(123.456M);
                    builder.AppendNull();
                    builder.Swap(0, 1);

                    // Assert
                    var array = builder.Build();
                    Assert.Null(array.GetValue(0));
                    Assert.Equal(123.456M, array.GetValue(1));
                }
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs new file mode 100644 index 000000000..d235524d9 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs @@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Checks which precision/scale combinations accept a given decimal and which
    /// are rejected with an <see cref="OverflowException"/>.
    /// </summary>
    public class DecimalUtilityTests
    {
        public class Overflow
        {
            /// <summary>
            /// Appends <paramref name="d"/> to a builder of the given precision and scale.
            /// When <paramref name="shouldThrow"/> is true the append must overflow;
            /// otherwise the built array must return the same value.
            /// </summary>
            [Theory]
            [InlineData(100.123, 10, 4, false)]
            [InlineData(100.123, 6, 4, false)]
            [InlineData(100.123, 3, 3, true)]
            [InlineData(100.123, 10, 2, true)]
            [InlineData(100.123, 5, 2, true)]
            [InlineData(100.123, 5, 3, true)]
            [InlineData(100.123, 6, 3, false)]
            public void HasExpectedResultOrThrows(decimal d, int precision , int scale, bool shouldThrow)
            {
                var decimalType = new Decimal128Type(precision, scale);
                var builder = new Decimal128Array.Builder(decimalType);

                // Happy path first: the value fits and must round-trip.
                if (!shouldThrow)
                {
                    builder.Append(d);
                    var built = builder.Build(new TestMemoryAllocator());
                    Assert.Equal(d, built.GetValue(0));
                    return;
                }

                Assert.Throws<OverflowException>(() => builder.Append(d));
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/DictionaryArrayTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/DictionaryArrayTests.cs new file mode 100644 index 000000000..da678563c --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/DictionaryArrayTests.cs @@ -0,0 +1,67 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="DictionaryArray"/> construction and slicing over a small
    /// string dictionary with Int32 indices.
    /// </summary>
    public class DictionaryArrayTests
    {
        /// <summary>
        /// A freshly constructed dictionary array exposes the dictionary and indices
        /// it was given.
        /// </summary>
        [Fact]
        public void CreateTest()
        {
            (StringArray originalDictionary, Int32Array originalIndicesArray, DictionaryArray dictionaryArray) =
                CreateSimpleTestData();

            // Expected value goes first so a failure message reads correctly.
            Assert.Equal(originalDictionary, dictionaryArray.Dictionary);
            Assert.Equal(originalIndicesArray, dictionaryArray.Indices);
        }

        /// <summary>
        /// For every (offset, length) window: the indices are sliced like a plain
        /// Int32 array, while the dictionary data is left untouched.
        /// </summary>
        [Fact]
        public void SliceTest()
        {
            (StringArray originalDictionary, Int32Array originalIndicesArray, DictionaryArray dictionaryArray) =
                CreateSimpleTestData();

            int batchLength = originalIndicesArray.Length;
            for (int offset = 0; offset < batchLength; offset++)
            {
                for (int length = 1; offset + length <= batchLength; length++)
                {
                    // IsAssignableFrom both checks the runtime type and returns the typed
                    // value, so a wrong type fails as an assertion rather than as a
                    // NullReferenceException on the next line.
                    var sliced = Assert.IsAssignableFrom<DictionaryArray>(dictionaryArray.Slice(offset, length));
                    var actualSlicedDictionary = Assert.IsAssignableFrom<StringArray>(sliced.Dictionary);
                    var actualSlicedIndicesArray = Assert.IsAssignableFrom<Int32Array>(sliced.Indices);

                    var expectedSlicedIndicesArray =
                        Assert.IsAssignableFrom<Int32Array>(originalIndicesArray.Slice(offset, length));

                    // Dictionary is not sliced.
                    Assert.Equal(originalDictionary.Data, actualSlicedDictionary.Data);
                    Assert.Equal(expectedSlicedIndicesArray.ToList(), actualSlicedIndicesArray.ToList());
                }
            }
        }

        /// <summary>
        /// Builds the shared fixture: dictionary ["a", "b", "c"], indices
        /// [0, 0, 1, 1, 2, 2], and the dictionary array combining them.
        /// </summary>
        /// <returns>The dictionary, the indices and the resulting dictionary array.</returns>
        private static Tuple<StringArray, Int32Array, DictionaryArray> CreateSimpleTestData()
        {
            StringArray originalDictionary = new StringArray.Builder().AppendRange(new[] { "a", "b", "c" }).Build();
            Int32Array originalIndicesArray = new Int32Array.Builder().AppendRange(new[] { 0, 0, 1, 1, 2, 2 }).Build();
            var dictionaryArray = new DictionaryArray(new DictionaryType(Int32Type.Default, StringType.Default, false), originalIndicesArray, originalDictionary);

            return Tuple.Create(originalDictionary, originalIndicesArray, dictionaryArray);
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs new file mode 100644 index 000000000..4375c39cd --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs @@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using System.Text;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Test helpers for <see cref="DateTimeOffset"/>.
    /// </summary>
    public static class DateTimeOffsetExtensions
    {
        /// <summary>
        /// Rounds <paramref name="dateTimeOffset"/> down to a whole multiple of
        /// <paramref name="offset"/> (measured in ticks).
        /// </summary>
        /// <param name="dateTimeOffset">The value to truncate.</param>
        /// <param name="offset">The granularity to truncate to, e.g. one second.</param>
        /// <returns>
        /// The truncated value. The input is returned unchanged when the granularity is
        /// <see cref="TimeSpan.Zero"/> or when the input is
        /// <see cref="DateTimeOffset.MinValue"/> / <see cref="DateTimeOffset.MaxValue"/>.
        /// </returns>
        public static DateTimeOffset Truncate(this DateTimeOffset dateTimeOffset, TimeSpan offset)
        {
            bool isBoundaryValue =
                dateTimeOffset == DateTimeOffset.MinValue ||
                dateTimeOffset == DateTimeOffset.MaxValue;

            // A zero granularity would divide by zero below; boundary values pass through.
            if (offset == TimeSpan.Zero || isBoundaryValue)
            {
                return dateTimeOffset;
            }

            long excessTicks = dateTimeOffset.Ticks % offset.Ticks;
            return dateTimeOffset.AddTicks(-excessTicks);
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/FieldComparer.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/FieldComparer.cs new file mode 100644 index 000000000..d7dcc398f --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/FieldComparer.cs @@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Assertion helper that checks two <see cref="Field"/> instances are equivalent.
    /// </summary>
    public static class FieldComparer
    {
        /// <summary>
        /// Asserts that <paramref name="actual"/> matches <paramref name="expected"/> in
        /// name, nullability, metadata (same keys with equal values) and data type.
        /// The same instance passed twice is accepted without further checks.
        /// </summary>
        /// <param name="expected">The reference field.</param>
        /// <param name="actual">The field under test.</param>
        public static void Compare(Field expected, Field actual)
        {
            if (!ReferenceEquals(expected, actual))
            {
                Assert.Equal(expected.Name, actual.Name);
                Assert.Equal(expected.IsNullable, actual.IsNullable);

                Assert.Equal(expected.HasMetadata, actual.HasMetadata);
                if (expected.HasMetadata)
                {
                    Assert.Equal(expected.Metadata.Keys.Count(), actual.Metadata.Keys.Count());

                    // Every expected entry must be present in the actual metadata ...
                    bool expectedCovered = expected.Metadata.Keys.All(
                        key => actual.Metadata.ContainsKey(key) && expected.Metadata[key] == actual.Metadata[key]);
                    Assert.True(expectedCovered);

                    // ... and every actual entry must be present in the expected metadata.
                    bool actualCovered = actual.Metadata.Keys.All(
                        key => expected.Metadata.ContainsKey(key) && actual.Metadata[key] == expected.Metadata[key]);
                    Assert.True(actualCovered);
                }

                // The type comparison is delegated to the visitor-based comparer.
                var typeComparer = new ArrayTypeComparer(expected.DataType);
                actual.DataType.Accept(typeComparer);
            }
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryAllocatorFixture.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryAllocatorFixture.cs new file mode 100644 index 000000000..276caf1ba --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/Fixtures/DefaultMemoryAllocatorFixture.cs @@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Memory;

namespace Apache.Arrow.Tests.Fixtures
{
    /// <summary>
    /// Test fixture exposing a <see cref="NativeMemoryAllocator"/> created with an
    /// alignment of 64.
    /// </summary>
    public class DefaultMemoryAllocatorFixture
    {
        // Alignment handed to the allocator (presumably bytes; confirm against NativeMemoryAllocator).
        private const int Alignment = 64;

        /// <summary>The allocator shared by tests that use this fixture.</summary>
        public MemoryAllocator MemoryAllocator { get; }

        public DefaultMemoryAllocatorFixture()
        {
            MemoryAllocator = new NativeMemoryAllocator(Alignment);
        }
    }
}
// (patch boundary, kept verbatim) diff --git a/src/arrow/csharp/test/Apache.Arrow.Tests/SchemaBuilderTests.cs b/src/arrow/csharp/test/Apache.Arrow.Tests/SchemaBuilderTests.cs new file mode 100644 index 000000000..6ddbcd204 --- /dev/null +++ b/src/arrow/csharp/test/Apache.Arrow.Tests/SchemaBuilderTests.cs @@ -0,0 +1,156 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Linq;
using Xunit;
using Xunit.Sdk;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Schema.Builder"/>.
    /// </summary>
    public class SchemaBuilderTests
    {
        public class Build
        {
            [Fact]
            public void FieldsAreNullableByDefault()
            {
                var schema = new Schema.Builder()
                    .Field(f => f.Name("f0").DataType(Int32Type.Default))
                    .Build();

                Assert.True(schema.Fields["f0"].IsNullable);
            }

            [Fact]
            public void FieldsHaveNullTypeByDefault()
            {
                var schema = new Schema.Builder()
                    .Field(f => f.Name("f0"))
                    .Build();

                // Exact-type check, equivalent to GetType() == typeof(NullType).
                Assert.IsType<NullType>(schema.Fields["f0"].DataType);
            }

            [Fact]
            public void FieldNameIsRequired()
            {
                Assert.Throws<ArgumentNullException>(() =>
                {
                    new Schema.Builder()
                        .Field(f => f.DataType(Int32Type.Default))
                        .Build();
                });
            }

            [Fact]
            public void GetFieldIndex()
            {
                var schema = new Schema.Builder()
                    .Field(f => f.Name("f0").DataType(Int32Type.Default))
                    .Field(f => f.Name("f1").DataType(Int8Type.Default))
                    .Build();

                Assert.Equal(0, schema.GetFieldIndex("f0"));
                Assert.Equal(1, schema.GetFieldIndex("f1"));
            }

            [Fact]
            public void GetFieldByName()
            {
                Field f0 = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
                Field f1 = new Field.Builder().Name("f1").DataType(Int8Type.Default).Build();

                var schema = new Schema.Builder()
                    .Field(f0)
                    .Field(f1)
                    .Build();

                // The schema is expected to hand back the very instances it was given.
                Assert.Same(f0, schema.GetFieldByName("f0"));
                Assert.Same(f1, schema.GetFieldByName("f1"));
            }

            [Fact]
            public void MetadataConstruction()
            {
                var metadata0 = new Dictionary<string, string> { { "foo", "bar" }, { "bizz", "buzz" } };
                var metadata1 = new Dictionary<string, string> { { "foo", "bar" } };
                var metadata0Copy = new Dictionary<string, string>(metadata0);
                var metadata1Copy = new Dictionary<string, string>(metadata1);
                Field f0 = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
                Field f1 = new Field.Builder().Name("f1").DataType(UInt8Type.Default).Nullable(false).Build();
                Field f2 = new Field.Builder().Name("f2").DataType(StringType.Default).Build();
                // Same name and type as f2, but carries metadata.
                Field f3 = new Field.Builder().Name("f2").DataType(StringType.Default).Metadata(metadata1Copy).Build();

                var schema0 = new Schema.Builder()
                    .Field(f0)
                    .Field(f1)
                    .Field(f2)
                    .Metadata(metadata0)
                    .Build();
                var schema1 = new Schema.Builder()
                    .Field(f0)
                    .Field(f1)
                    .Field(f2)
                    .Metadata(metadata1)
                    .Build();
                var schema2 = new Schema.Builder()
                    .Field(f0)
                    .Field(f1)
                    .Field(f2)
                    .Metadata(metadata0Copy)
                    .Build();
                var schema3 = new Schema.Builder()
                    .Field(f0)
                    .Field(f1)
                    .Field(f3)
                    .Metadata(metadata0Copy)
                    .Build();

                // One assertion per sequence so a failure identifies keys vs. values.
                Assert.True(metadata0.Keys.SequenceEqual(schema0.Metadata.Keys));
                Assert.True(metadata0.Values.SequenceEqual(schema0.Metadata.Values));
                Assert.True(metadata1.Keys.SequenceEqual(schema1.Metadata.Keys));
                Assert.True(metadata1.Values.SequenceEqual(schema1.Metadata.Values));
                Assert.True(metadata0.Keys.SequenceEqual(schema2.Metadata.Keys));
                Assert.True(metadata0.Values.SequenceEqual(schema2.Metadata.Values));

                SchemaComparer.Compare(schema0, schema2);
                Assert.Throws<EqualException>(() => SchemaComparer.Compare(schema0, schema1));
                Assert.Throws<EqualException>(() => SchemaComparer.Compare(schema2, schema1));
                Assert.Throws<EqualException>(() => SchemaComparer.Compare(schema2, schema3));
            }

            [Theory]
            [MemberData(nameof(SampleSchema1))]
            public void FieldsHaveExpectedValues(string name, IArrowType type, bool nullable)
            {
                var schema = new Schema.Builder()
                    .Field(f => f.Name(name).DataType(type).Nullable(nullable))
                    .Build();

                var field = schema.Fields[name];

                Assert.Equal(name, field.Name);
                Assert.Equal(type.Name, field.DataType.Name);
                Assert.Equal(nullable, field.IsNullable);
            }

            /// <summary>
            /// Theory data: field name, data type and nullability.
            /// </summary>
            public static IEnumerable<object[]> SampleSchema1()
            {
                yield return new object[] { "f0", Int32Type.Default, true };
                yield return new object[] { "f1", DoubleType.Default, true };
                yield return new object[] { "f2", Int64Type.Default, false };
            }
        }
    }
}
using System.Linq;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Test helper that asserts two <see cref="Schema"/> instances are equivalent.
    /// </summary>
    public static class SchemaComparer
    {
        /// <summary>
        /// Asserts that <paramref name="actual"/> has the same metadata and the same
        /// named fields as <paramref name="expected"/>.
        /// </summary>
        /// <param name="expected">The reference schema.</param>
        /// <param name="actual">The schema under test.</param>
        public static void Compare(Schema expected, Schema actual)
        {
            // Same instance (or both null): nothing to compare.
            if (ReferenceEquals(expected, actual))
            {
                return;
            }

            // Exactly one side being null is a mismatch; surface it as an assertion
            // failure rather than a NullReferenceException further down.
            Assert.NotNull(expected);
            Assert.NotNull(actual);

            Assert.Equal(expected.HasMetadata, actual.HasMetadata);
            if (expected.HasMetadata)
            {
                Assert.Equal(expected.Metadata.Count, actual.Metadata.Count);

                // Checked in both directions, one lookup per entry.
                Assert.True(expected.Metadata.All(pair =>
                    actual.Metadata.TryGetValue(pair.Key, out string value) && pair.Value == value));
                Assert.True(actual.Metadata.All(pair =>
                    expected.Metadata.TryGetValue(pair.Key, out string value) && pair.Value == value));
            }

            Assert.Equal(expected.Fields.Count, actual.Fields.Count);

            // Verify every expected name exists before indexing into actual.Fields.
            Assert.True(expected.Fields.Keys.All(k => actual.Fields.ContainsKey(k)));
            foreach (string name in expected.Fields.Keys)
            {
                FieldComparer.Compare(expected.Fields[name], actual.Fields[name]);
            }
        }
    }
}
using Apache.Arrow.Ipc;
using Apache.Arrow.Types;
using System.Collections.Generic;
using System.IO;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="StructArray"/> construction and stream round-tripping.
    /// </summary>
    public class StructArrayTests
    {
        [Fact]
        public void TestStructArray()
        {
            // The following can be improved with a Builder class for StructArray.
            List<Field> fields = new List<Field>
            {
                new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
                new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build(),
            };
            StructType structType = new StructType(fields);

            StringArray stringArray = new StringArray.Builder()
                .Append("joe").AppendNull().AppendNull().Append("mark").Build();
            Int32Array intArray = new Int32Array.Builder()
                .Append(1).Append(2).AppendNull().Append(4).Build();
            List<Array> arrays = new List<Array> { stringArray, intArray };

            ArrowBuffer nullBitmapBuffer = new ArrowBuffer.BitmapBuilder()
                .Append(true).Append(true).Append(false).Append(true).Build();
            StructArray structs = new StructArray(structType, 4, arrays, nullBitmapBuffer, 1);

            Assert.Equal(4, structs.Length);
            Assert.Equal(1, structs.NullCount);

            ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array
            Assert.Equal(2, childArrays.Length);

            // Child 0: the string column.
            ArrayData stringData = childArrays[0];
            Assert.Null(stringData.Children);
            Assert.Equal(ArrowTypeId.String, stringData.DataType.TypeId);
            StringArray structStringArray = new StringArray(stringData);
            Assert.Equal(structs.Length, structStringArray.Length);
            Assert.Equal(stringArray.Length, structStringArray.Length);
            Assert.Equal(stringArray.NullCount, structStringArray.NullCount);
            for (int j = 0; j < stringArray.Length; j++)
            {
                Assert.Equal(stringArray.GetString(j), structStringArray.GetString(j));
            }

            // Child 1: the int column.
            ArrayData intData = childArrays[1];
            Assert.Null(intData.Children);
            Assert.Equal(ArrowTypeId.Int32, intData.DataType.TypeId);
            Int32Array structIntArray = new Int32Array(intData);
            Assert.Equal(structs.Length, structIntArray.Length);
            Assert.Equal(intArray.Length, structIntArray.Length);
            Assert.Equal(intArray.NullCount, structIntArray.NullCount);
            for (int j = 0; j < intArray.Length; j++)
            {
                Assert.Equal(intArray.GetValue(j), structIntArray.GetValue(j));
            }
        }

        [Fact]
        public void TestListOfStructArray()
        {
            Schema.Builder builder = new Schema.Builder();
            Field structField = new Field(
                "struct",
                new StructType(
                    new[]
                    {
                        new Field("name", StringType.Default, nullable: false),
                        new Field("age", Int64Type.Default, nullable: false),
                    }),
                nullable: false);

            Field listField = new Field("listOfStructs", new ListType(structField), nullable: false);
            builder.Field(listField);
            Schema schema = builder.Build();

            StringArray stringArray = new StringArray.Builder()
                .Append("joe").AppendNull().AppendNull().Append("mark").Append("abe").Append("phil").Build();
            Int64Array intArray = new Int64Array.Builder()
                .Append(1).Append(2).AppendNull().Append(4).Append(10).Append(55).Build();

            ArrowBuffer nullBitmapBuffer = new ArrowBuffer.BitmapBuilder()
                .Append(true).Append(true).Append(false).Append(true).Append(true).Append(true).Build();

            StructArray structs = new StructArray(structField.DataType, 6, new IArrowArray[] { stringArray, intArray }, nullBitmapBuffer, nullCount: 1);

            // Offsets 0, 2, 5, 6 split the six structs into three lists of sizes 2, 3 and 1.
            ArrowBuffer offsetsBuffer = new ArrowBuffer.Builder<int>()
                .Append(0).Append(2).Append(5).Append(6).Build();
            ListArray listArray = new ListArray(listField.DataType, 3, offsetsBuffer, structs, ArrowBuffer.Empty);

            RecordBatch batch = new RecordBatch(schema, new[] { listArray }, 3);
            TestRoundTripRecordBatch(batch);
        }

        /// <summary>
        /// Writes <paramref name="originalBatch"/> to an in-memory stream, reads it back
        /// and compares the result with the original.
        /// </summary>
        private static void TestRoundTripRecordBatch(RecordBatch originalBatch)
        {
            using (MemoryStream stream = new MemoryStream())
            {
                // leaveOpen: true is passed so the stream can be rewound and read below.
                using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true))
                {
                    writer.WriteRecordBatch(originalBatch);
                    writer.WriteEnd();
                }

                stream.Position = 0;

                using (var reader = new ArrowStreamReader(stream))
                {
                    RecordBatch newBatch = reader.ReadNextRecordBatch();
                    ArrowReaderVerifier.CompareBatches(originalBatch, newBatch);
                }
            }
        }
    }
}
using System;
using System.Collections.Generic;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Table"/>.
    /// </summary>
    public class TableTests
    {
        /// <summary>
        /// Creates a table with a single Int32 column "f0" made of two arrays, each of
        /// <paramref name="lengthOfEachArray"/> elements.
        /// </summary>
        public static Table MakeTableWithOneColumnOfTwoIntArrays(int lengthOfEachArray)
        {
            Array firstChunk = ColumnTests.MakeIntArray(lengthOfEachArray);
            Array secondChunk = ColumnTests.MakeIntArray(lengthOfEachArray);
            List<Array> chunks = new List<Array> { firstChunk, secondChunk };

            Field f0 = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Schema schema = new Schema.Builder().Field(f0).Build();

            List<Column> columns = new List<Column> { new Column(f0, chunks) };
            return new Table(schema, columns);
        }

        [Fact]
        public void TestEmptyTable()
        {
            Table empty = new Table();

            Assert.Equal(0, empty.ColumnCount);
            Assert.Equal(0, empty.RowCount);
        }

        [Fact]
        public void TestTableBasics()
        {
            Table twoChunkTable = MakeTableWithOneColumnOfTwoIntArrays(10);

            Assert.Equal(20, twoChunkTable.RowCount);
            Assert.Equal(1, twoChunkTable.ColumnCount);
        }

        [Fact]
        public void TestTableAddRemoveAndSetColumn()
        {
            Table table = MakeTableWithOneColumnOfTwoIntArrays(10);

            // A 10-element column against the 20-row table: insertion is expected to throw.
            Array shortArray = ColumnTests.MakeIntArray(10);
            Field shortField = new Field.Builder().Name("f1").DataType(Int32Type.Default).Build();
            Column shortColumn = new Column(shortField, new[] { shortArray });
            Assert.Throws<ArgumentException>(() => table.InsertColumn(-1, shortColumn));
            Assert.Throws<ArgumentException>(() => table.InsertColumn(1, shortColumn));

            // A 20-element column matches the table's row count.
            Array matchingArray = ColumnTests.MakeIntArray(20);
            Field matchingField = new Field.Builder().Name("f2").DataType(Int32Type.Default).Build();
            Column matchingColumn = new Column(matchingField, new[] { matchingArray });
            Column originalColumn = table.Column(0);

            Table updated = table.InsertColumn(0, matchingColumn);
            Assert.Equal(2, updated.ColumnCount);
            Assert.True(updated.Column(0) == matchingColumn);
            Assert.True(updated.Column(1) == originalColumn);

            updated = updated.RemoveColumn(1);
            Assert.Equal(1, updated.ColumnCount);
            Assert.True(updated.Column(0) == matchingColumn);

            updated = table.SetColumn(0, originalColumn);
            Assert.True(updated.Column(0) == originalColumn);
        }
    }
}
using Apache.Arrow.Arrays;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Factory helpers that build sample schemas, arrays and record batches for tests.
    /// </summary>
    public static class TestData
    {
        /// <summary>
        /// Creates a sample record batch with a single set of columns.
        /// </summary>
        /// <param name="length">Number of rows in every generated array.</param>
        /// <param name="createDictionaryArray">
        /// Forwarded as <c>createAdvancedTypeArrays</c> to the three-argument overload.
        /// </param>
        public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = false)
        {
            return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray);
        }

        /// <summary>
        /// Creates a sample record batch containing <paramref name="columnSetCount"/>
        /// repetitions of the supported column types.
        /// </summary>
        /// <param name="length">Number of rows in every generated array.</param>
        /// <param name="columnSetCount">How many times the column set is repeated.</param>
        /// <param name="createAdvancedTypeArrays">
        /// When true, dictionary and fixed-size-binary columns are added to each set.
        /// </param>
        public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays)
        {
            Schema.Builder builder = new Schema.Builder();
            for (int i = 0; i < columnSetCount; i++)
            {
                builder.Field(CreateField(new ListType(Int64Type.Default), i));
                builder.Field(CreateField(BooleanType.Default, i));
                builder.Field(CreateField(UInt8Type.Default, i));
                builder.Field(CreateField(Int8Type.Default, i));
                builder.Field(CreateField(UInt16Type.Default, i));
                builder.Field(CreateField(Int16Type.Default, i));
                builder.Field(CreateField(UInt32Type.Default, i));
                builder.Field(CreateField(Int32Type.Default, i));
                builder.Field(CreateField(UInt64Type.Default, i));
                builder.Field(CreateField(Int64Type.Default, i));
                builder.Field(CreateField(FloatType.Default, i));
                builder.Field(CreateField(DoubleType.Default, i));
                builder.Field(CreateField(Date32Type.Default, i));
                builder.Field(CreateField(Date64Type.Default, i));
                builder.Field(CreateField(TimestampType.Default, i));
                builder.Field(CreateField(StringType.Default, i));
                builder.Field(CreateField(new StructType(new List<Field> { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i));
                builder.Field(CreateField(new Decimal128Type(10, 6), i));
                builder.Field(CreateField(new Decimal256Type(16, 8), i));

                if (createAdvancedTypeArrays)
                {
                    builder.Field(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i));
                    builder.Field(CreateField(new FixedSizeBinaryType(16), i));
                }

                // Not generated yet: HalfFloatType, Time32Type, Time64Type.
            }

            Schema schema = builder.Build();

            return CreateSampleRecordBatch(schema, length);
        }

        /// <summary>
        /// Creates a record batch for <paramref name="schema"/> with generated arrays of
        /// <paramref name="length"/> rows.
        /// </summary>
        public static RecordBatch CreateSampleRecordBatch(Schema schema, int length)
        {
            IEnumerable<IArrowArray> arrays = CreateArrays(schema, length);

            return new RecordBatch(schema, arrays, length);
        }

        // Field names are the type name suffixed with the column-set index.
        private static Field CreateField(ArrowType type, int iteration)
        {
            return new Field(type.Name + iteration, type, nullable: false);
        }

        /// <summary>
        /// Generates one array per field of <paramref name="schema"/>, in field-index order.
        /// </summary>
        public static IEnumerable<IArrowArray> CreateArrays(Schema schema, int length)
        {
            int fieldCount = schema.Fields.Count;
            List<IArrowArray> arrays = new List<IArrowArray>(fieldCount);
            for (int i = 0; i < fieldCount; i++)
            {
                Field field = schema.GetFieldByIndex(i);
                arrays.Add(CreateArray(field, length));
            }
            return arrays;
        }

        // Dispatches on the field's data type through the visitor below.
        private static IArrowArray CreateArray(Field field, int length)
        {
            var creator = new ArrayCreator(length);

            field.DataType.Accept(creator);

            return creator.Array;
        }

        /// <summary>
        /// Type visitor that stores a generated array of <see cref="Length"/> elements in
        /// <see cref="Array"/> for each supported Arrow type.
        /// </summary>
        private class ArrayCreator :
            IArrowTypeVisitor<BooleanType>,
            IArrowTypeVisitor<Date32Type>,
            IArrowTypeVisitor<Date64Type>,
            IArrowTypeVisitor<Int8Type>,
            IArrowTypeVisitor<Int16Type>,
            IArrowTypeVisitor<Int32Type>,
            IArrowTypeVisitor<Int64Type>,
            IArrowTypeVisitor<UInt8Type>,
            IArrowTypeVisitor<UInt16Type>,
            IArrowTypeVisitor<UInt32Type>,
            IArrowTypeVisitor<UInt64Type>,
            IArrowTypeVisitor<FloatType>,
            IArrowTypeVisitor<DoubleType>,
            IArrowTypeVisitor<TimestampType>,
            IArrowTypeVisitor<StringType>,
            IArrowTypeVisitor<ListType>,
            IArrowTypeVisitor<StructType>,
            IArrowTypeVisitor<Decimal128Type>,
            IArrowTypeVisitor<Decimal256Type>,
            IArrowTypeVisitor<DictionaryType>,
            IArrowTypeVisitor<FixedSizeBinaryType>
        {
            private int Length { get; }
            public IArrowArray Array { get; private set; }

            public ArrayCreator(int length)
            {
                Length = length;
            }

            public void Visit(BooleanType type) => GenerateArray(new BooleanArray.Builder(), x => x % 2 == 0);
            public void Visit(Int8Type type) => GenerateArray(new Int8Array.Builder(), x => (sbyte)x);
            public void Visit(Int16Type type) => GenerateArray(new Int16Array.Builder(), x => (short)x);
            public void Visit(Int32Type type) => GenerateArray(new Int32Array.Builder(), x => x);
            public void Visit(Int64Type type) => GenerateArray(new Int64Array.Builder(), x => x);
            public void Visit(UInt8Type type) => GenerateArray(new UInt8Array.Builder(), x => (byte)x);
            public void Visit(UInt16Type type) => GenerateArray(new UInt16Array.Builder(), x => (ushort)x);
            public void Visit(UInt32Type type) => GenerateArray(new UInt32Array.Builder(), x => (uint)x);
            public void Visit(UInt64Type type) => GenerateArray(new UInt64Array.Builder(), x => (ulong)x);
            public void Visit(FloatType type) => GenerateArray(new FloatArray.Builder(), x => ((float)x / Length));
            public void Visit(DoubleType type) => GenerateArray(new DoubleArray.Builder(), x => ((double)x / Length));

            public void Visit(Decimal128Type type)
            {
                var builder = new Decimal128Array.Builder(type).Reserve(Length);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append((decimal)i / Length);
                }

                Array = builder.Build();
            }

            public void Visit(Decimal256Type type)
            {
                var builder = new Decimal256Array.Builder(type).Reserve(Length);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append((decimal)i / Length);
                }

                Array = builder.Build();
            }

            public void Visit(Date32Type type)
            {
                var builder = new Date32Array.Builder().Reserve(Length);

                // Length can be greater than the number of days since DateTime.MinValue.
                // Set a cap for how many days can be subtracted from now.
                int maxDays = Math.Min(Length, 100_000);
                var basis = DateTimeOffset.UtcNow.AddDays(-maxDays);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append(basis.AddDays(i % maxDays));
                }

                Array = builder.Build();
            }

            public void Visit(Date64Type type)
            {
                var builder = new Date64Array.Builder().Reserve(Length);
                var basis = DateTimeOffset.UtcNow.AddSeconds(-Length);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append(basis.AddSeconds(i));
                }

                Array = builder.Build();
            }

            public void Visit(TimestampType type)
            {
                var builder = new TimestampArray.Builder().Reserve(Length);
                var basis = DateTimeOffset.UtcNow.AddMilliseconds(-Length);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append(basis.AddMilliseconds(i));
                }

                Array = builder.Build();
            }

            public void Visit(StringType type)
            {
                var str = "hello";
                var builder = new StringArray.Builder();

                for (var i = 0; i < Length; i++)
                {
                    builder.Append(str);
                }

                Array = builder.Build();
            }

            public void Visit(ListType type)
            {
                var builder = new ListArray.Builder(type.ValueField).Reserve(Length);

                //Todo : Support various types
                var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1);

                for (var i = 0; i < Length; i++)
                {
                    builder.Append();
                    valueBuilder.Append(i);
                }
                //Add a value to check if Values.Length can exceed ListArray.Length
                valueBuilder.Append(0);

                Array = builder.Build();
            }

            public void Visit(StructType type)
            {
                IArrowArray[] childArrays = new IArrowArray[type.Fields.Count];
                for (int i = 0; i < childArrays.Length; i++)
                {
                    childArrays[i] = CreateArray(type.Fields[i], Length);
                }

                // Every struct slot is marked valid.
                ArrowBuffer.BitmapBuilder nullBitmap = new ArrowBuffer.BitmapBuilder();
                for (int i = 0; i < Length; i++)
                {
                    nullBitmap.Append(true);
                }

                Array = new StructArray(type, Length, childArrays, nullBitmap.Build());
            }

            public void Visit(DictionaryType type)
            {
                Int32Array.Builder indicesBuilder = new Int32Array.Builder().Reserve(Length);
                StringArray.Builder valueBuilder = new StringArray.Builder().Reserve(Length);

                for (int i = 0; i < Length; i++)
                {
                    indicesBuilder.Append(i);
                    valueBuilder.Append($"{i}");
                }

                Array = new DictionaryArray(type, indicesBuilder.Build(), valueBuilder.Build());
            }

            public void Visit(FixedSizeBinaryType type)
            {
                ArrowBuffer.Builder<byte> valueBuilder = new ArrowBuffer.Builder<byte>();

                // BitWidth is in bits; each value occupies BitWidth / 8 bytes. Using the
                // bit count directly made the buffer eight times too large, so element i
                // was not the run of (byte)i values intended here.
                int valueSize = type.BitWidth / 8;
                for (int i = 0; i < Length; i++)
                {
                    valueBuilder.Append(Enumerable.Repeat((byte)i, valueSize).ToArray());
                }

                ArrowBuffer validityBuffer = ArrowBuffer.Empty;
                ArrowBuffer valueBuffer = valueBuilder.Build(default);

                ArrayData arrayData = new ArrayData(type, Length, 0, 0, new[] { validityBuffer, valueBuffer });
                Array = new FixedSizeBinaryArray(arrayData);
            }

            /// <summary>
            /// Fills <paramref name="builder"/> with <see cref="Length"/> entries produced by
            /// <paramref name="generator"/>, placing a null at index <c>Length - 2</c>.
            /// </summary>
            private void GenerateArray<T, TArray, TArrayBuilder>(IArrowArrayBuilder<T, TArray, TArrayBuilder> builder, Func<int, T> generator)
                where TArrayBuilder : IArrowArrayBuilder<T, TArray, TArrayBuilder>
                where TArray : IArrowArray
                where T : struct
            {
                for (var i = 0; i < Length; i++)
                {
                    if (i == Length - 2)
                    {
                        builder.AppendNull();
                    }
                    else
                    {
                        var value = generator(i);
                        builder.Append(value);
                    }
                }

                Array = builder.Build(default);
            }

            // Fallback for any type without a dedicated Visit overload.
            public void Visit(IArrowType type)
            {
                throw new NotImplementedException();
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// The <see cref="TestDateAndTimeData"/> class holds example dates and times useful for testing.
    /// </summary>
    internal static class TestDateAndTimeData
    {
        // Private static fields carry the s_ prefix required by the repository .editorconfig.
        private static readonly DateTime s_earliestDate = new DateTime(1, 1, 1);
        private static readonly DateTime s_latestDate = new DateTime(9999, 12, 31);

        private static readonly DateTime[] s_exampleDates =
        {
            s_earliestDate, new DateTime(1969, 12, 31), new DateTime(1970, 1, 1), new DateTime(1970, 1, 2),
            new DateTime(1972, 6, 30), new DateTime(2015, 6, 30), new DateTime(2016, 12, 31), new DateTime(2020, 2, 29),
            new DateTime(2020, 7, 1), s_latestDate,
        };

        private static readonly TimeSpan[] s_exampleTimes =
        {
            new TimeSpan(0, 0, 1), new TimeSpan(12, 0, 0), new TimeSpan(23, 59, 59),
        };

        private static readonly DateTimeKind[] s_exampleKinds =
        {
            DateTimeKind.Local, DateTimeKind.Unspecified, DateTimeKind.Utc,
        };

        private static readonly TimeSpan[] s_exampleOffsets =
        {
            TimeSpan.FromHours(-2),
            TimeSpan.Zero,
            TimeSpan.FromHours(2),
        };

        /// <summary>
        /// Gets a collection of example dates (i.e. with a zero time component), of all different kinds.
        /// </summary>
        public static IEnumerable<DateTime> ExampleDates =>
            from date in s_exampleDates
            from kind in s_exampleKinds
            select DateTime.SpecifyKind(date, kind);

        /// <summary>
        /// Gets a collection of example date/times, of all different kinds.
        /// </summary>
        public static IEnumerable<DateTime> ExampleDateTimes =>
            from date in s_exampleDates
            from time in s_exampleTimes
            from kind in s_exampleKinds
            select DateTime.SpecifyKind(date.Add(time), kind);

        /// <summary>
        /// Gets a collection of example date time offsets.
        /// </summary>
        /// <remarks>
        /// The earliest date is never paired with a positive offset and the latest date is
        /// never paired with a negative offset.
        /// </remarks>
        public static IEnumerable<DateTimeOffset> ExampleDateTimeOffsets =>
            from date in s_exampleDates
            from time in s_exampleTimes
            from offset in s_exampleOffsets
            where !(date == s_earliestDate && offset.Ticks > 0)
            where !(date == s_latestDate && offset.Ticks < 0)
            select new DateTimeOffset(date.Add(time), offset);
    }
}
using Apache.Arrow.Memory;
using System.Buffers;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// <see cref="MemoryAllocator"/> used by tests; it rents its memory from
    /// <see cref="MemoryPool{T}.Shared"/>.
    /// </summary>
    public class TestMemoryAllocator : MemoryAllocator
    {
        /// <summary>
        /// Rents a block for <paramref name="length"/> bytes and reports
        /// <paramref name="length"/> as the number of bytes allocated.
        /// </summary>
        protected override IMemoryOwner<byte> AllocateInternal(int length, out int bytesAllocated)
        {
            IMemoryOwner<byte> rented = MemoryPool<byte>.Shared.Rent(length);

            // The requested size is reported, not the size of the rented block.
            bytesAllocated = length;
            return rented;
        }
    }
}
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Linq;
using Xunit;
using Xunit.Sdk;

namespace Apache.Arrow.Tests
{
    /// <summary>
    /// Tests for <see cref="Field"/>, <see cref="StructType"/> and <see cref="ListType"/>.
    /// </summary>
    public class TypeTests
    {
        [Fact]
        public void Basics()
        {
            // The same builder instance is reused for both fields.
            Field.Builder fb = new Field.Builder();
            Field f0Nullable = fb.Name("f0").DataType(Int32Type.Default).Build();
            Field f0NonNullable = fb.Name("f0").DataType(Int32Type.Default).Nullable(false).Build();

            Assert.Equal("f0", f0Nullable.Name);
            Assert.Equal(Int32Type.Default.Name, f0Nullable.DataType.Name);

            Assert.True(f0Nullable.IsNullable);
            Assert.False(f0NonNullable.IsNullable);
        }

        [Fact]
        public void Equality()
        {
            Field f0Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Field f0NonNullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Nullable(false).Build();
            Field f0Other = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Field f0WithMeta = new Field.Builder().Name("f0").DataType(Int32Type.Default).Nullable(true).Metadata("a", "1").Metadata("b", "2").Build();

            FieldComparer.Compare(f0Nullable, f0Other);
            Assert.Throws<EqualException>(() => FieldComparer.Compare(f0Nullable, f0NonNullable));
            Assert.Throws<EqualException>(() => FieldComparer.Compare(f0Nullable, f0WithMeta));
        }

        [Fact]
        public void TestMetadataConstruction()
        {
            var metadata = new Dictionary<string, string> { { "foo", "bar" }, { "bizz", "buzz" } };
            var metadataCopy = new Dictionary<string, string>(metadata);
            Field f0Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Metadata(metadata).Build();
            Field f1Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Metadata(metadataCopy).Build();

            // One assertion per sequence so a failure identifies keys vs. values.
            Assert.True(metadata.Keys.SequenceEqual(f0Nullable.Metadata.Keys));
            Assert.True(metadata.Values.SequenceEqual(f0Nullable.Metadata.Values));
            FieldComparer.Compare(f0Nullable, f1Nullable);
        }

        [Fact]
        public void TestStructBasics()
        {
            Field f0Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Field f1Nullable = new Field.Builder().Name("f1").DataType(StringType.Default).Build();
            Field f2Nullable = new Field.Builder().Name("f2").DataType(UInt8Type.Default).Build();

            List<Field> fields = new List<Field>() { f0Nullable, f1Nullable, f2Nullable };
            StructType structType = new StructType(fields);

            var structFields = structType.Fields;
            FieldComparer.Compare(structFields.ElementAt(0), f0Nullable);
            FieldComparer.Compare(structFields.ElementAt(1), f1Nullable);
            FieldComparer.Compare(structFields.ElementAt(2), f2Nullable);
        }

        [Fact]
        public void TestStructGetFieldByName()
        {
            Field f0Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Field f1Nullable = new Field.Builder().Name("f1").DataType(StringType.Default).Build();
            Field f2Nullable = new Field.Builder().Name("f2").DataType(UInt8Type.Default).Build();

            List<Field> fields = new List<Field>() { f0Nullable, f1Nullable, f2Nullable };
            StructType structType = new StructType(fields);

            FieldComparer.Compare(structType.GetFieldByName("f0"), f0Nullable);
            FieldComparer.Compare(structType.GetFieldByName("f1"), f1Nullable);
            FieldComparer.Compare(structType.GetFieldByName("f2"), f2Nullable);
            Assert.Null(structType.GetFieldByName("not_found"));
        }

        [Fact]
        public void TestStructGetFieldIndex()
        {
            Field f0Nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
            Field f1Nullable = new Field.Builder().Name("f1").DataType(StringType.Default).Build();
            Field f2Nullable = new Field.Builder().Name("f2").DataType(UInt8Type.Default).Build();

            StructType structType = new StructType(new[] { f0Nullable, f1Nullable, f2Nullable });

            Assert.Equal(0, structType.GetFieldIndex("f0"));
            Assert.Equal(1, structType.GetFieldIndex("f1"));
            // "F2" is only found when a case-insensitive comparer is supplied.
            Assert.Equal(2, structType.GetFieldIndex("F2", StringComparer.OrdinalIgnoreCase));
            Assert.Equal(-1, structType.GetFieldIndex("F2"));
            Assert.Equal(-1, structType.GetFieldIndex("F2", StringComparer.Ordinal));
            Assert.Equal(-1, structType.GetFieldIndex("not_found"));
        }

        [Fact]
        public void TestListTypeConstructor()
        {
            var stringField = new Field.Builder().Name("item").DataType(StringType.Default).Build();
            var listFromField = new ListType(stringField);
            var listFromType = new ListType(StringType.Default);

            FieldComparer.Compare(listFromField.ValueField, listFromType.ValueField);
            Assert.Equal(listFromField.ValueDataType.TypeId, listFromType.ValueDataType.TypeId);
        }

        // Todo: StructType::GetFieldIndexDuplicate test
    }
}