summaryrefslogtreecommitdiffstats
path: root/src/arrow/dev
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/arrow/dev/.gitignore20
-rw-r--r--src/arrow/dev/README.md189
-rw-r--r--src/arrow/dev/archery/MANIFEST.in4
-rw-r--r--src/arrow/dev/archery/README.md49
-rw-r--r--src/arrow/dev/archery/archery/__init__.py16
-rw-r--r--src/arrow/dev/archery/archery/benchmark/__init__.py16
-rw-r--r--src/arrow/dev/archery/archery/benchmark/codec.py97
-rw-r--r--src/arrow/dev/archery/archery/benchmark/compare.py173
-rw-r--r--src/arrow/dev/archery/archery/benchmark/core.py57
-rw-r--r--src/arrow/dev/archery/archery/benchmark/google.py174
-rw-r--r--src/arrow/dev/archery/archery/benchmark/jmh.py201
-rw-r--r--src/arrow/dev/archery/archery/benchmark/runner.py313
-rw-r--r--src/arrow/dev/archery/archery/bot.py267
-rw-r--r--src/arrow/dev/archery/archery/cli.py943
-rw-r--r--src/arrow/dev/archery/archery/compat.py59
-rw-r--r--src/arrow/dev/archery/archery/crossbow/__init__.py19
-rw-r--r--src/arrow/dev/archery/archery/crossbow/cli.py365
-rw-r--r--src/arrow/dev/archery/archery/crossbow/core.py1172
-rw-r--r--src/arrow/dev/archery/archery/crossbow/reports.py315
-rw-r--r--src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml51
-rw-r--r--src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md10
-rw-r--r--src/arrow/dev/archery/archery/crossbow/tests/test_core.py25
-rw-r--r--src/arrow/dev/archery/archery/crossbow/tests/test_crossbow_cli.py43
-rw-r--r--src/arrow/dev/archery/archery/crossbow/tests/test_reports.py35
-rw-r--r--src/arrow/dev/archery/archery/docker.py402
-rw-r--r--src/arrow/dev/archery/archery/docker/__init__.py18
-rw-r--r--src/arrow/dev/archery/archery/docker/cli.py261
-rw-r--r--src/arrow/dev/archery/archery/docker/core.py417
-rw-r--r--src/arrow/dev/archery/archery/docker/tests/test_docker.py531
-rw-r--r--src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py201
-rw-r--r--src/arrow/dev/archery/archery/integration/__init__.py16
-rw-r--r--src/arrow/dev/archery/archery/integration/datagen.py1662
-rw-r--r--src/arrow/dev/archery/archery/integration/runner.py429
-rw-r--r--src/arrow/dev/archery/archery/integration/scenario.py29
-rw-r--r--src/arrow/dev/archery/archery/integration/tester.py62
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_cpp.py116
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_csharp.py67
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_go.py119
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_java.py140
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_js.py73
-rw-r--r--src/arrow/dev/archery/archery/integration/tester_rust.py115
-rw-r--r--src/arrow/dev/archery/archery/integration/util.py166
-rw-r--r--src/arrow/dev/archery/archery/lang/__init__.py16
-rw-r--r--src/arrow/dev/archery/archery/lang/cpp.py296
-rw-r--r--src/arrow/dev/archery/archery/lang/java.py77
-rw-r--r--src/arrow/dev/archery/archery/lang/python.py223
-rw-r--r--src/arrow/dev/archery/archery/linking.py75
-rw-r--r--src/arrow/dev/archery/archery/release.py535
-rw-r--r--src/arrow/dev/archery/archery/templates/release_changelog.md.j229
-rw-r--r--src/arrow/dev/archery/archery/templates/release_curation.txt.j241
-rw-r--r--src/arrow/dev/archery/archery/testing.py83
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff-empty-lines.jsonl6
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl4
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json212
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json212
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json212
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json212
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json217
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json206
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json445
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/issue-19.json64
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/issue-26.json70
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json31
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json31
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json158
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json170
-rw-r--r--src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json329
-rw-r--r--src/arrow/dev/archery/archery/tests/test_benchmarks.py383
-rw-r--r--src/arrow/dev/archery/archery/tests/test_bot.py215
-rw-r--r--src/arrow/dev/archery/archery/tests/test_cli.py39
-rw-r--r--src/arrow/dev/archery/archery/tests/test_release.py333
-rw-r--r--src/arrow/dev/archery/archery/tests/test_testing.py62
-rw-r--r--src/arrow/dev/archery/archery/utils/__init__.py16
-rw-r--r--src/arrow/dev/archery/archery/utils/cache.py80
-rw-r--r--src/arrow/dev/archery/archery/utils/cli.py73
-rw-r--r--src/arrow/dev/archery/archery/utils/cmake.py215
-rw-r--r--src/arrow/dev/archery/archery/utils/command.py100
-rw-r--r--src/arrow/dev/archery/archery/utils/git.py100
-rw-r--r--src/arrow/dev/archery/archery/utils/lint.py429
-rw-r--r--src/arrow/dev/archery/archery/utils/logger.py29
-rw-r--r--src/arrow/dev/archery/archery/utils/maven.py204
-rw-r--r--src/arrow/dev/archery/archery/utils/rat.py70
-rw-r--r--src/arrow/dev/archery/archery/utils/report.py64
-rw-r--r--src/arrow/dev/archery/archery/utils/source.py211
-rw-r--r--src/arrow/dev/archery/archery/utils/tmpdir.py28
-rw-r--r--src/arrow/dev/archery/conftest.py70
-rwxr-xr-xsrc/arrow/dev/archery/generate_files_for_endian_test.sh43
-rw-r--r--src/arrow/dev/archery/requirements.txt4
-rwxr-xr-xsrc/arrow/dev/archery/setup.py59
-rw-r--r--src/arrow/dev/benchmarking/.env18
-rw-r--r--src/arrow/dev/benchmarking/.gitignore1
-rw-r--r--src/arrow/dev/benchmarking/Dockerfile23
-rw-r--r--src/arrow/dev/benchmarking/README.md255
-rw-r--r--src/arrow/dev/benchmarking/data_model.dot219
-rw-r--r--src/arrow/dev/benchmarking/data_model.rst373
-rw-r--r--src/arrow/dev/benchmarking/ddl/0_setup.sql23
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql45
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql63
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql43
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql57
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql35
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql31
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql46
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql39
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql69
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql37
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql51
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql54
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql112
-rw-r--r--src/arrow/dev/benchmarking/ddl/2_00_views.sql324
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql643
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql574
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql323
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql395
-rw-r--r--src/arrow/dev/benchmarking/ddl/4_00_triggers.sql61
-rw-r--r--src/arrow/dev/benchmarking/ddl/5_00_permissions.sql73
-rw-r--r--src/arrow/dev/benchmarking/docker-compose.yml43
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_example.json32
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_run_example.csv6
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_run_example.json97
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json73
-rw-r--r--src/arrow/dev/benchmarking/examples/example.sql232
-rw-r--r--src/arrow/dev/benchmarking/examples/example_graphql_mutation.json12
-rw-r--r--src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json3
-rw-r--r--src/arrow/dev/benchmarking/examples/machine.json22
-rwxr-xr-xsrc/arrow/dev/benchmarking/graphql_submit.sh75
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_data_model_rst.sh69
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_dotfile.sh70
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_machine_json.sh55
-rw-r--r--src/arrow/dev/conbench_envs/README.md214
-rw-r--r--src/arrow/dev/conbench_envs/benchmarks.env50
-rwxr-xr-xsrc/arrow/dev/conbench_envs/hooks.sh91
-rw-r--r--src/arrow/dev/merge.conf.sample25
-rwxr-xr-xsrc/arrow/dev/merge_arrow_pr.py613
-rwxr-xr-xsrc/arrow/dev/merge_arrow_pr.sh56
-rw-r--r--src/arrow/dev/release/.env.example32
-rw-r--r--src/arrow/dev/release/.gitignore21
-rw-r--r--src/arrow/dev/release/01-prepare-test.rb586
-rwxr-xr-xsrc/arrow/dev/release/01-prepare.sh103
-rw-r--r--src/arrow/dev/release/02-source-test.rb148
-rwxr-xr-xsrc/arrow/dev/release/02-source.sh164
-rwxr-xr-xsrc/arrow/dev/release/03-binary-submit.sh45
-rwxr-xr-xsrc/arrow/dev/release/04-binary-download.sh39
-rwxr-xr-xsrc/arrow/dev/release/05-binary-upload.sh122
-rw-r--r--src/arrow/dev/release/README.md24
-rw-r--r--src/arrow/dev/release/Rakefile37
-rw-r--r--src/arrow/dev/release/VERIFY.md76
-rw-r--r--src/arrow/dev/release/binary-task.rb1910
-rw-r--r--src/arrow/dev/release/binary/.dockerignore18
-rw-r--r--src/arrow/dev/release/binary/Dockerfile70
-rwxr-xr-xsrc/arrow/dev/release/binary/runner.sh36
-rw-r--r--src/arrow/dev/release/check-rat-report.py59
-rwxr-xr-xsrc/arrow/dev/release/download_rc_binaries.py184
-rwxr-xr-xsrc/arrow/dev/release/post-01-upload.sh71
-rwxr-xr-xsrc/arrow/dev/release/post-02-binary.sh101
-rwxr-xr-xsrc/arrow/dev/release/post-03-website.sh266
-rwxr-xr-xsrc/arrow/dev/release/post-04-ruby.sh92
-rwxr-xr-xsrc/arrow/dev/release/post-05-js.sh48
-rwxr-xr-xsrc/arrow/dev/release/post-06-csharp.sh60
-rwxr-xr-xsrc/arrow/dev/release/post-08-remove-rc.sh50
-rwxr-xr-xsrc/arrow/dev/release/post-09-docs.sh67
-rwxr-xr-xsrc/arrow/dev/release/post-10-python.sh52
-rwxr-xr-xsrc/arrow/dev/release/post-11-java.sh81
-rwxr-xr-xsrc/arrow/dev/release/post-12-bump-versions.sh79
-rw-r--r--src/arrow/dev/release/post-13-go.sh34
-rw-r--r--src/arrow/dev/release/rat_exclude_files.txt208
-rwxr-xr-xsrc/arrow/dev/release/run-rat.sh43
-rwxr-xr-xsrc/arrow/dev/release/run-test.rb31
-rw-r--r--src/arrow/dev/release/setup-gpg-agent.sh24
-rw-r--r--src/arrow/dev/release/test-helper.rb96
-rw-r--r--src/arrow/dev/release/utils-binary.sh86
-rw-r--r--src/arrow/dev/release/utils-prepare.sh145
-rwxr-xr-xsrc/arrow/dev/release/verify-apt.sh194
-rw-r--r--src/arrow/dev/release/verify-release-candidate-wheels.bat107
-rw-r--r--src/arrow/dev/release/verify-release-candidate.bat130
-rwxr-xr-xsrc/arrow/dev/release/verify-release-candidate.sh817
-rwxr-xr-xsrc/arrow/dev/release/verify-yum.sh204
-rw-r--r--src/arrow/dev/requirements_merge_arrow_pr.txt3
-rw-r--r--src/arrow/dev/tasks/README.md19
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml70
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml69
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml69
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml69
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml69
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml65
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml29
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml29
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml27
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml27
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml12
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml12
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh30
-rw-r--r--src/arrow/dev/tasks/conda-recipes/README.md67
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt68
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat44
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh99
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh49
-rw-r--r--src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml302
-rw-r--r--src/arrow/dev/tasks/conda-recipes/azure.clean.yml28
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/azure.linux.yml38
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/azure.osx.yml83
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/azure.win.yml77
-rw-r--r--src/arrow/dev/tasks/conda-recipes/azure.yml0
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/build_steps.sh55
-rw-r--r--src/arrow/dev/tasks/conda-recipes/clean.py80
-rw-r--r--src/arrow/dev/tasks/conda-recipes/conda-forge.yml1
-rw-r--r--src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml51
-rw-r--r--src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat9
-rw-r--r--src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh3
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh8
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/r-arrow/configure.win9
-rw-r--r--src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R5
-rw-r--r--src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml66
-rwxr-xr-xsrc/arrow/dev/tasks/conda-recipes/run_docker_build.sh77
-rw-r--r--src/arrow/dev/tasks/cpp-examples/github.linux.yml46
-rw-r--r--src/arrow/dev/tasks/docker-tests/azure.linux.yml52
-rw-r--r--src/arrow/dev/tasks/docker-tests/circle.linux.yml51
-rw-r--r--src/arrow/dev/tasks/docker-tests/github.linux.yml56
-rw-r--r--src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb69
-rw-r--r--src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb88
-rw-r--r--src/arrow/dev/tasks/homebrew-formulae/github.macos.yml56
-rw-r--r--src/arrow/dev/tasks/java-jars/README.md29
-rw-r--r--src/arrow/dev/tasks/java-jars/github.yml115
-rw-r--r--src/arrow/dev/tasks/linux-packages/.gitignore28
-rw-r--r--src/arrow/dev/tasks/linux-packages/README.md40
-rw-r--r--src/arrow/dev/tasks/linux-packages/Rakefile249
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile64
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile40
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile41
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install2
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog11
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control23
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright26
-rwxr-xr-xsrc/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules37
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile66
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo44
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile26
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile26
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in113
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile26
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile26
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile162
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile81
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile82
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile82
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile84
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile77
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile83
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile83
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog123
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in640
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright193
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install19
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install2
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install6
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install7
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install7
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install7
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install4
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base9
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links3
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series0
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install1
-rwxr-xr-xsrc/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules104
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format1
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch2
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile64
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile55
-rwxr-xr-xsrc/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static33
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in892
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile59
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from18
-rw-r--r--src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile64
-rwxr-xr-xsrc/arrow/dev/tasks/linux-packages/apt/build.sh117
-rw-r--r--src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml138
-rw-r--r--src/arrow/dev/tasks/linux-packages/helper.rb70
-rw-r--r--src/arrow/dev/tasks/linux-packages/package-task.rb645
-rw-r--r--src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml155
-rwxr-xr-xsrc/arrow/dev/tasks/linux-packages/yum/build.sh158
-rw-r--r--src/arrow/dev/tasks/macros.jinja198
-rw-r--r--src/arrow/dev/tasks/nightlies.sample.yml68
-rw-r--r--src/arrow/dev/tasks/nuget-packages/github.linux.yml43
-rw-r--r--src/arrow/dev/tasks/python-sdist/github.yml45
-rw-r--r--src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml56
-rw-r--r--src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml110
-rw-r--r--src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml157
-rw-r--r--src/arrow/dev/tasks/python-wheels/github.windows.yml58
-rw-r--r--src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml73
-rw-r--r--src/arrow/dev/tasks/r/azure.linux.yml65
-rw-r--r--src/arrow/dev/tasks/r/github.devdocs.yml78
-rw-r--r--src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml117
-rw-r--r--src/arrow/dev/tasks/r/github.linux.cran.yml79
-rw-r--r--src/arrow/dev/tasks/r/github.linux.offline.build.yml117
-rw-r--r--src/arrow/dev/tasks/r/github.linux.rchk.yml77
-rw-r--r--src/arrow/dev/tasks/r/github.linux.revdepcheck.yml77
-rw-r--r--src/arrow/dev/tasks/r/github.linux.versions.yml81
-rw-r--r--src/arrow/dev/tasks/r/github.macos-linux.local.yml87
-rw-r--r--src/arrow/dev/tasks/r/github.macos.autobrew.yml78
-rw-r--r--src/arrow/dev/tasks/tasks.yml1308
-rw-r--r--src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat86
-rw-r--r--src/arrow/dev/tasks/vcpkg-tests/github.windows.yml59
-rw-r--r--src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml77
-rw-r--r--src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml50
-rw-r--r--src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml48
-rw-r--r--src/arrow/dev/tasks/verify-rc/github.win.yml45
-rw-r--r--src/arrow/dev/test_merge_arrow_pr.py317
386 files changed, 41634 insertions, 0 deletions
diff --git a/src/arrow/dev/.gitignore b/src/arrow/dev/.gitignore
new file mode 100644
index 000000000..b0792939f
--- /dev/null
+++ b/src/arrow/dev/.gitignore
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Python virtual environments for dev tools
+.venv*/
+
diff --git a/src/arrow/dev/README.md b/src/arrow/dev/README.md
new file mode 100644
index 000000000..258792b80
--- /dev/null
+++ b/src/arrow/dev/README.md
@@ -0,0 +1,189 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+# Arrow Developer Scripts
+
+This directory contains scripts useful to developers when packaging,
+testing, or committing to Arrow.
+
+Merging a pull request requires being a committer on the project. In addition
+you need to have linked your GitHub and ASF accounts on
+https://gitbox.apache.org/setup/ to be able to push to GitHub as the main
+remote.
+
+NOTE: It may take some time (a few hours) between when you complete
+the setup at GitBox, and when your GitHub account will be added as a
+committer.
+
+## How to merge a Pull request
+
+Please don't merge PRs using the Github Web interface. Instead, set up
+your git clone such as to have a remote named ``apache`` pointing to the
+official Arrow repository:
+```
+git remote add apache git@github.com:apache/arrow.git
+```
+
+and then run the following command:
+```
+./dev/merge_arrow_pr.sh
+```
+
+This creates a new Python virtual environment under `dev/.venv[PY_VERSION]`
+and installs all the necessary dependencies to run the Arrow merge script.
+After installed, it runs the merge script.
+
+(we don't provide a wrapper script for Windows yet, so under Windows you'll
+have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py`
+directly)
+
+The merge script uses the GitHub REST API; if you encounter rate limit issues,
+you may set a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal
+Access Token.
+
+You can specify the username and the password of your JIRA account in
+`APACHE_JIRA_USERNAME` and `APACHE_JIRA_PASSWORD` environment variables.
+If these aren't supplied, the script will ask you the values of them.
+
+Note that the directory name of your Arrow git clone must be called `arrow`.
+
+example output:
+```
+Which pull request would you like to merge? (e.g. 34):
+```
+Type the pull request number (from https://github.com/apache/arrow/pulls) and hit enter.
+```
+=== Pull Request #X ===
+title Blah Blah Blah
+source repo/branch
+target master
+url https://api.github.com/repos/apache/arrow/pulls/X
+
+Proceed with merging pull request #3? (y/n):
+```
+If this looks good, type y and hit enter.
+```
+From git-wip-us.apache.org:/repos/asf/arrow.git
+ * [new branch] master -> PR_TOOL_MERGE_PR_3_MASTER
+Switched to branch 'PR_TOOL_MERGE_PR_3_MASTER'
+
+Merge complete (local ref PR_TOOL_MERGE_PR_3_MASTER). Push to apache? (y/n):
+```
+A local branch with the merge has been created.
+type y and hit enter to push it to apache master
+```
+Counting objects: 67, done.
+Delta compression using up to 4 threads.
+Compressing objects: 100% (26/26), done.
+Writing objects: 100% (36/36), 5.32 KiB, done.
+Total 36 (delta 17), reused 0 (delta 0)
+To git-wip-us.apache.org:/repos/arrow-mr.git
+ b767ac4..485658a PR_TOOL_MERGE_PR_X_MASTER -> master
+Restoring head pointer to b767ac4e
+Note: checking out 'b767ac4e'.
+
+You are in 'detached HEAD' state. You can look around, make experimental
+changes and commit them, and you can discard any commits you make in this
+state without impacting any branches by performing another checkout.
+
+If you want to create a new branch to retain commits you create, you may
+do so (now or later) by using -b with the checkout command again. Example:
+
+ git checkout -b new_branch_name
+
+HEAD is now at b767ac4... Update README.md
+Deleting local branch PR_TOOL_MERGE_PR_X
+Deleting local branch PR_TOOL_MERGE_PR_X_MASTER
+Pull request #X merged!
+Merge hash: 485658a5
+
+Would you like to pick 485658a5 into another branch? (y/n):
+```
+For now just say n as we have 1 branch
+
+## Verifying Release Candidates
+
+We have provided a script to assist with verifying release candidates:
+
+```shell
+bash dev/release/verify-release-candidate.sh 0.7.0 0
+```
+
+Currently this only works on Linux (patches to expand to macOS welcome!). Read
+the script for information about system dependencies.
+
+On Windows, we have a script that verifies C++ and Python (requires Visual
+Studio 2015):
+
+```
+dev/release/verify-release-candidate.bat apache-arrow-0.7.0.tar.gz
+```
+
+### Verifying the JavaScript release
+
+For JavaScript-specific releases, use a different verification script:
+
+```shell
+bash dev/release/js-verify-release-candidate.sh 0.7.0 0
+```
+
+# Integration testing
+
+Build the following base image used by multiple tests:
+
+```shell
+docker build -t arrow_integration_xenial_base -f docker_common/Dockerfile.xenial.base .
+```
+
+## HDFS C++ / Python support
+
+```shell
+docker-compose build conda-cpp
+docker-compose build conda-python
+docker-compose build conda-python-hdfs
+docker-compose run --rm conda-python-hdfs
+```
+
+## Apache Spark Integration Tests
+
+Tests can be run to ensure that the current snapshot of Java and Python Arrow
+works with Spark. This will run a docker image to build Arrow C++
+and Python in a Conda environment, build and install Arrow Java to the local
+Maven repository, build Spark with the new Arrow artifact, and run Arrow
+related unit tests in Spark for Java and Python. Any errors will exit with a
+non-zero value. To run, use the following command:
+
+```shell
+docker-compose build conda-cpp
+docker-compose build conda-python
+docker-compose build conda-python-spark
+docker-compose run --rm conda-python-spark
+```
+
+If you already are building Spark, these commands will map your local Maven
+repo to the image and save time by not having to download all dependencies.
+Be aware, that docker write files as root, which can cause problems for maven
+on the host.
+
+```shell
+docker-compose run --rm -v $HOME/.m2:/root/.m2 conda-python-spark
+```
+
+NOTE: If the Java API has breaking changes, a patched version of Spark might
+need to be used to successfully build.
diff --git a/src/arrow/dev/archery/MANIFEST.in b/src/arrow/dev/archery/MANIFEST.in
new file mode 100644
index 000000000..90fe034c2
--- /dev/null
+++ b/src/arrow/dev/archery/MANIFEST.in
@@ -0,0 +1,4 @@
+include ../../LICENSE.txt
+include ../../NOTICE.txt
+
+include archery/reports/*
diff --git a/src/arrow/dev/archery/README.md b/src/arrow/dev/archery/README.md
new file mode 100644
index 000000000..eff654416
--- /dev/null
+++ b/src/arrow/dev/archery/README.md
@@ -0,0 +1,49 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+# Developing with Archery
+
+Archery is documented on the Arrow website:
+
+* [Daily development using Archery](https://arrow.apache.org/docs/developers/archery.html)
+* [Using Archery and Crossbow](https://arrow.apache.org/docs/developers/crossbow.html)
+* [Using Archer and Docker](https://arrow.apache.org/docs/developers/docker.html)
+
+# Installing Archery
+
+See the pages linked aboved for more details. As a general overview, Archery
+comes in a number of subpackages, each needing to be installed if you want
+to use the functionality of it:
+
+* lint – lint (and in some cases auto-format) code in the Arrow repo
+ To install: `pip install -e "arrow/dev/archery[lint]"`
+* benchmark – to run Arrow benchmarks using Archery
+ To install: `pip install -e "arrow/dev/archery[benchmark]"`
+* docker – to run docker-compose based tasks more easily
+ To install: `pip install -e "arrow/dev/archery[docker]"`
+* release – release related helpers
+ To install: `pip install -e "arrow/dev/archery[release]"`
+* crossbow – to trigger + interact with the crossbow build system
+ To install: `pip install -e "arrow/dev/archery[crossbow]"`
+* crossbow-upload
+ To install: `pip install -e "arrow/dev/archery[crossbow-upload]"`
+
+Additionally, if you would prefer to install everything at once,
+`pip install -e "arrow/dev/archery[all]"` is an alias for all of
+the above subpackages. \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/__init__.py b/src/arrow/dev/archery/archery/__init__.py
new file mode 100644
index 000000000..13a83393a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/src/arrow/dev/archery/archery/benchmark/__init__.py b/src/arrow/dev/archery/archery/benchmark/__init__.py
new file mode 100644
index 000000000..13a83393a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/src/arrow/dev/archery/archery/benchmark/codec.py b/src/arrow/dev/archery/archery/benchmark/codec.py
new file mode 100644
index 000000000..4157890d1
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/codec.py
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import json
+
+from ..benchmark.core import Benchmark, BenchmarkSuite
+from ..benchmark.runner import BenchmarkRunner, StaticBenchmarkRunner
+from ..benchmark.compare import BenchmarkComparator
+
+
+class JsonEncoder(json.JSONEncoder):
+ def default(self, o):
+ if isinstance(o, Benchmark):
+ return BenchmarkCodec.encode(o)
+
+ if isinstance(o, BenchmarkSuite):
+ return BenchmarkSuiteCodec.encode(o)
+
+ if isinstance(o, BenchmarkRunner):
+ return BenchmarkRunnerCodec.encode(o)
+
+ if isinstance(o, BenchmarkComparator):
+ return BenchmarkComparatorCodec.encode(o)
+
+ return json.JSONEncoder.default(self, o)
+
+
+class BenchmarkCodec:
+ @staticmethod
+ def encode(b):
+ return {
+ "name": b.name,
+ "unit": b.unit,
+ "less_is_better": b.less_is_better,
+ "values": b.values,
+ "time_unit": b.time_unit,
+ "times": b.times,
+ "counters": b.counters,
+ }
+
+ @staticmethod
+ def decode(dct, **kwargs):
+ return Benchmark(**dct, **kwargs)
+
+
+class BenchmarkSuiteCodec:
+ @staticmethod
+ def encode(bs):
+ return {
+ "name": bs.name,
+ "benchmarks": [BenchmarkCodec.encode(b) for b in bs.benchmarks]
+ }
+
+ @staticmethod
+ def decode(dct, **kwargs):
+ benchmarks = [BenchmarkCodec.decode(b)
+ for b in dct.pop("benchmarks", [])]
+ return BenchmarkSuite(benchmarks=benchmarks, **dct, **kwargs)
+
+
+class BenchmarkRunnerCodec:
+ @staticmethod
+ def encode(br):
+ return {"suites": [BenchmarkSuiteCodec.encode(s) for s in br.suites]}
+
+ @staticmethod
+ def decode(dct, **kwargs):
+ suites = [BenchmarkSuiteCodec.decode(s)
+ for s in dct.pop("suites", [])]
+ return StaticBenchmarkRunner(suites=suites, **dct, **kwargs)
+
+
+class BenchmarkComparatorCodec:
+ @staticmethod
+ def encode(bc):
+ comparator = bc.formatted
+
+ suite_name = bc.suite_name
+ if suite_name:
+ comparator["suite"] = suite_name
+
+ return comparator
diff --git a/src/arrow/dev/archery/archery/benchmark/compare.py b/src/arrow/dev/archery/archery/benchmark/compare.py
new file mode 100644
index 000000000..622b80179
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/compare.py
@@ -0,0 +1,173 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# Define a global regression threshold as 5%. This is purely subjective and
+# flawed. This does not track cumulative regression.
+DEFAULT_THRESHOLD = 0.05
+
+
+def items_per_seconds_fmt(value):
+ if value < 1000:
+ return "{} items/sec".format(value)
+ if value < 1000**2:
+ return "{:.3f}K items/sec".format(value / 1000)
+ if value < 1000**3:
+ return "{:.3f}M items/sec".format(value / 1000**2)
+ else:
+ return "{:.3f}G items/sec".format(value / 1000**3)
+
+
+def bytes_per_seconds_fmt(value):
+ if value < 1024:
+ return "{} bytes/sec".format(value)
+ if value < 1024**2:
+ return "{:.3f} KiB/sec".format(value / 1024)
+ if value < 1024**3:
+ return "{:.3f} MiB/sec".format(value / 1024**2)
+ if value < 1024**4:
+ return "{:.3f} GiB/sec".format(value / 1024**3)
+ else:
+ return "{:.3f} TiB/sec".format(value / 1024**4)
+
+
+def change_fmt(value):
+ return "{:.3%}".format(value)
+
+
+def formatter_for_unit(unit):
+ if unit == "bytes_per_second":
+ return bytes_per_seconds_fmt
+ elif unit == "items_per_second":
+ return items_per_seconds_fmt
+ else:
+ return lambda x: x
+
+
+class BenchmarkComparator:
+ """ Compares two benchmarks.
+
+ Encodes the logic of comparing two benchmarks and taking a decision on
+ if it induce a regression.
+ """
+
+ def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD,
+ suite_name=None):
+ self.contender = contender
+ self.baseline = baseline
+ self.threshold = threshold
+ self.suite_name = suite_name
+
+ @property
+ def name(self):
+ return self.baseline.name
+
+ @property
+ def less_is_better(self):
+ return self.baseline.less_is_better
+
+ @property
+ def unit(self):
+ return self.baseline.unit
+
+ @property
+ def change(self):
+ new = self.contender.value
+ old = self.baseline.value
+
+ if old == 0 and new == 0:
+ return 0.0
+ if old == 0:
+ return 0.0
+
+ return float(new - old) / abs(old)
+
+ @property
+ def confidence(self):
+ """ Indicate if a comparison of benchmarks should be trusted. """
+ return True
+
+ @property
+ def regression(self):
+ change = self.change
+ adjusted_change = change if self.less_is_better else -change
+ return (self.confidence and adjusted_change > self.threshold)
+
+ @property
+ def formatted(self):
+ fmt = formatter_for_unit(self.unit)
+ return {
+ "benchmark": self.name,
+ "change": change_fmt(self.change),
+ "regression": self.regression,
+ "baseline": fmt(self.baseline.value),
+ "contender": fmt(self.contender.value),
+ "unit": self.unit,
+ "less_is_better": self.less_is_better,
+ "counters": str(self.baseline.counters)
+ }
+
+ def compare(self, comparator=None):
+ return {
+ "benchmark": self.name,
+ "change": self.change,
+ "regression": self.regression,
+ "baseline": self.baseline.value,
+ "contender": self.contender.value,
+ "unit": self.unit,
+ "less_is_better": self.less_is_better,
+ "counters": self.baseline.counters
+ }
+
+ def __call__(self, **kwargs):
+ return self.compare(**kwargs)
+
+
+def pairwise_compare(contender, baseline):
+ dict_contender = {e.name: e for e in contender}
+ dict_baseline = {e.name: e for e in baseline}
+
+ for name in (dict_contender.keys() & dict_baseline.keys()):
+ yield name, (dict_contender[name], dict_baseline[name])
+
+
+class RunnerComparator:
+ """ Compares suites/benchmarks from runners.
+
+ It is up to the caller that ensure that runners are compatible (both from
+ the same language implementation).
+ """
+
+ def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD):
+ self.contender = contender
+ self.baseline = baseline
+ self.threshold = threshold
+
+ @property
+ def comparisons(self):
+ contender = self.contender.suites
+ baseline = self.baseline.suites
+ suites = pairwise_compare(contender, baseline)
+
+ for suite_name, (suite_cont, suite_base) in suites:
+ benchmarks = pairwise_compare(
+ suite_cont.benchmarks, suite_base.benchmarks)
+
+ for _, (bench_cont, bench_base) in benchmarks:
+ yield BenchmarkComparator(bench_cont, bench_base,
+ threshold=self.threshold,
+ suite_name=suite_name)
diff --git a/src/arrow/dev/archery/archery/benchmark/core.py b/src/arrow/dev/archery/archery/benchmark/core.py
new file mode 100644
index 000000000..5a92271a3
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/core.py
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def median(values):
+ n = len(values)
+ if n == 0:
+ raise ValueError("median requires at least one value")
+ elif n % 2 == 0:
+ return (values[(n // 2) - 1] + values[n // 2]) / 2
+ else:
+ return values[n // 2]
+
+
+class Benchmark:
+ def __init__(self, name, unit, less_is_better, values, time_unit,
+ times, counters=None):
+ self.name = name
+ self.unit = unit
+ self.less_is_better = less_is_better
+ self.values = sorted(values)
+ self.time_unit = time_unit
+ self.times = sorted(times)
+ self.median = median(self.values)
+ self.counters = counters or {}
+
+ @property
+ def value(self):
+ return self.median
+
+ def __repr__(self):
+ return "Benchmark[name={},value={}]".format(self.name, self.value)
+
+
+class BenchmarkSuite:
+ def __init__(self, name, benchmarks):
+ self.name = name
+ self.benchmarks = benchmarks
+
+ def __repr__(self):
+ return "BenchmarkSuite[name={}, benchmarks={}]".format(
+ self.name, self.benchmarks
+ )
diff --git a/src/arrow/dev/archery/archery/benchmark/google.py b/src/arrow/dev/archery/archery/benchmark/google.py
new file mode 100644
index 000000000..ebcc52636
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/google.py
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from itertools import filterfalse, groupby, tee
+import json
+import subprocess
+from tempfile import NamedTemporaryFile
+
+from .core import Benchmark
+from ..utils.command import Command
+
+
+def partition(pred, iterable):
+ # adapted from python's examples
+ t1, t2 = tee(iterable)
+ return list(filter(pred, t1)), list(filterfalse(pred, t2))
+
+
+class GoogleBenchmarkCommand(Command):
+ """ Run a google benchmark binary.
+
+ This assumes the binary supports the standard command line options,
+ notably `--benchmark_filter`, `--benchmark_format`, etc...
+ """
+
+ def __init__(self, benchmark_bin, benchmark_filter=None):
+ self.bin = benchmark_bin
+ self.benchmark_filter = benchmark_filter
+
+ def list_benchmarks(self):
+ argv = ["--benchmark_list_tests"]
+ if self.benchmark_filter:
+ argv.append("--benchmark_filter={}".format(self.benchmark_filter))
+ result = self.run(*argv, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ return str.splitlines(result.stdout.decode("utf-8"))
+
+ def results(self, repetitions=1):
+ with NamedTemporaryFile() as out:
+ argv = ["--benchmark_repetitions={}".format(repetitions),
+ "--benchmark_out={}".format(out.name),
+ "--benchmark_out_format=json"]
+
+ if self.benchmark_filter:
+ argv.append(
+ "--benchmark_filter={}".format(self.benchmark_filter)
+ )
+
+ self.run(*argv, check=True)
+ return json.load(out)
+
+
+class GoogleBenchmarkObservation:
+ """ Represents one run of a single (google c++) benchmark.
+
+ Aggregates are reported by Google Benchmark executables alongside
+ other observations whenever repetitions are specified (with
+ `--benchmark_repetitions` on the bare benchmark, or with the
+ archery option `--repetitions`). Aggregate observations are not
+ included in `GoogleBenchmark.runs`.
+
+ RegressionSumKernel/32768/0 1 us 1 us 25.8077GB/s
+ RegressionSumKernel/32768/0 1 us 1 us 25.7066GB/s
+ RegressionSumKernel/32768/0 1 us 1 us 25.1481GB/s
+ RegressionSumKernel/32768/0 1 us 1 us 25.846GB/s
+ RegressionSumKernel/32768/0 1 us 1 us 25.6453GB/s
+ RegressionSumKernel/32768/0_mean 1 us 1 us 25.6307GB/s
+ RegressionSumKernel/32768/0_median 1 us 1 us 25.7066GB/s
+ RegressionSumKernel/32768/0_stddev 0 us 0 us 288.046MB/s
+ """
+
+ def __init__(self, name, real_time, cpu_time, time_unit, run_type,
+ size=None, bytes_per_second=None, items_per_second=None,
+ **counters):
+ self._name = name
+ self.real_time = real_time
+ self.cpu_time = cpu_time
+ self.time_unit = time_unit
+ self.run_type = run_type
+ self.size = size
+ self.bytes_per_second = bytes_per_second
+ self.items_per_second = items_per_second
+ self.counters = counters
+
+ @property
+ def is_aggregate(self):
+ """ Indicate if the observation is a run or an aggregate. """
+ return self.run_type == "aggregate"
+
+ @property
+ def is_realtime(self):
+ """ Indicate if the preferred value is realtime instead of cputime. """
+ return self.name.find("/real_time") != -1
+
+ @property
+ def name(self):
+ name = self._name
+ return name.rsplit("_", maxsplit=1)[0] if self.is_aggregate else name
+
+ @property
+ def time(self):
+ return self.real_time if self.is_realtime else self.cpu_time
+
+ @property
+ def value(self):
+ """ Return the benchmark value."""
+ return self.bytes_per_second or self.items_per_second or self.time
+
+ @property
+ def unit(self):
+ if self.bytes_per_second:
+ return "bytes_per_second"
+ elif self.items_per_second:
+ return "items_per_second"
+ else:
+ return self.time_unit
+
+ def __repr__(self):
+ return str(self.value)
+
+
+class GoogleBenchmark(Benchmark):
+ """ A set of GoogleBenchmarkObservations. """
+
+ def __init__(self, name, runs):
+ """ Initialize a GoogleBenchmark.
+
+ Parameters
+ ----------
+ name: str
+ Name of the benchmark
+ runs: list(GoogleBenchmarkObservation)
+ Repetitions of GoogleBenchmarkObservation run.
+
+ """
+ self.name = name
+ # exclude google benchmark aggregate artifacts
+ _, runs = partition(lambda b: b.is_aggregate, runs)
+ self.runs = sorted(runs, key=lambda b: b.value)
+ unit = self.runs[0].unit
+ time_unit = self.runs[0].time_unit
+ less_is_better = not unit.endswith("per_second")
+ values = [b.value for b in self.runs]
+ times = [b.real_time for b in self.runs]
+ # Slight kludge to extract the UserCounters for each benchmark
+ counters = self.runs[0].counters
+ super().__init__(name, unit, less_is_better, values, time_unit, times,
+ counters)
+
+ def __repr__(self):
+ return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs)
+
+ @classmethod
+ def from_json(cls, payload):
+ def group_key(x):
+ return x.name
+
+ benchmarks = map(lambda x: GoogleBenchmarkObservation(**x), payload)
+ groups = groupby(sorted(benchmarks, key=group_key), group_key)
+ return [cls(k, list(bs)) for k, bs in groups]
diff --git a/src/arrow/dev/archery/archery/benchmark/jmh.py b/src/arrow/dev/archery/archery/benchmark/jmh.py
new file mode 100644
index 000000000..f531b6de1
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/jmh.py
@@ -0,0 +1,201 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from itertools import filterfalse, groupby, tee
+import json
+import subprocess
+from tempfile import NamedTemporaryFile
+
+from .core import Benchmark
+from ..utils.command import Command
+from ..utils.maven import Maven
+
+
+def partition(pred, iterable):
+ # adapted from python's examples
+ t1, t2 = tee(iterable)
+ return list(filter(pred, t1)), list(filterfalse(pred, t2))
+
+
+class JavaMicrobenchmarkHarnessCommand(Command):
+ """ Run a Java Micro Benchmark Harness
+
+ This assumes the binary supports the standard command line options,
+ notably `-Dbenchmark_filter`
+ """
+
+ def __init__(self, build, benchmark_filter=None):
+ self.benchmark_filter = benchmark_filter
+ self.build = build
+ self.maven = Maven()
+
+ """ Extract benchmark names from output between "Benchmarks:" and "[INFO]".
+ Assume the following output:
+ ...
+ Benchmarks:
+ org.apache.arrow.vector.IntBenchmarks.setIntDirectly
+ ...
+ org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
+ org.apache.arrow.vector.IntBenchmarks.setWithWriter
+ ...
+ [INFO]
+ """
+
+ def list_benchmarks(self):
+ argv = []
+ if self.benchmark_filter:
+ argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
+ result = self.build.list(
+ *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ lists = []
+ benchmarks = False
+ for line in str.splitlines(result.stdout.decode("utf-8")):
+ if not benchmarks:
+ if line.startswith("Benchmarks:"):
+ benchmarks = True
+ else:
+ if line.startswith("org.apache.arrow"):
+ lists.append(line)
+ if line.startswith("[INFO]"):
+ break
+ return lists
+
+ def results(self, repetitions):
+ with NamedTemporaryFile(suffix=".json") as out:
+ argv = ["-Dbenchmark.runs={}".format(repetitions),
+ "-Dbenchmark.resultfile={}".format(out.name),
+ "-Dbenchmark.resultformat=json"]
+ if self.benchmark_filter:
+ argv.append(
+ "-Dbenchmark.filter={}".format(self.benchmark_filter)
+ )
+
+ self.build.benchmark(*argv, check=True)
+ return json.load(out)
+
+
+class JavaMicrobenchmarkHarnessObservation:
+ """ Represents one run of a single Java Microbenchmark Harness
+ """
+
+ def __init__(self, benchmark, primaryMetric,
+ forks, warmupIterations, measurementIterations, **counters):
+ self.name = benchmark
+ self.primaryMetric = primaryMetric
+ self.score = primaryMetric["score"]
+ self.score_unit = primaryMetric["scoreUnit"]
+ self.forks = forks
+ self.warmups = warmupIterations
+ self.runs = measurementIterations
+ self.counters = {
+ "mode": counters["mode"],
+ "threads": counters["threads"],
+ "warmups": warmupIterations,
+ "warmupTime": counters["warmupTime"],
+ "measurements": measurementIterations,
+ "measurementTime": counters["measurementTime"],
+ "jvmArgs": counters["jvmArgs"]
+ }
+ self.reciprocal_value = True if self.score_unit.endswith(
+ "/op") else False
+ if self.score_unit.startswith("ops/"):
+ idx = self.score_unit.find("/")
+ self.normalizePerSec(self.score_unit[idx+1:])
+ elif self.score_unit.endswith("/op"):
+ idx = self.score_unit.find("/")
+ self.normalizePerSec(self.score_unit[:idx])
+ else:
+ self.normalizeFactor = 1
+
+ @property
+ def value(self):
+ """ Return the benchmark value."""
+ val = 1 / self.score if self.reciprocal_value else self.score
+ return val * self.normalizeFactor
+
+ def normalizePerSec(self, unit):
+ if unit == "ns":
+ self.normalizeFactor = 1000 * 1000 * 1000
+ elif unit == "us":
+ self.normalizeFactor = 1000 * 1000
+ elif unit == "ms":
+ self.normalizeFactor = 1000
+ elif unit == "min":
+ self.normalizeFactor = 1 / 60
+ elif unit == "hr":
+ self.normalizeFactor = 1 / (60 * 60)
+ elif unit == "day":
+ self.normalizeFactor = 1 / (60 * 60 * 24)
+ else:
+ self.normalizeFactor = 1
+
+ @property
+ def unit(self):
+ if self.score_unit.startswith("ops/"):
+ return "items_per_second"
+ elif self.score_unit.endswith("/op"):
+ return "items_per_second"
+ else:
+ return "?"
+
+ def __repr__(self):
+ return str(self.value)
+
+
+class JavaMicrobenchmarkHarness(Benchmark):
+ """ A set of JavaMicrobenchmarkHarnessObservations. """
+
+ def __init__(self, name, runs):
+ """ Initialize a JavaMicrobenchmarkHarness.
+
+ Parameters
+ ----------
+ name: str
+ Name of the benchmark
+ forks: int
+ warmups: int
+ runs: int
+ runs: list(JavaMicrobenchmarkHarnessObservation)
+ Repetitions of JavaMicrobenchmarkHarnessObservation run.
+
+ """
+ self.name = name
+ self.runs = sorted(runs, key=lambda b: b.value)
+ unit = self.runs[0].unit
+ time_unit = "N/A"
+ less_is_better = not unit.endswith("per_second")
+ values = [b.value for b in self.runs]
+ times = []
+ # Slight kludge to extract the UserCounters for each benchmark
+ counters = self.runs[0].counters
+ super().__init__(name, unit, less_is_better, values, time_unit, times,
+ counters)
+
+ def __repr__(self):
+ return "JavaMicrobenchmark[name={},runs={}]".format(
+ self.name, self.runs)
+
+ @classmethod
+ def from_json(cls, payload):
+ def group_key(x):
+ return x.name
+
+ benchmarks = map(
+ lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
+ groups = groupby(sorted(benchmarks, key=group_key), group_key)
+ return [cls(k, list(bs)) for k, bs in groups]
diff --git a/src/arrow/dev/archery/archery/benchmark/runner.py b/src/arrow/dev/archery/archery/benchmark/runner.py
new file mode 100644
index 000000000..fc6d354b1
--- /dev/null
+++ b/src/arrow/dev/archery/archery/benchmark/runner.py
@@ -0,0 +1,313 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import glob
+import json
+import os
+import re
+
+from .core import BenchmarkSuite
+from .google import GoogleBenchmarkCommand, GoogleBenchmark
+from .jmh import JavaMicrobenchmarkHarnessCommand, JavaMicrobenchmarkHarness
+from ..lang.cpp import CppCMakeDefinition, CppConfiguration
+from ..lang.java import JavaMavenDefinition, JavaConfiguration
+from ..utils.cmake import CMakeBuild
+from ..utils.maven import MavenBuild
+from ..utils.logger import logger
+
+
+def regex_filter(re_expr):
+ if re_expr is None:
+ return lambda s: True
+ re_comp = re.compile(re_expr)
+ return lambda s: re_comp.search(s)
+
+
+DEFAULT_REPETITIONS = 1
+
+
+class BenchmarkRunner:
+ def __init__(self, suite_filter=None, benchmark_filter=None,
+ repetitions=DEFAULT_REPETITIONS):
+ self.suite_filter = suite_filter
+ self.benchmark_filter = benchmark_filter
+ self.repetitions = repetitions
+
+ @property
+ def suites(self):
+ raise NotImplementedError("BenchmarkRunner must implement suites")
+
+ @staticmethod
+ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
+ raise NotImplementedError(
+ "BenchmarkRunner must implement from_rev_or_path")
+
+
+class StaticBenchmarkRunner(BenchmarkRunner):
+ """ Run suites from a (static) set of suites. """
+
+ def __init__(self, suites, **kwargs):
+ self._suites = suites
+ super().__init__(**kwargs)
+
+ @property
+ def list_benchmarks(self):
+ for suite in self._suites:
+ for benchmark in suite.benchmarks:
+ yield "{}.{}".format(suite.name, benchmark.name)
+
+ @property
+ def suites(self):
+ suite_fn = regex_filter(self.suite_filter)
+ benchmark_fn = regex_filter(self.benchmark_filter)
+
+ for suite in (s for s in self._suites if suite_fn(s.name)):
+ benchmarks = [b for b in suite.benchmarks if benchmark_fn(b.name)]
+ yield BenchmarkSuite(suite.name, benchmarks)
+
+ @classmethod
+ def is_json_result(cls, path_or_str):
+ builder = None
+ try:
+ builder = cls.from_json(path_or_str)
+ except BaseException:
+ pass
+
+ return builder is not None
+
+ @staticmethod
+ def from_json(path_or_str, **kwargs):
+ # .codec imported here to break recursive imports
+ from .codec import BenchmarkRunnerCodec
+ if os.path.isfile(path_or_str):
+ with open(path_or_str) as f:
+ loaded = json.load(f)
+ else:
+ loaded = json.loads(path_or_str)
+ return BenchmarkRunnerCodec.decode(loaded, **kwargs)
+
+ def __repr__(self):
+ return "BenchmarkRunner[suites={}]".format(list(self.suites))
+
+
+class CppBenchmarkRunner(BenchmarkRunner):
+ """ Run suites from a CMakeBuild. """
+
+ def __init__(self, build, **kwargs):
+ """ Initialize a CppBenchmarkRunner. """
+ self.build = build
+ super().__init__(**kwargs)
+
+ @staticmethod
+ def default_configuration(**kwargs):
+ """ Returns the default benchmark configuration. """
+ return CppConfiguration(
+ build_type="release", with_tests=False, with_benchmarks=True,
+ with_compute=True,
+ with_csv=True,
+ with_dataset=True,
+ with_json=True,
+ with_parquet=True,
+ with_python=False,
+ with_brotli=True,
+ with_bz2=True,
+ with_lz4=True,
+ with_snappy=True,
+ with_zlib=True,
+ with_zstd=True,
+ **kwargs)
+
+ @property
+ def suites_binaries(self):
+ """ Returns a list of benchmark binaries for this build. """
+ # Ensure build is up-to-date to run benchmarks
+ self.build()
+ # Not the best method, but works for now
+ glob_expr = os.path.join(self.build.binaries_dir, "*-benchmark")
+ return {os.path.basename(b): b for b in glob.glob(glob_expr)}
+
+ def suite(self, name, suite_bin):
+ """ Returns the resulting benchmarks for a given suite. """
+ suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter)
+
+ # Ensure there will be data
+ benchmark_names = suite_cmd.list_benchmarks()
+ if not benchmark_names:
+ return None
+
+ results = suite_cmd.results(repetitions=self.repetitions)
+ benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
+ return BenchmarkSuite(name, benchmarks)
+
+ @property
+ def list_benchmarks(self):
+ for suite_name, suite_bin in self.suites_binaries.items():
+ suite_cmd = GoogleBenchmarkCommand(suite_bin)
+ for benchmark_name in suite_cmd.list_benchmarks():
+ yield "{}.{}".format(suite_name, benchmark_name)
+
+ @property
+ def suites(self):
+ """ Returns all suite for a runner. """
+ suite_matcher = regex_filter(self.suite_filter)
+
+ suite_and_binaries = self.suites_binaries
+ for suite_name in suite_and_binaries:
+ if not suite_matcher(suite_name):
+ logger.debug("Ignoring suite {}".format(suite_name))
+ continue
+
+ suite_bin = suite_and_binaries[suite_name]
+ suite = self.suite(suite_name, suite_bin)
+
+ # Filter may exclude all benchmarks
+ if not suite:
+ logger.debug("Suite {} executed but no results"
+ .format(suite_name))
+ continue
+
+ yield suite
+
+ @staticmethod
+ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
+ """ Returns a BenchmarkRunner from a path or a git revision.
+
+ First, it checks if `rev_or_path` is a valid path (or string) of a json
+ object that can deserialize to a BenchmarkRunner. If so, it initialize
+ a StaticBenchmarkRunner from it. This allows memoizing the result of a
+ run in a file or a string.
+
+ Second, it checks if `rev_or_path` points to a valid CMake build
+ directory. If so, it creates a CppBenchmarkRunner with this existing
+ CMakeBuild.
+
+ Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+ the given revision and create a fresh CMakeBuild.
+ """
+ build = None
+ if StaticBenchmarkRunner.is_json_result(rev_or_path):
+ return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+ elif CMakeBuild.is_build_dir(rev_or_path):
+ build = CMakeBuild.from_path(rev_or_path)
+ return CppBenchmarkRunner(build, **kwargs)
+ else:
+ # Revisions can references remote via the `/` character, ensure
+ # that the revision is path friendly
+ path_rev = rev_or_path.replace("/", "_")
+ root_rev = os.path.join(root, path_rev)
+ os.mkdir(root_rev)
+
+ clone_dir = os.path.join(root_rev, "arrow")
+ # Possibly checkout the sources at given revision, no need to
+ # perform cleanup on cloned repository as root_rev is reclaimed.
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
+ build_dir = os.path.join(root_rev, "build")
+ return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+
+
+class JavaBenchmarkRunner(BenchmarkRunner):
+ """ Run suites for Java. """
+
+ # default repetitions is 5 for Java microbenchmark harness
+ def __init__(self, build, **kwargs):
+ """ Initialize a JavaBenchmarkRunner. """
+ self.build = build
+ super().__init__(**kwargs)
+
+ @staticmethod
+ def default_configuration(**kwargs):
+ """ Returns the default benchmark configuration. """
+ return JavaConfiguration(**kwargs)
+
+ def suite(self, name):
+ """ Returns the resulting benchmarks for a given suite. """
+ # update .m2 directory, which installs target jars
+ self.build.build()
+
+ suite_cmd = JavaMicrobenchmarkHarnessCommand(
+ self.build, self.benchmark_filter)
+
+ # Ensure there will be data
+ benchmark_names = suite_cmd.list_benchmarks()
+ if not benchmark_names:
+ return None
+
+ results = suite_cmd.results(repetitions=self.repetitions)
+ benchmarks = JavaMicrobenchmarkHarness.from_json(results)
+ return BenchmarkSuite(name, benchmarks)
+
+ @property
+ def list_benchmarks(self):
+ """ Returns all suite names """
+ # Ensure build is up-to-date to run benchmarks
+ self.build.build()
+
+ suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build)
+ benchmark_names = suite_cmd.list_benchmarks()
+ for benchmark_name in benchmark_names:
+ yield "{}".format(benchmark_name)
+
+ @property
+ def suites(self):
+ """ Returns all suite for a runner. """
+ suite_name = "JavaBenchmark"
+ suite = self.suite(suite_name)
+
+ # Filter may exclude all benchmarks
+ if not suite:
+ logger.debug("Suite {} executed but no results"
+ .format(suite_name))
+ return
+
+ yield suite
+
+ @staticmethod
+ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
+ """ Returns a BenchmarkRunner from a path or a git revision.
+
+ First, it checks if `rev_or_path` is a valid path (or string) of a json
+ object that can deserialize to a BenchmarkRunner. If so, it initialize
+ a StaticBenchmarkRunner from it. This allows memoizing the result of a
+ run in a file or a string.
+
+ Second, it checks if `rev_or_path` points to a valid Maven build
+ directory. If so, it creates a JavaBenchmarkRunner with this existing
+ MavenBuild.
+
+ Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+ the given revision and create a fresh MavenBuild.
+ """
+ if StaticBenchmarkRunner.is_json_result(rev_or_path):
+ return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+ elif MavenBuild.is_build_dir(rev_or_path):
+ maven_def = JavaMavenDefinition(rev_or_path, maven_conf)
+ return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs)
+ else:
+ # Revisions can references remote via the `/` character, ensure
+ # that the revision is path friendly
+ path_rev = rev_or_path.replace("/", "_")
+ root_rev = os.path.join(root, path_rev)
+ os.mkdir(root_rev)
+
+ clone_dir = os.path.join(root_rev, "arrow")
+ # Possibly checkout the sources at given revision, no need to
+ # perform cleanup on cloned repository as root_rev is reclaimed.
+ src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+ maven_def = JavaMavenDefinition(src_rev.java, maven_conf)
+ build_dir = os.path.join(root_rev, "arrow/java")
+ return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs)
diff --git a/src/arrow/dev/archery/archery/bot.py b/src/arrow/dev/archery/archery/bot.py
new file mode 100644
index 000000000..e8fbbdd04
--- /dev/null
+++ b/src/arrow/dev/archery/archery/bot.py
@@ -0,0 +1,267 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shlex
+from pathlib import Path
+from functools import partial
+import tempfile
+
+import click
+import github
+
+from .utils.git import git
+from .utils.logger import logger
+from .crossbow import Repo, Queue, Config, Target, Job, CommentReport
+
+
+class EventError(Exception):
+ pass
+
+
+class CommandError(Exception):
+
+ def __init__(self, message):
+ self.message = message
+
+
+class _CommandMixin:
+
+ def get_help_option(self, ctx):
+ def show_help(ctx, param, value):
+ if value and not ctx.resilient_parsing:
+ raise click.UsageError(ctx.get_help())
+ option = super().get_help_option(ctx)
+ option.callback = show_help
+ return option
+
+ def __call__(self, message, **kwargs):
+ args = shlex.split(message)
+ try:
+ with self.make_context(self.name, args=args, obj=kwargs) as ctx:
+ return self.invoke(ctx)
+ except click.ClickException as e:
+ raise CommandError(e.format_message())
+
+
+class Command(_CommandMixin, click.Command):
+ pass
+
+
+class Group(_CommandMixin, click.Group):
+
+ def command(self, *args, **kwargs):
+ kwargs.setdefault('cls', Command)
+ return super().command(*args, **kwargs)
+
+ def group(self, *args, **kwargs):
+ kwargs.setdefault('cls', Group)
+ return super().group(*args, **kwargs)
+
+ def parse_args(self, ctx, args):
+ if not args and self.no_args_is_help and not ctx.resilient_parsing:
+ raise click.UsageError(ctx.get_help())
+ return super().parse_args(ctx, args)
+
+
+command = partial(click.command, cls=Command)
+group = partial(click.group, cls=Group)
+
+
+class CommentBot:
+
+ def __init__(self, name, handler, token=None):
+ # TODO(kszucs): validate
+ assert isinstance(name, str)
+ assert callable(handler)
+ self.name = name
+ self.handler = handler
+ self.github = github.Github(token)
+
+ def parse_command(self, payload):
+ # only allow users of apache org to submit commands, for more see
+ # https://developer.github.com/v4/enum/commentauthorassociation/
+ allowed_roles = {'OWNER', 'MEMBER', 'CONTRIBUTOR'}
+ mention = '@{}'.format(self.name)
+ comment = payload['comment']
+
+ if payload['sender']['login'] == self.name:
+ raise EventError("Don't respond to itself")
+ elif payload['action'] not in {'created', 'edited'}:
+ raise EventError("Don't respond to comment deletion")
+ elif comment['author_association'] not in allowed_roles:
+ raise EventError(
+ "Don't respond to comments from non-authorized users"
+ )
+ elif not comment['body'].lstrip().startswith(mention):
+ raise EventError("The bot is not mentioned")
+
+ # Parse the comment, removing the bot mentioned (and everything
+ # before it)
+ command = payload['comment']['body'].split(mention)[-1]
+
+ # then split on newlines and keep only the first line
+ # (ignoring all other lines)
+ return command.split("\n")[0].strip()
+
+ def handle(self, event, payload):
+ try:
+ command = self.parse_command(payload)
+ except EventError as e:
+ logger.error(e)
+ # see the possible reasons in the validate method
+ return
+
+ if event == 'issue_comment':
+ return self.handle_issue_comment(command, payload)
+ elif event == 'pull_request_review_comment':
+ return self.handle_review_comment(command, payload)
+ else:
+ raise ValueError("Unexpected event type {}".format(event))
+
+ def handle_issue_comment(self, command, payload):
+ repo = self.github.get_repo(payload['repository']['id'], lazy=True)
+ issue = repo.get_issue(payload['issue']['number'])
+
+ try:
+ pull = issue.as_pull_request()
+ except github.GithubException:
+ return issue.create_comment(
+ "The comment bot only listens to pull request comments!"
+ )
+
+ comment = pull.get_issue_comment(payload['comment']['id'])
+ try:
+ self.handler(command, issue=issue, pull_request=pull,
+ comment=comment)
+ except CommandError as e:
+ logger.error(e)
+ pull.create_issue_comment("```\n{}\n```".format(e.message))
+ except Exception as e:
+ logger.exception(e)
+ comment.create_reaction('-1')
+ else:
+ comment.create_reaction('+1')
+
+ def handle_review_comment(self, payload):
+ raise NotImplementedError()
+
+
+@group(name='@github-actions')
+@click.pass_context
+def actions(ctx):
+ """Ursabot"""
+ ctx.ensure_object(dict)
+
+
+@actions.group()
+@click.option('--crossbow', '-c', default='ursacomputing/crossbow',
+ help='Crossbow repository on github to use')
+@click.pass_obj
+def crossbow(obj, crossbow):
+ """
+ Trigger crossbow builds for this pull request
+ """
+ obj['crossbow_repo'] = crossbow
+
+
+def _clone_arrow_and_crossbow(dest, crossbow_repo, pull_request):
+ """
+ Clone the repositories and initialize crossbow objects.
+
+ Parameters
+ ----------
+ dest : Path
+ Filesystem path to clone the repositories to.
+ crossbow_repo : str
+ Github repository name, like kszucs/crossbow.
+ pull_request : pygithub.PullRequest
+ Object containing information about the pull request the comment bot
+ was triggered from.
+ """
+ arrow_path = dest / 'arrow'
+ queue_path = dest / 'crossbow'
+
+ # clone arrow and checkout the pull request's branch
+ pull_request_ref = 'pull/{}/head:{}'.format(
+ pull_request.number, pull_request.head.ref
+ )
+ git.clone(pull_request.base.repo.clone_url, str(arrow_path))
+ git.fetch('origin', pull_request_ref, git_dir=arrow_path)
+ git.checkout(pull_request.head.ref, git_dir=arrow_path)
+
+ # clone crossbow repository
+ crossbow_url = 'https://github.com/{}'.format(crossbow_repo)
+ git.clone(crossbow_url, str(queue_path))
+
+ # initialize crossbow objects
+ github_token = os.environ['CROSSBOW_GITHUB_TOKEN']
+ arrow = Repo(arrow_path)
+ queue = Queue(queue_path, github_token=github_token, require_https=True)
+
+ return (arrow, queue)
+
+
+@crossbow.command()
+@click.argument('tasks', nargs=-1, required=False)
+@click.option('--group', '-g', 'groups', multiple=True,
+ help='Submit task groups as defined in tests.yml')
+@click.option('--param', '-p', 'params', multiple=True,
+ help='Additional task parameters for rendering the CI templates')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
+@click.pass_obj
+def submit(obj, tasks, groups, params, arrow_version):
+ """
+ Submit crossbow testing tasks.
+
+ See groups defined in arrow/dev/tasks/tasks.yml
+ """
+ crossbow_repo = obj['crossbow_repo']
+ pull_request = obj['pull_request']
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmpdir = Path(tmpdir)
+ arrow, queue = _clone_arrow_and_crossbow(
+ dest=Path(tmpdir),
+ crossbow_repo=crossbow_repo,
+ pull_request=pull_request,
+ )
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(arrow.path / "dev" / "tasks" / "tasks.yml")
+ config.validate()
+
+ # initialize the crossbow build's target repository
+ target = Target.from_repo(arrow, version=arrow_version,
+ remote=pull_request.head.repo.clone_url,
+ branch=pull_request.head.ref)
+
+ # parse additional job parameters
+ params = dict([p.split("=") for p in params])
+
+ # instantiate the job object
+ job = Job.from_config(config=config, target=target, tasks=tasks,
+ groups=groups, params=params)
+
+ # add the job to the crossbow queue and push to the remote repository
+ queue.put(job, prefix="actions")
+ queue.push()
+
+ # render the response comment's content
+ report = CommentReport(job, crossbow_repo=crossbow_repo)
+
+ # send the response
+ pull_request.create_issue_comment(report.show())
diff --git a/src/arrow/dev/archery/archery/cli.py b/src/arrow/dev/archery/archery/cli.py
new file mode 100644
index 000000000..d408be3cc
--- /dev/null
+++ b/src/arrow/dev/archery/archery/cli.py
@@ -0,0 +1,943 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import namedtuple
+from io import StringIO
+import click
+import errno
+import json
+import logging
+import os
+import pathlib
+import sys
+
+from .benchmark.codec import JsonEncoder
+from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
+from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner
+from .compat import _import_pandas
+from .lang.cpp import CppCMakeDefinition, CppConfiguration
+from .utils.cli import ArrowBool, validate_arrow_sources, add_optional_command
+from .utils.lint import linter, python_numpydoc, LintValidationException
+from .utils.logger import logger, ctx as log_ctx
+from .utils.source import ArrowSources
+from .utils.tmpdir import tmpdir
+
+# Set default logging to INFO in command line.
+logging.basicConfig(level=logging.INFO)
+
+
+BOOL = ArrowBool()
+
+
+@click.group()
+@click.option("--debug", type=BOOL, is_flag=True, default=False,
+ help="Increase logging with debugging output.")
+@click.option("--pdb", type=BOOL, is_flag=True, default=False,
+ help="Invoke pdb on uncaught exception.")
+@click.option("-q", "--quiet", type=BOOL, is_flag=True, default=False,
+ help="Silence executed commands.")
+@click.pass_context
+def archery(ctx, debug, pdb, quiet):
+ """ Apache Arrow developer utilities.
+
+ See sub-commands help with `archery <cmd> --help`.
+
+ """
+ # Ensure ctx.obj exists
+ ctx.ensure_object(dict)
+
+ log_ctx.quiet = quiet
+ if debug:
+ logger.setLevel(logging.DEBUG)
+
+ ctx.debug = debug
+
+ if pdb:
+ import pdb
+ sys.excepthook = lambda t, v, e: pdb.pm()
+
+
+build_dir_type = click.Path(dir_okay=True, file_okay=False, resolve_path=True)
+# Supported build types
+build_type = click.Choice(["debug", "relwithdebinfo", "release"],
+ case_sensitive=False)
+# Supported warn levels
+warn_level_type = click.Choice(["everything", "checkin", "production"],
+ case_sensitive=False)
+
+simd_level = click.Choice(["NONE", "SSE4_2", "AVX2", "AVX512"],
+ case_sensitive=True)
+
+
+def cpp_toolchain_options(cmd):
+ options = [
+ click.option("--cc", metavar="<compiler>", help="C compiler."),
+ click.option("--cxx", metavar="<compiler>", help="C++ compiler."),
+ click.option("--cxx-flags", help="C++ compiler flags."),
+ click.option("--cpp-package-prefix",
+ help=("Value to pass for ARROW_PACKAGE_PREFIX and "
+ "use ARROW_DEPENDENCY_SOURCE=SYSTEM"))
+ ]
+ return _apply_options(cmd, options)
+
+
+def java_toolchain_options(cmd):
+ options = [
+ click.option("--java-home", metavar="<java_home>",
+ help="Path to Java Developers Kit."),
+ click.option("--java-options", help="java compiler options."),
+ ]
+ return _apply_options(cmd, options)
+
+
+def _apply_options(cmd, options):
+ for option in options:
+ cmd = option(cmd)
+ return cmd
+
+
+@archery.command(short_help="Initialize an Arrow C++ build")
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory")
+# toolchain
+@cpp_toolchain_options
+@click.option("--build-type", default=None, type=build_type,
+ help="CMake's CMAKE_BUILD_TYPE")
+@click.option("--warn-level", default="production", type=warn_level_type,
+ help="Controls compiler warnings -W(no-)error.")
+@click.option("--use-gold-linker", default=True, type=BOOL,
+ help="Toggles ARROW_USE_LD_GOLD option.")
+@click.option("--simd-level", default="SSE4_2", type=simd_level,
+ help="Toggles ARROW_SIMD_LEVEL option.")
+# Tests and benchmarks
+@click.option("--with-tests", default=True, type=BOOL,
+ help="Build with tests.")
+@click.option("--with-benchmarks", default=None, type=BOOL,
+ help="Build with benchmarks.")
+@click.option("--with-examples", default=None, type=BOOL,
+ help="Build with examples.")
+@click.option("--with-integration", default=None, type=BOOL,
+ help="Build with integration test executables.")
+# Static checks
+@click.option("--use-asan", default=None, type=BOOL,
+ help="Toggle ARROW_USE_ASAN sanitizer.")
+@click.option("--use-tsan", default=None, type=BOOL,
+ help="Toggle ARROW_USE_TSAN sanitizer.")
+@click.option("--use-ubsan", default=None, type=BOOL,
+ help="Toggle ARROW_USE_UBSAN sanitizer.")
+@click.option("--with-fuzzing", default=None, type=BOOL,
+ help="Toggle ARROW_FUZZING.")
+# Components
+@click.option("--with-compute", default=None, type=BOOL,
+ help="Build the Arrow compute module.")
+@click.option("--with-csv", default=None, type=BOOL,
+ help="Build the Arrow CSV parser module.")
+@click.option("--with-cuda", default=None, type=BOOL,
+ help="Build the Arrow CUDA extensions.")
+@click.option("--with-dataset", default=None, type=BOOL,
+ help="Build the Arrow dataset module.")
+@click.option("--with-filesystem", default=None, type=BOOL,
+ help="Build the Arrow filesystem layer.")
+@click.option("--with-flight", default=None, type=BOOL,
+ help="Build with Flight rpc support.")
+@click.option("--with-gandiva", default=None, type=BOOL,
+ help="Build with Gandiva expression compiler support.")
+@click.option("--with-hdfs", default=None, type=BOOL,
+ help="Build the Arrow HDFS bridge.")
+@click.option("--with-hiveserver2", default=None, type=BOOL,
+ help="Build the HiveServer2 client and arrow adapater.")
+@click.option("--with-ipc", default=None, type=BOOL,
+ help="Build the Arrow IPC extensions.")
+@click.option("--with-json", default=None, type=BOOL,
+ help="Build the Arrow JSON parser module.")
+@click.option("--with-jni", default=None, type=BOOL,
+ help="Build the Arrow JNI lib.")
+@click.option("--with-mimalloc", default=None, type=BOOL,
+ help="Build the Arrow mimalloc based allocator.")
+@click.option("--with-parquet", default=None, type=BOOL,
+ help="Build with Parquet file support.")
+@click.option("--with-plasma", default=None, type=BOOL,
+ help="Build with Plasma object store support.")
+@click.option("--with-python", default=None, type=BOOL,
+ help="Build the Arrow CPython extesions.")
+@click.option("--with-r", default=None, type=BOOL,
+ help="Build the Arrow R extensions. This is not a CMake option, "
+ "it will toggle required options")
+@click.option("--with-s3", default=None, type=BOOL,
+ help="Build Arrow with S3 support.")
+# Compressions
+@click.option("--with-brotli", default=None, type=BOOL,
+ help="Build Arrow with brotli compression.")
+@click.option("--with-bz2", default=None, type=BOOL,
+ help="Build Arrow with bz2 compression.")
+@click.option("--with-lz4", default=None, type=BOOL,
+ help="Build Arrow with lz4 compression.")
+@click.option("--with-snappy", default=None, type=BOOL,
+ help="Build Arrow with snappy compression.")
+@click.option("--with-zlib", default=None, type=BOOL,
+ help="Build Arrow with zlib compression.")
+@click.option("--with-zstd", default=None, type=BOOL,
+ help="Build Arrow with zstd compression.")
+# CMake extra feature
+@click.option("--cmake-extras", type=str, multiple=True,
+ help="Extra flags/options to pass to cmake invocation. "
+ "Can be stacked")
+@click.option("--install-prefix", type=str,
+ help="Destination directory where files are installed. Expand to"
+ "CMAKE_INSTALL_PREFIX. Defaults to to $CONDA_PREFIX if the"
+ "variable exists.")
+# misc
+@click.option("-f", "--force", type=BOOL, is_flag=True, default=False,
+ help="Delete existing build directory if found.")
+@click.option("--targets", type=str, multiple=True,
+ help="Generator targets to run. Can be stacked.")
+@click.argument("build_dir", type=build_dir_type)
+@click.pass_context
+def build(ctx, src, build_dir, force, targets, **kwargs):
+ """ Initialize a C++ build directory.
+
+ The build command creates a directory initialized with Arrow's cpp source
+ cmake and configuration. It can also optionally invoke the generator to
+ test the build (and used in scripts).
+
+ Note that archery will carry the caller environment. It will also not touch
+ an existing directory, one must use the `--force` option to remove the
+ existing directory.
+
+ Examples:
+
+ \b
+ # Initialize build with clang8 and avx2 support in directory `clang8-build`
+ \b
+ archery build --cc=clang-8 --cxx=clang++-8 --cxx-flags=-mavx2 clang8-build
+
+ \b
+ # Builds and run test
+ archery build --targets=all --targets=test build
+ """
+ # Arrow's cpp cmake configuration
+ conf = CppConfiguration(**kwargs)
+ # This is a closure around cmake invocation, e.g. calling `def.build()`
+ # yields a directory ready to be run with the generator
+ cmake_def = CppCMakeDefinition(src.cpp, conf)
+ # Create build directory
+ build = cmake_def.build(build_dir, force=force)
+
+ for target in targets:
+ build.run(target)
+
+
+LintCheck = namedtuple('LintCheck', ('option_name', 'help'))
+
+lint_checks = [
+ LintCheck('clang-format', "Format C++ files with clang-format."),
+ LintCheck('clang-tidy', "Lint C++ files with clang-tidy."),
+ LintCheck('cpplint', "Lint C++ files with cpplint."),
+ LintCheck('iwyu', "Lint changed C++ files with Include-What-You-Use."),
+ LintCheck('python',
+ "Format and lint Python files with autopep8 and flake8."),
+ LintCheck('numpydoc', "Lint Python files with numpydoc."),
+ LintCheck('cmake-format', "Format CMake files with cmake-format.py."),
+ LintCheck('rat',
+ "Check all sources files for license texts via Apache RAT."),
+ LintCheck('r', "Lint R files."),
+ LintCheck('docker', "Lint Dockerfiles with hadolint."),
+]
+
+
+def decorate_lint_command(cmd):
+ """
+ Decorate the lint() command function to add individual per-check options.
+ """
+ for check in lint_checks:
+ option = click.option("--{0}/--no-{0}".format(check.option_name),
+ default=None, help=check.help)
+ cmd = option(cmd)
+ return cmd
+
+
+@archery.command(short_help="Check Arrow source tree for errors")
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory")
+@click.option("--fix", is_flag=True, type=BOOL, default=False,
+ help="Toggle fixing the lint errors if the linter supports it.")
+@click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,
+ help="Run IWYU on all C++ files if enabled")
+@click.option("-a", "--all", is_flag=True, default=False,
+ help="Enable all checks.")
+@decorate_lint_command
+@click.pass_context
+def lint(ctx, src, fix, iwyu_all, **checks):
+ if checks.pop('all'):
+ # "--all" is given => enable all non-selected checks
+ for k, v in checks.items():
+ if v is None:
+ checks[k] = True
+ if not any(checks.values()):
+ raise click.UsageError(
+ "Need to enable at least one lint check (try --help)")
+ try:
+ linter(src, fix, iwyu_all=iwyu_all, **checks)
+ except LintValidationException:
+ sys.exit(1)
+
+
+@archery.command(short_help="Lint python docstring with NumpyDoc")
+@click.argument('symbols', nargs=-1)
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory")
+@click.option("--allow-rule", "-a", multiple=True,
+ help="Allow only these rules")
+@click.option("--disallow-rule", "-d", multiple=True,
+ help="Disallow these rules")
+def numpydoc(src, symbols, allow_rule, disallow_rule):
+ """
+ Pass list of modules or symbols as arguments to restrict the validation.
+
+ By default all modules of pyarrow are tried to be validated.
+
+ Examples
+ --------
+ archery numpydoc pyarrow.dataset
+ archery numpydoc pyarrow.csv pyarrow.json pyarrow.parquet
+ archery numpydoc pyarrow.array
+ """
+ disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
+ try:
+ results = python_numpydoc(symbols, allow_rules=allow_rule,
+ disallow_rules=disallow_rule)
+ for result in results:
+ result.ok()
+ except LintValidationException:
+ sys.exit(1)
+
+
+@archery.group()
+@click.pass_context
+def benchmark(ctx):
+ """ Arrow benchmarking.
+
+ Use the diff sub-command to benchmark revisions, and/or build directories.
+ """
+ pass
+
+
+def benchmark_common_options(cmd):
+ def check_language(ctx, param, value):
+ if value not in {"cpp", "java"}:
+ raise click.BadParameter("cpp or java is supported now")
+ return value
+
+ options = [
+ click.option("--src", metavar="<arrow_src>", show_default=True,
+ default=None, callback=validate_arrow_sources,
+ help="Specify Arrow source directory"),
+ click.option("--preserve", type=BOOL, default=False, show_default=True,
+ is_flag=True,
+ help="Preserve workspace for investigation."),
+ click.option("--output", metavar="<output>",
+ type=click.File("w", encoding="utf8"), default="-",
+ help="Capture output result into file."),
+ click.option("--language", metavar="<lang>", type=str, default="cpp",
+ show_default=True, callback=check_language,
+ help="Specify target language for the benchmark"),
+ click.option("--build-extras", type=str, multiple=True,
+ help="Extra flags/options to pass to mvn build. "
+ "Can be stacked. For language=java"),
+ click.option("--benchmark-extras", type=str, multiple=True,
+ help="Extra flags/options to pass to mvn benchmark. "
+ "Can be stacked. For language=java"),
+ click.option("--cmake-extras", type=str, multiple=True,
+ help="Extra flags/options to pass to cmake invocation. "
+ "Can be stacked. For language=cpp")
+ ]
+
+ cmd = java_toolchain_options(cmd)
+ cmd = cpp_toolchain_options(cmd)
+ return _apply_options(cmd, options)
+
+
+def benchmark_filter_options(cmd):
+ options = [
+ click.option("--suite-filter", metavar="<regex>", show_default=True,
+ type=str, default=None,
+ help="Regex filtering benchmark suites."),
+ click.option("--benchmark-filter", metavar="<regex>",
+ show_default=True, type=str, default=None,
+ help="Regex filtering benchmarks.")
+ ]
+ return _apply_options(cmd, options)
+
+
+@benchmark.command(name="list", short_help="List benchmark suite")
+@click.argument("rev_or_path", metavar="[<rev_or_path>]",
+ default="WORKSPACE", required=False)
+@benchmark_common_options
+@click.pass_context
+def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
+ java_home, java_options, build_extras, benchmark_extras,
+ language, **kwargs):
+ """ List benchmark suite.
+ """
+ with tmpdir(preserve=preserve) as root:
+ logger.debug("Running benchmark {}".format(rev_or_path))
+
+ if language == "cpp":
+ conf = CppBenchmarkRunner.default_configuration(
+ cmake_extras=cmake_extras, **kwargs)
+
+ runner_base = CppBenchmarkRunner.from_rev_or_path(
+ src, root, rev_or_path, conf)
+
+ elif language == "java":
+ for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+ del kwargs[key]
+ conf = JavaBenchmarkRunner.default_configuration(
+ java_home=java_home, java_options=java_options,
+ build_extras=build_extras, benchmark_extras=benchmark_extras,
+ **kwargs)
+
+ runner_base = JavaBenchmarkRunner.from_rev_or_path(
+ src, root, rev_or_path, conf)
+
+ for b in runner_base.list_benchmarks:
+ click.echo(b, file=output)
+
+
+@benchmark.command(name="run", short_help="Run benchmark suite")
+@click.argument("rev_or_path", metavar="[<rev_or_path>]",
+ default="WORKSPACE", required=False)
+@benchmark_common_options
+@benchmark_filter_options
+@click.option("--repetitions", type=int, default=-1,
+ help=("Number of repetitions of each benchmark. Increasing "
+ "may improve result precision. "
+ "[default: 1 for cpp, 5 for java"))
+@click.pass_context
+def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
+ java_home, java_options, build_extras, benchmark_extras,
+ language, suite_filter, benchmark_filter, repetitions,
+ **kwargs):
+ """ Run benchmark suite.
+
+ This command will run the benchmark suite for a single build. This is
+ used to capture (and/or publish) the results.
+
+ The caller can optionally specify a target which is either a git revision
+ (commit, tag, special values like HEAD) or a cmake build directory.
+
+ When a commit is referenced, a local clone of the arrow sources (specified
+ via --src) is performed and the proper branch is created. This is done in
+ a temporary directory which can be left intact with the `--preserve` flag.
+
+ The special token "WORKSPACE" is reserved to specify the current git
+ workspace. This imply that no clone will be performed.
+
+ Examples:
+
+ \b
+ # Run the benchmarks on current git workspace
+ \b
+ archery benchmark run
+
+ \b
+ # Run the benchmarks on current previous commit
+ \b
+ archery benchmark run HEAD~1
+
+ \b
+ # Run the benchmarks on current previous commit
+ \b
+ archery benchmark run --output=run.json
+ """
+ with tmpdir(preserve=preserve) as root:
+ logger.debug("Running benchmark {}".format(rev_or_path))
+
+ if language == "cpp":
+ conf = CppBenchmarkRunner.default_configuration(
+ cmake_extras=cmake_extras, **kwargs)
+
+ repetitions = repetitions if repetitions != -1 else 1
+ runner_base = CppBenchmarkRunner.from_rev_or_path(
+ src, root, rev_or_path, conf,
+ repetitions=repetitions,
+ suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+
+ elif language == "java":
+ for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+ del kwargs[key]
+ conf = JavaBenchmarkRunner.default_configuration(
+ java_home=java_home, java_options=java_options,
+ build_extras=build_extras, benchmark_extras=benchmark_extras,
+ **kwargs)
+
+ repetitions = repetitions if repetitions != -1 else 5
+ runner_base = JavaBenchmarkRunner.from_rev_or_path(
+ src, root, rev_or_path, conf,
+ repetitions=repetitions,
+ benchmark_filter=benchmark_filter)
+
+ json.dump(runner_base, output, cls=JsonEncoder)
+
+
+@benchmark.command(name="diff", short_help="Compare benchmark suites")
+@benchmark_common_options
+@benchmark_filter_options
+@click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
+ show_default=True,
+ help="Regression failure threshold in percentage.")
+@click.option("--repetitions", type=int, default=1, show_default=True,
+ help=("Number of repetitions of each benchmark. Increasing "
+ "may improve result precision. "
+ "[default: 1 for cpp, 5 for java"))
+@click.option("--no-counters", type=BOOL, default=False, is_flag=True,
+ help="Hide counters field in diff report.")
+@click.argument("contender", metavar="[<contender>",
+ default=ArrowSources.WORKSPACE, required=False)
+@click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
+ required=False)
+@click.pass_context
+def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
+ suite_filter, benchmark_filter, repetitions, no_counters,
+ java_home, java_options, build_extras, benchmark_extras,
+ threshold, contender, baseline, **kwargs):
+ """Compare (diff) benchmark runs.
+
+ This command acts like git-diff but for benchmark results.
+
+ The caller can optionally specify both the contender and the baseline. If
+ unspecified, the contender will default to the current workspace (like git)
+ and the baseline will default to master.
+
+ Each target (contender or baseline) can either be a git revision
+ (commit, tag, special values like HEAD) or a cmake build directory. This
+ allow comparing git commits, and/or different compilers and/or compiler
+ flags.
+
+ When a commit is referenced, a local clone of the arrow sources (specified
+ via --src) is performed and the proper branch is created. This is done in
+ a temporary directory which can be left intact with the `--preserve` flag.
+
+ The special token "WORKSPACE" is reserved to specify the current git
+ workspace. This imply that no clone will be performed.
+
+ Examples:
+
+ \b
+ # Compare workspace (contender) with master (baseline)
+ \b
+ archery benchmark diff
+
+ \b
+ # Compare master (contender) with latest version (baseline)
+ \b
+ export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1)
+ \b
+ archery benchmark diff master "$LAST"
+
+ \b
+ # Compare g++7 (contender) with clang++-8 (baseline) builds
+ \b
+ archery build --with-benchmarks=true \\
+ --cxx-flags=-ftree-vectorize \\
+ --cc=gcc-7 --cxx=g++-7 gcc7-build
+ \b
+ archery build --with-benchmarks=true \\
+ --cxx-flags=-flax-vector-conversions \\
+ --cc=clang-8 --cxx=clang++-8 clang8-build
+ \b
+ archery benchmark diff gcc7-build clang8-build
+
+ \b
+ # Compare default targets but scoped to the suites matching
+ # `^arrow-compute-aggregate` and benchmarks matching `(Sum|Mean)Kernel`.
+ \b
+ archery benchmark diff --suite-filter="^arrow-compute-aggregate" \\
+ --benchmark-filter="(Sum|Mean)Kernel"
+
+ \b
+ # Capture result in file `result.json`
+ \b
+ archery benchmark diff --output=result.json
+ \b
+ # Equivalently with no stdout clutter.
+ archery --quiet benchmark diff > result.json
+
+ \b
+ # Comparing with a cached results from `archery benchmark run`
+ \b
+ archery benchmark run --output=run.json HEAD~1
+ \b
+ # This should not recompute the benchmark from run.json
+ archery --quiet benchmark diff WORKSPACE run.json > result.json
+ """
+ with tmpdir(preserve=preserve) as root:
+ logger.debug("Comparing {} (contender) with {} (baseline)"
+ .format(contender, baseline))
+
+ if language == "cpp":
+ conf = CppBenchmarkRunner.default_configuration(
+ cmake_extras=cmake_extras, **kwargs)
+
+ repetitions = repetitions if repetitions != -1 else 1
+ runner_cont = CppBenchmarkRunner.from_rev_or_path(
+ src, root, contender, conf,
+ repetitions=repetitions,
+ suite_filter=suite_filter,
+ benchmark_filter=benchmark_filter)
+ runner_base = CppBenchmarkRunner.from_rev_or_path(
+ src, root, baseline, conf,
+ repetitions=repetitions,
+ suite_filter=suite_filter,
+ benchmark_filter=benchmark_filter)
+
+ elif language == "java":
+ for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+ del kwargs[key]
+ conf = JavaBenchmarkRunner.default_configuration(
+ java_home=java_home, java_options=java_options,
+ build_extras=build_extras, benchmark_extras=benchmark_extras,
+ **kwargs)
+
+ repetitions = repetitions if repetitions != -1 else 5
+ runner_cont = JavaBenchmarkRunner.from_rev_or_path(
+ src, root, contender, conf,
+ repetitions=repetitions,
+ benchmark_filter=benchmark_filter)
+ runner_base = JavaBenchmarkRunner.from_rev_or_path(
+ src, root, baseline, conf,
+ repetitions=repetitions,
+ benchmark_filter=benchmark_filter)
+
+ runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
+
+ # TODO(kszucs): test that the output is properly formatted jsonlines
+ comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+ ren_counters = language == "java"
+ formatted = _format_comparisons_with_pandas(comparisons_json,
+ no_counters, ren_counters)
+ output.write(formatted)
+ output.write('\n')
+
+
+def _get_comparisons_as_json(comparisons):
+ buf = StringIO()
+ for comparator in comparisons:
+ json.dump(comparator, buf, cls=JsonEncoder)
+ buf.write("\n")
+
+ return buf.getvalue()
+
+
+def _format_comparisons_with_pandas(comparisons_json, no_counters,
+ ren_counters):
+ pd = _import_pandas()
+ df = pd.read_json(StringIO(comparisons_json), lines=True)
+ # parse change % so we can sort by it
+ df['change %'] = df.pop('change').str[:-1].map(float)
+ first_regression = len(df) - df['regression'].sum()
+
+ fields = ['benchmark', 'baseline', 'contender', 'change %']
+ if not no_counters:
+ fields += ['counters']
+
+ df = df[fields]
+ if ren_counters:
+ df = df.rename(columns={'counters': 'configurations'})
+ df = df.sort_values(by='change %', ascending=False)
+
+ def labelled(title, df):
+ if len(df) == 0:
+ return ''
+ title += ': ({})'.format(len(df))
+ df_str = df.to_string(index=False)
+ bar = '-' * df_str.index('\n')
+ return '\n'.join([bar, title, bar, df_str])
+
+ return '\n\n'.join([labelled('Non-regressions', df[:first_regression]),
+ labelled('Regressions', df[first_regression:])])
+
+
+# ----------------------------------------------------------------------
+# Integration testing
+
+def _set_default(opt, default):
+ if opt is None:
+ return default
+ return opt
+
+
+@archery.command(short_help="Execute protocol and Flight integration tests")
+@click.option('--with-all', is_flag=True, default=False,
+ help=('Include all known languages by default '
+ 'in integration tests'))
+@click.option('--random-seed', type=int, default=12345,
+ help="Seed for PRNG when generating test data")
+@click.option('--with-cpp', type=bool, default=False,
+ help='Include C++ in integration tests')
+@click.option('--with-csharp', type=bool, default=False,
+ help='Include C# in integration tests')
+@click.option('--with-java', type=bool, default=False,
+ help='Include Java in integration tests')
+@click.option('--with-js', type=bool, default=False,
+ help='Include JavaScript in integration tests')
+@click.option('--with-go', type=bool, default=False,
+ help='Include Go in integration tests')
+@click.option('--with-rust', type=bool, default=False,
+ help='Include Rust in integration tests',
+ envvar="ARCHERY_INTEGRATION_WITH_RUST")
+@click.option('--write_generated_json', default=False,
+ help='Generate test JSON to indicated path')
+@click.option('--run-flight', is_flag=True, default=False,
+ help='Run Flight integration tests')
+@click.option('--debug', is_flag=True, default=False,
+ help='Run executables in debug mode as relevant')
+@click.option('--serial', is_flag=True, default=False,
+ help='Run tests serially, rather than in parallel')
+@click.option('--tempdir', default=None,
+ help=('Directory to use for writing '
+ 'integration test temporary files'))
+@click.option('stop_on_error', '-x', '--stop-on-error',
+ is_flag=True, default=False,
+ help='Stop on first error')
+@click.option('--gold-dirs', multiple=True,
+ help="gold integration test file paths")
+@click.option('-k', '--match',
+ help=("Substring for test names to include in run, "
+ "e.g. -k primitive"))
+def integration(with_all=False, random_seed=12345, **args):
+ from .integration.runner import write_js_test_json, run_all_tests
+ import numpy as np
+
+ # FIXME(bkietz) Include help strings for individual testers.
+ # For example, CPPTester's ARROW_CPP_EXE_PATH environment variable.
+
+ # Make runs involving data generation deterministic
+ np.random.seed(random_seed)
+
+ gen_path = args['write_generated_json']
+
+ languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust']
+
+ enabled_languages = 0
+ for lang in languages:
+ param = 'with_{}'.format(lang)
+ if with_all:
+ args[param] = with_all
+
+ if args[param]:
+ enabled_languages += 1
+
+ if gen_path:
+ try:
+ os.makedirs(gen_path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+ write_js_test_json(gen_path)
+ else:
+ if enabled_languages == 0:
+ raise Exception("Must enable at least 1 language to test")
+ run_all_tests(**args)
+
+
+@archery.command()
+@click.option('--event-name', '-n', required=True)
+@click.option('--event-payload', '-p', type=click.File('r', encoding='utf8'),
+ default='-', required=True)
+@click.option('--arrow-token', envvar='ARROW_GITHUB_TOKEN',
+ help='OAuth token for responding comment in the arrow repo')
+def trigger_bot(event_name, event_payload, arrow_token):
+ from .bot import CommentBot, actions
+
+ event_payload = json.loads(event_payload.read())
+
+ bot = CommentBot(name='github-actions', handler=actions, token=arrow_token)
+ bot.handle(event_name, event_payload)
+
+
+@archery.group('release')
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory.")
+@click.option("--jira-cache", type=click.Path(), default=None,
+ help="File path to cache queried JIRA issues per version.")
+@click.pass_obj
+def release(obj, src, jira_cache):
+ """Release releated commands."""
+ from .release import Jira, CachedJira
+
+ jira = Jira()
+ if jira_cache is not None:
+ jira = CachedJira(jira_cache, jira=jira)
+
+ obj['jira'] = jira
+ obj['repo'] = src.path
+
+
+@release.command('curate')
+@click.argument('version')
+@click.pass_obj
+def release_curate(obj, version):
+ """Release curation."""
+ from .release import Release
+
+ release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
+ curation = release.curate()
+
+ click.echo(curation.render('console'))
+
+
+@release.group('changelog')
+def release_changelog():
+ """Release changelog."""
+ pass
+
+
+@release_changelog.command('add')
+@click.argument('version')
+@click.pass_obj
+def release_changelog_add(obj, version):
+ """Prepend the changelog with the current release"""
+ from .release import Release
+
+ jira, repo = obj['jira'], obj['repo']
+
+ # just handle the current version
+ release = Release.from_jira(version, jira=jira, repo=repo)
+ if release.is_released:
+ raise ValueError('This version has been already released!')
+
+ changelog = release.changelog()
+ changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+
+ current_content = changelog_path.read_text()
+ new_content = changelog.render('markdown') + current_content
+
+ changelog_path.write_text(new_content)
+ click.echo("CHANGELOG.md is updated!")
+
+
+@release_changelog.command('generate')
+@click.argument('version')
+@click.argument('output', type=click.File('w', encoding='utf8'), default='-')
+@click.pass_obj
+def release_changelog_generate(obj, version, output):
+ """Generate the changelog of a specific release."""
+ from .release import Release
+
+ jira, repo = obj['jira'], obj['repo']
+
+ # just handle the current version
+ release = Release.from_jira(version, jira=jira, repo=repo)
+
+ changelog = release.changelog()
+ output.write(changelog.render('markdown'))
+
+
+@release_changelog.command('regenerate')
+@click.pass_obj
+def release_changelog_regenerate(obj):
+ """Regeneretate the whole CHANGELOG.md file"""
+ from .release import Release
+
+ jira, repo = obj['jira'], obj['repo']
+ changelogs = []
+
+ for version in jira.project_versions('ARROW'):
+ if not version.released:
+ continue
+ release = Release.from_jira(version, jira=jira, repo=repo)
+ click.echo('Querying changelog for version: {}'.format(version))
+ changelogs.append(release.changelog())
+
+ click.echo('Rendering new CHANGELOG.md file...')
+ changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
+ with changelog_path.open('w') as fp:
+ for cl in changelogs:
+ fp.write(cl.render('markdown'))
+
+
+@release.command('cherry-pick')
+@click.argument('version')
+@click.option('--dry-run/--execute', default=True,
+ help="Display the git commands instead of executing them.")
+@click.option('--recreate/--continue', default=True,
+ help="Recreate the maintenance branch or only apply unapplied "
+ "patches.")
+@click.pass_obj
+def release_cherry_pick(obj, version, dry_run, recreate):
+ """
+ Cherry pick commits.
+ """
+ from .release import Release, MinorRelease, PatchRelease
+
+ release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
+ if not isinstance(release, (MinorRelease, PatchRelease)):
+ raise click.UsageError('Cherry-pick command only supported for minor '
+ 'and patch releases')
+
+ if not dry_run:
+ release.cherry_pick_commits(recreate_branch=recreate)
+ click.echo('Executed the following commands:\n')
+
+ click.echo(
+ 'git checkout {} -b {}'.format(release.previous.tag, release.branch)
+ )
+ for commit in release.commits_to_pick():
+ click.echo('git cherry-pick {}'.format(commit.hexsha))
+
+
+@archery.group("linking")
+@click.pass_obj
+def linking(obj):
+ """
+ Quick and dirty utilities for checking library linkage.
+ """
+ pass
+
+
+@linking.command("check-dependencies")
+@click.argument("paths", nargs=-1)
+@click.option("--allow", "-a", "allowed", multiple=True,
+ help="Name of the allowed libraries")
+@click.option("--disallow", "-d", "disallowed", multiple=True,
+ help="Name of the disallowed libraries")
+@click.pass_obj
+def linking_check_dependencies(obj, allowed, disallowed, paths):
+ from .linking import check_dynamic_library_dependencies, DependencyError
+
+ allowed, disallowed = set(allowed), set(disallowed)
+ try:
+ for path in map(pathlib.Path, paths):
+ check_dynamic_library_dependencies(path, allowed=allowed,
+ disallowed=disallowed)
+ except DependencyError as e:
+ raise click.ClickException(str(e))
+
+
+add_optional_command("docker", module=".docker.cli", function="docker",
+ parent=archery)
+add_optional_command("crossbow", module=".crossbow.cli", function="crossbow",
+ parent=archery)
+
+
+if __name__ == "__main__":
+ archery(obj={})
diff --git a/src/arrow/dev/archery/archery/compat.py b/src/arrow/dev/archery/archery/compat.py
new file mode 100644
index 000000000..bb0b15428
--- /dev/null
+++ b/src/arrow/dev/archery/archery/compat.py
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+import sys
+
+
+def _is_path_like(path):
+ # PEP519 filesystem path protocol is available from python 3.6, so pathlib
+ # doesn't implement __fspath__ for earlier versions
+ return (isinstance(path, str) or
+ hasattr(path, '__fspath__') or
+ isinstance(path, pathlib.Path))
+
+
+def _ensure_path(path):
+ if isinstance(path, pathlib.Path):
+ return path
+ else:
+ return pathlib.Path(_stringify_path(path))
+
+
+def _stringify_path(path):
+ """
+ Convert *path* to a string or unicode path if possible.
+ """
+ if isinstance(path, str):
+ return path
+
+ # checking whether path implements the filesystem protocol
+ try:
+ return path.__fspath__() # new in python 3.6
+ except AttributeError:
+ # fallback pathlib ckeck for earlier python versions than 3.6
+ if isinstance(path, pathlib.Path):
+ return str(path)
+
+ raise TypeError("not a path-like object")
+
+
+def _import_pandas():
+ # ARROW-13425: avoid importing PyArrow from Pandas
+ sys.modules['pyarrow'] = None
+ import pandas as pd
+ return pd
diff --git a/src/arrow/dev/archery/archery/crossbow/__init__.py b/src/arrow/dev/archery/archery/crossbow/__init__.py
new file mode 100644
index 000000000..bc72e81f0
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/__init__.py
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .core import Config, Repo, Queue, Target, Job # noqa
+from .reports import CommentReport, ConsoleReport, EmailReport # noqa
diff --git a/src/arrow/dev/archery/archery/crossbow/cli.py b/src/arrow/dev/archery/archery/crossbow/cli.py
new file mode 100644
index 000000000..1d0610343
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/cli.py
@@ -0,0 +1,365 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import click
+
+from .core import Config, Repo, Queue, Target, Job, CrossbowError
+from .reports import EmailReport, ConsoleReport
+from ..utils.source import ArrowSources
+
+
+_default_arrow_path = ArrowSources.find().path
+_default_queue_path = _default_arrow_path.parent / "crossbow"
+_default_config_path = _default_arrow_path / "dev" / "tasks" / "tasks.yml"
+
+
+@click.group()
+@click.option('--github-token', '-t', default=None,
+ envvar="CROSSBOW_GITHUB_TOKEN",
+ help='OAuth token for GitHub authentication')
+@click.option('--arrow-path', '-a',
+ type=click.Path(), default=_default_arrow_path,
+ help='Arrow\'s repository path. Defaults to the repository of '
+ 'this script')
+@click.option('--queue-path', '-q',
+ type=click.Path(), default=_default_queue_path,
+ help='The repository path used for scheduling the tasks. '
+ 'Defaults to crossbow directory placed next to arrow')
+@click.option('--queue-remote', '-qr', default=None,
+ help='Force to use this remote URL for the Queue repository')
+@click.option('--output-file', metavar='<output>',
+ type=click.File('w', encoding='utf8'), default='-',
+ help='Capture output result into file.')
+@click.pass_context
+def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
+ output_file):
+ """
+ Schedule packaging tasks or nightly builds on CI services.
+ """
+ ctx.ensure_object(dict)
+ ctx.obj['output'] = output_file
+ ctx.obj['arrow'] = Repo(arrow_path)
+ ctx.obj['queue'] = Queue(queue_path, remote_url=queue_remote,
+ github_token=github_token, require_https=True)
+
+
+@crossbow.command()
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.pass_obj
+def check_config(obj, config_path):
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(config_path)
+ config.validate()
+
+ output = obj['output']
+ config.show(output)
+
+
+@crossbow.command()
+@click.argument('tasks', nargs=-1, required=False)
+@click.option('--group', '-g', 'groups', multiple=True,
+ help='Submit task groups as defined in task.yml')
+@click.option('--param', '-p', 'params', multiple=True,
+ help='Additional task parameters for rendering the CI templates')
+@click.option('--job-prefix', default='build',
+ help='Arbitrary prefix for branch names, e.g. nightly')
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
+@click.option('--arrow-remote', '-r', default=None,
+ help='Set GitHub remote explicitly, which is going to be cloned '
+ 'on the CI services. Note, that no validation happens '
+ 'locally. Examples: https://github.com/apache/arrow or '
+ 'https://github.com/kszucs/arrow.')
+@click.option('--arrow-branch', '-b', default=None,
+ help='Give the branch name explicitly, e.g. master, ARROW-1949.')
+@click.option('--arrow-sha', '-t', default=None,
+ help='Set commit SHA or Tag name explicitly, e.g. f67a515, '
+ 'apache-arrow-0.11.1.')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.option('--dry-run/--commit', default=False,
+ help='Just display the rendered CI configurations without '
+ 'committing them')
+@click.option('--no-push/--push', default=False,
+ help='Don\'t push the changes')
+@click.pass_obj
+def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version,
+ arrow_remote, arrow_branch, arrow_sha, fetch, dry_run, no_push):
+ output = obj['output']
+ queue, arrow = obj['queue'], obj['arrow']
+
+ # load available tasks configuration and groups from yaml
+ config = Config.load_yaml(config_path)
+ try:
+ config.validate()
+ except CrossbowError as e:
+ raise click.ClickException(str(e))
+
+ # Override the detected repo url / remote, branch and sha - this aims to
+ # make release procedure a bit simpler.
+ # Note, that the target resivion's crossbow templates must be
+ # compatible with the locally checked out version of crossbow (which is
+ # in case of the release procedure), because the templates still
+ # contain some business logic (dependency installation, deployments)
+ # which will be reduced to a single command in the future.
+ target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch,
+ head=arrow_sha, version=arrow_version)
+
+ # parse additional job parameters
+ params = dict([p.split("=") for p in params])
+
+ # instantiate the job object
+ try:
+ job = Job.from_config(config=config, target=target, tasks=tasks,
+ groups=groups, params=params)
+ except CrossbowError as e:
+ raise click.ClickException(str(e))
+
+ job.show(output)
+ if dry_run:
+ return
+
+ if fetch:
+ queue.fetch()
+ queue.put(job, prefix=job_prefix)
+
+ if no_push:
+ click.echo('Branches and commits created but not pushed: `{}`'
+ .format(job.branch))
+ else:
+ queue.push()
+ click.echo('Pushed job identifier is: `{}`'.format(job.branch))
+
+
+@crossbow.command()
+@click.argument('task', required=True)
+@click.option('--config-path', '-c',
+ type=click.Path(exists=True), default=_default_config_path,
+ help='Task configuration yml. Defaults to tasks.yml')
+@click.option('--arrow-version', '-v', default=None,
+ help='Set target version explicitly.')
+@click.option('--arrow-remote', '-r', default=None,
+ help='Set GitHub remote explicitly, which is going to be cloned '
+ 'on the CI services. Note, that no validation happens '
+ 'locally. Examples: https://github.com/apache/arrow or '
+ 'https://github.com/kszucs/arrow.')
+@click.option('--arrow-branch', '-b', default=None,
+ help='Give the branch name explicitly, e.g. master, ARROW-1949.')
+@click.option('--arrow-sha', '-t', default=None,
+ help='Set commit SHA or Tag name explicitly, e.g. f67a515, '
+ 'apache-arrow-0.11.1.')
+@click.option('--param', '-p', 'params', multiple=True,
+ help='Additional task parameters for rendering the CI templates')
+@click.pass_obj
+def render(obj, task, config_path, arrow_version, arrow_remote, arrow_branch,
+ arrow_sha, params):
+ """
+ Utility command to check the rendered CI templates.
+ """
+ from .core import _flatten
+
+ def highlight(code):
+ try:
+ from pygments import highlight
+ from pygments.lexers import YamlLexer
+ from pygments.formatters import TerminalFormatter
+ return highlight(code, YamlLexer(), TerminalFormatter())
+ except ImportError:
+ return code
+
+ arrow = obj['arrow']
+
+ target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch,
+ head=arrow_sha, version=arrow_version)
+ config = Config.load_yaml(config_path)
+ params = dict([p.split("=") for p in params])
+ params["queue_remote_url"] = "https://github.com/org/crossbow"
+ job = Job.from_config(config=config, target=target, tasks=[task],
+ params=params)
+
+ for task_name, rendered_files in job.render_tasks().items():
+ for path, content in _flatten(rendered_files).items():
+ click.echo('#' * 80)
+ click.echo('### {:^72} ###'.format("/".join(path)))
+ click.echo('#' * 80)
+ click.echo(highlight(content))
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.option('--task-filter', '-f', 'task_filters', multiple=True,
+ help='Glob pattern for filtering relevant tasks')
+@click.pass_obj
+def status(obj, job_name, fetch, task_filters):
+ output = obj['output']
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+ job = queue.get(job_name)
+
+ report = ConsoleReport(job, task_filters=task_filters)
+ report.show(output)
+
+
+@crossbow.command()
+@click.argument('prefix', required=True)
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def latest_prefix(obj, prefix, fetch):
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+ latest = queue.latest_for_prefix(prefix)
+ click.echo(latest.branch)
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('--sender-name', '-n',
+ help='Name to use for report e-mail.')
+@click.option('--sender-email', '-e',
+ help='E-mail to use for report e-mail.')
+@click.option('--recipient-email', '-r',
+ help='Where to send the e-mail report')
+@click.option('--smtp-user', '-u',
+ help='E-mail address to use for SMTP login')
+@click.option('--smtp-password', '-P',
+ help='SMTP password to use for report e-mail.')
+@click.option('--smtp-server', '-s', default='smtp.gmail.com',
+ help='SMTP server to use for report e-mail.')
+@click.option('--smtp-port', '-p', default=465,
+ help='SMTP port to use for report e-mail.')
+@click.option('--poll/--no-poll', default=False,
+ help='Wait for completion if there are tasks pending')
+@click.option('--poll-max-minutes', default=180,
+ help='Maximum amount of time waiting for job completion')
+@click.option('--poll-interval-minutes', default=10,
+ help='Number of minutes to wait to check job status again')
+@click.option('--send/--dry-run', default=False,
+ help='Just display the report, don\'t send it')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.pass_obj
+def report(obj, job_name, sender_name, sender_email, recipient_email,
+ smtp_user, smtp_password, smtp_server, smtp_port, poll,
+ poll_max_minutes, poll_interval_minutes, send, fetch):
+ """
+ Send an e-mail report showing success/failure of tasks in a Crossbow run
+ """
+ output = obj['output']
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+
+ job = queue.get(job_name)
+ report = EmailReport(
+ job=job,
+ sender_name=sender_name,
+ sender_email=sender_email,
+ recipient_email=recipient_email
+ )
+
+ if poll:
+ job.wait_until_finished(
+ poll_max_minutes=poll_max_minutes,
+ poll_interval_minutes=poll_interval_minutes
+ )
+
+ if send:
+ report.send(
+ smtp_user=smtp_user,
+ smtp_password=smtp_password,
+ smtp_server=smtp_server,
+ smtp_port=smtp_port
+ )
+ else:
+ report.show(output)
+
+
+@crossbow.command()
+@click.argument('job-name', required=True)
+@click.option('-t', '--target-dir',
+ default=_default_arrow_path / 'packages',
+ type=click.Path(file_okay=False, dir_okay=True),
+ help='Directory to download the build artifacts')
+@click.option('--dry-run/--execute', default=False,
+ help='Just display process, don\'t download anything')
+@click.option('--fetch/--no-fetch', default=True,
+ help='Fetch references (branches and tags) from the remote')
+@click.option('--task-filter', '-f', 'task_filters', multiple=True,
+ help='Glob pattern for filtering relevant tasks')
+@click.option('--validate-patterns/--skip-pattern-validation', default=True,
+ help='Whether to validate artifact name patterns or not')
+@click.pass_obj
+def download_artifacts(obj, job_name, target_dir, dry_run, fetch,
+ validate_patterns, task_filters):
+ """Download build artifacts from GitHub releases"""
+ output = obj['output']
+
+ # fetch the queue repository
+ queue = obj['queue']
+ if fetch:
+ queue.fetch()
+
+ # query the job's artifacts
+ job = queue.get(job_name)
+
+ # create directory to download the assets to
+ target_dir = Path(target_dir).absolute() / job_name
+ target_dir.mkdir(parents=True, exist_ok=True)
+
+ # download the assets while showing the job status
+ def asset_callback(task_name, task, asset):
+ if asset is not None:
+ path = target_dir / task_name / asset.name
+ path.parent.mkdir(exist_ok=True)
+ if not dry_run:
+ asset.download(path)
+
+ click.echo('Downloading {}\'s artifacts.'.format(job_name))
+ click.echo('Destination directory is {}'.format(target_dir))
+ click.echo()
+
+ report = ConsoleReport(job, task_filters=task_filters)
+ report.show(
+ output,
+ asset_callback=asset_callback,
+ validate_patterns=validate_patterns
+ )
+
+
+@crossbow.command()
+@click.argument('patterns', nargs=-1, required=True)
+@click.option('--sha', required=True, help='Target committish')
+@click.option('--tag', required=True, help='Target tag')
+@click.option('--method', default='curl', help='Use cURL to upload')
+@click.pass_obj
+def upload_artifacts(obj, tag, sha, patterns, method):
+ queue = obj['queue']
+ queue.github_overwrite_release_assets(
+ tag_name=tag, target_commitish=sha, method=method, patterns=patterns
+ )
diff --git a/src/arrow/dev/archery/archery/crossbow/core.py b/src/arrow/dev/archery/archery/crossbow/core.py
new file mode 100644
index 000000000..0f2309e47
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/core.py
@@ -0,0 +1,1172 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import fnmatch
+import glob
+import time
+import logging
+import mimetypes
+import subprocess
+import textwrap
+from io import StringIO
+from pathlib import Path
+from datetime import date
+
+import jinja2
+from ruamel.yaml import YAML
+
+try:
+ import github3
+ _have_github3 = True
+except ImportError:
+ github3 = object
+ _have_github3 = False
+
+try:
+ import pygit2
+except ImportError:
+ PygitRemoteCallbacks = object
+else:
+ PygitRemoteCallbacks = pygit2.RemoteCallbacks
+
+from ..utils.source import ArrowSources
+
+
+for pkg in ["requests", "urllib3", "github3"]:
+ logging.getLogger(pkg).setLevel(logging.WARNING)
+
+logger = logging.getLogger("crossbow")
+
+
+class CrossbowError(Exception):
+ pass
+
+
+def _flatten(mapping):
+ """Converts a hierarchical mapping to a flat dictionary"""
+ result = {}
+ for k, v in mapping.items():
+ if isinstance(v, dict):
+ for ik, iv in _flatten(v).items():
+ ik = ik if isinstance(ik, tuple) else (ik,)
+ result[(k,) + ik] = iv
+ elif isinstance(v, list):
+ for ik, iv in enumerate(_flatten(v)):
+ ik = ik if isinstance(ik, tuple) else (ik,)
+ result[(k,) + ik] = iv
+ else:
+ result[(k,)] = v
+ return result
+
+
+def _unflatten(mapping):
+ """Converts a flat tuple => object mapping to hierarchical one"""
+ result = {}
+ for path, value in mapping.items():
+ parents, leaf = path[:-1], path[-1]
+ # create the hierarchy until we reach the leaf value
+ temp = result
+ for parent in parents:
+ temp.setdefault(parent, {})
+ temp = temp[parent]
+ # set the leaf value
+ temp[leaf] = value
+
+ return result
+
+
+def _unflatten_tree(files):
+ """Converts a flat path => object mapping to a hierarchical directories
+
+ Input:
+ {
+ 'path/to/file.a': a_content,
+ 'path/to/file.b': b_content,
+ 'path/file.c': c_content
+ }
+ Output:
+ {
+ 'path': {
+ 'to': {
+ 'file.a': a_content,
+ 'file.b': b_content
+ },
+ 'file.c': c_content
+ }
+ }
+ """
+ files = {tuple(k.split('/')): v for k, v in files.items()}
+ return _unflatten(files)
+
+
+def _render_jinja_template(searchpath, template, params):
+ def format_all(items, pattern):
+ return [pattern.format(item) for item in items]
+
+ loader = jinja2.FileSystemLoader(searchpath)
+ env = jinja2.Environment(loader=loader, trim_blocks=True,
+ lstrip_blocks=True,
+ undefined=jinja2.StrictUndefined)
+ env.filters['format_all'] = format_all
+ template = env.get_template(template)
+ return template.render(**params)
+
+
+# configurations for setting up branch skipping
+# - appveyor has a feature to skip builds without an appveyor.yml
+# - travis reads from the master branch and applies the rules
+# - circle requires the configuration to be present on all branch, even ones
+# that are configured to be skipped
+# - azure skips branches without azure-pipelines.yml by default
+# - github skips branches without .github/workflows/ by default
+
+_default_travis_yml = """
+branches:
+ only:
+ - master
+ - /.*-travis-.*/
+
+os: linux
+dist: trusty
+language: generic
+"""
+
+_default_circle_yml = """
+version: 2
+
+jobs:
+ build:
+ machine: true
+
+workflows:
+ version: 2
+ build:
+ jobs:
+ - build:
+ filters:
+ branches:
+ only:
+ - /.*-circle-.*/
+"""
+
+_default_tree = {
+ '.travis.yml': _default_travis_yml,
+ '.circleci/config.yml': _default_circle_yml
+}
+
+
+class GitRemoteCallbacks(PygitRemoteCallbacks):
+
+ def __init__(self, token):
+ self.token = token
+ self.attempts = 0
+ super().__init__()
+
+ def push_update_reference(self, refname, message):
+ pass
+
+ def update_tips(self, refname, old, new):
+ pass
+
+ def credentials(self, url, username_from_url, allowed_types):
+ # its a libgit2 bug, that it infinitely retries the authentication
+ self.attempts += 1
+
+ if self.attempts >= 5:
+ # pygit2 doesn't propagate the exception properly
+ msg = 'Wrong oauth personal access token'
+ print(msg)
+ raise CrossbowError(msg)
+
+ if (allowed_types &
+ pygit2.credentials.GIT_CREDENTIAL_USERPASS_PLAINTEXT):
+ return pygit2.UserPass(self.token, 'x-oauth-basic')
+ else:
+ return None
+
+
+def _git_ssh_to_https(url):
+ return url.replace('git@github.com:', 'https://github.com/')
+
+
+class Repo:
+ """
+ Base class for interaction with local git repositories
+
+ A high level wrapper used for both reading revision information from
+ arrow's repository and pushing continuous integration tasks to the queue
+ repository.
+
+ Parameters
+ ----------
+ require_https : boolean, default False
+ Raise exception for SSH origin URLs
+ """
+
+ def __init__(self, path, github_token=None, remote_url=None,
+ require_https=False):
+ self.path = Path(path)
+ self.github_token = github_token
+ self.require_https = require_https
+ self._remote_url = remote_url
+ self._pygit_repo = None
+ self._github_repo = None # set by as_github_repo()
+ self._updated_refs = []
+
+ def __str__(self):
+ tpl = textwrap.dedent('''
+ Repo: {remote}@{branch}
+ Commit: {head}
+ ''')
+ return tpl.format(
+ remote=self.remote_url,
+ branch=self.branch.branch_name,
+ head=self.head
+ )
+
+ @property
+ def repo(self):
+ if self._pygit_repo is None:
+ self._pygit_repo = pygit2.Repository(str(self.path))
+ return self._pygit_repo
+
+ @property
+ def origin(self):
+ remote = self.repo.remotes['origin']
+ if self.require_https and remote.url.startswith('git@github.com'):
+ raise CrossbowError("Change SSH origin URL to HTTPS to use "
+ "Crossbow: {}".format(remote.url))
+ return remote
+
+ def fetch(self):
+ refspec = '+refs/heads/*:refs/remotes/origin/*'
+ self.origin.fetch([refspec])
+
+ def push(self, refs=None, github_token=None):
+ github_token = github_token or self.github_token
+ if github_token is None:
+ raise RuntimeError(
+ 'Could not determine GitHub token. Please set the '
+ 'CROSSBOW_GITHUB_TOKEN environment variable to a '
+ 'valid GitHub access token or pass one to --github-token.'
+ )
+ callbacks = GitRemoteCallbacks(github_token)
+ refs = refs or []
+ try:
+ self.origin.push(refs + self._updated_refs, callbacks=callbacks)
+ except pygit2.GitError:
+ raise RuntimeError('Failed to push updated references, '
+ 'potentially because of credential issues: {}'
+ .format(self._updated_refs))
+ else:
+ self.updated_refs = []
+
+ @property
+ def head(self):
+ """Currently checked out commit's sha"""
+ return self.repo.head
+
+ @property
+ def branch(self):
+ """Currently checked out branch"""
+ try:
+ return self.repo.branches[self.repo.head.shorthand]
+ except KeyError:
+ return None # detached
+
+ @property
+ def remote(self):
+ """Currently checked out branch's remote counterpart"""
+ try:
+ return self.repo.remotes[self.branch.upstream.remote_name]
+ except (AttributeError, KeyError):
+ return None # cannot detect
+
+ @property
+ def remote_url(self):
+ """Currently checked out branch's remote counterpart URL
+
+ If an SSH github url is set, it will be replaced by the https
+ equivalent usable with GitHub OAuth token.
+ """
+ try:
+ return self._remote_url or _git_ssh_to_https(self.remote.url)
+ except AttributeError:
+ return None
+
+ @property
+ def user_name(self):
+ try:
+ return next(self.repo.config.get_multivar('user.name'))
+ except StopIteration:
+ return os.environ.get('GIT_COMMITTER_NAME', 'unknown')
+
+ @property
+ def user_email(self):
+ try:
+ return next(self.repo.config.get_multivar('user.email'))
+ except StopIteration:
+ return os.environ.get('GIT_COMMITTER_EMAIL', 'unknown')
+
+ @property
+ def signature(self):
+ return pygit2.Signature(self.user_name, self.user_email,
+ int(time.time()))
+
+ def create_tree(self, files):
+ builder = self.repo.TreeBuilder()
+
+ for filename, content in files.items():
+ if isinstance(content, dict):
+ # create a subtree
+ tree_id = self.create_tree(content)
+ builder.insert(filename, tree_id, pygit2.GIT_FILEMODE_TREE)
+ else:
+ # create a file
+ blob_id = self.repo.create_blob(content)
+ builder.insert(filename, blob_id, pygit2.GIT_FILEMODE_BLOB)
+
+ tree_id = builder.write()
+ return tree_id
+
+ def create_commit(self, files, parents=None, message='',
+ reference_name=None):
+ if parents is None:
+ # by default use the main branch as the base of the new branch
+ # required to reuse github actions cache across crossbow tasks
+ commit, _ = self.repo.resolve_refish("master")
+ parents = [commit.id]
+ tree_id = self.create_tree(files)
+
+ author = committer = self.signature
+ commit_id = self.repo.create_commit(reference_name, author, committer,
+ message, tree_id, parents)
+ return self.repo[commit_id]
+
+ def create_branch(self, branch_name, files, parents=None, message='',
+ signature=None):
+ # create commit with the passed tree
+ commit = self.create_commit(files, parents=parents, message=message)
+
+ # create branch pointing to the previously created commit
+ branch = self.repo.create_branch(branch_name, commit)
+
+ # append to the pushable references
+ self._updated_refs.append('refs/heads/{}'.format(branch_name))
+
+ return branch
+
+ def create_tag(self, tag_name, commit_id, message=''):
+ tag_id = self.repo.create_tag(tag_name, commit_id,
+ pygit2.GIT_OBJ_COMMIT, self.signature,
+ message)
+
+ # append to the pushable references
+ self._updated_refs.append('refs/tags/{}'.format(tag_name))
+
+ return self.repo[tag_id]
+
+ def file_contents(self, commit_id, file):
+ commit = self.repo[commit_id]
+ entry = commit.tree[file]
+ blob = self.repo[entry.id]
+ return blob.data
+
+ def _parse_github_user_repo(self):
+ m = re.match(r'.*\/([^\/]+)\/([^\/\.]+)(\.git)?$', self.remote_url)
+ if m is None:
+ raise CrossbowError(
+ "Unable to parse the github owner and repository from the "
+ "repository's remote url '{}'".format(self.remote_url)
+ )
+ user, repo = m.group(1), m.group(2)
+ return user, repo
+
+ def as_github_repo(self, github_token=None):
+ """Converts it to a repository object which wraps the GitHub API"""
+ if self._github_repo is None:
+ if not _have_github3:
+ raise ImportError('Must install github3.py')
+ github_token = github_token or self.github_token
+ username, reponame = self._parse_github_user_repo()
+ session = github3.session.GitHubSession(
+ default_connect_timeout=10,
+ default_read_timeout=30
+ )
+ github = github3.GitHub(session=session)
+ github.login(token=github_token)
+ self._github_repo = github.repository(username, reponame)
+ return self._github_repo
+
+ def github_commit(self, sha):
+ repo = self.as_github_repo()
+ return repo.commit(sha)
+
+ def github_release(self, tag):
+ repo = self.as_github_repo()
+ try:
+ return repo.release_from_tag(tag)
+ except github3.exceptions.NotFoundError:
+ return None
+
+ def github_upload_asset_requests(self, release, path, name, mime,
+ max_retries=None, retry_backoff=None):
+ if max_retries is None:
+ max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
+ if retry_backoff is None:
+ retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
+
+ for i in range(max_retries):
+ try:
+ with open(path, 'rb') as fp:
+ result = release.upload_asset(name=name, asset=fp,
+ content_type=mime)
+ except github3.exceptions.ResponseError as e:
+ logger.error('Attempt {} has failed with message: {}.'
+ .format(i + 1, str(e)))
+ logger.error('Error message {}'.format(e.msg))
+ logger.error('List of errors provided by Github:')
+ for err in e.errors:
+ logger.error(' - {}'.format(err))
+
+ if e.code == 422:
+ # 422 Validation Failed, probably raised because
+ # ReleaseAsset already exists, so try to remove it before
+ # reattempting the asset upload
+ for asset in release.assets():
+ if asset.name == name:
+ logger.info('Release asset {} already exists, '
+ 'removing it...'.format(name))
+ asset.delete()
+ logger.info('Asset {} removed.'.format(name))
+ break
+ except github3.exceptions.ConnectionError as e:
+ logger.error('Attempt {} has failed with message: {}.'
+ .format(i + 1, str(e)))
+ else:
+ logger.info('Attempt {} has finished.'.format(i + 1))
+ return result
+
+ time.sleep(retry_backoff)
+
+ raise RuntimeError('Github asset uploading has failed!')
+
+ def github_upload_asset_curl(self, release, path, name, mime):
+ upload_url, _ = release.upload_url.split('{?')
+ upload_url += '?name={}'.format(name)
+
+ command = [
+ 'curl',
+ '--fail',
+ '-H', "Authorization: token {}".format(self.github_token),
+ '-H', "Content-Type: {}".format(mime),
+ '--data-binary', '@{}'.format(path),
+ upload_url
+ ]
+ return subprocess.run(command, shell=False, check=True)
+
+ def github_overwrite_release_assets(self, tag_name, target_commitish,
+ patterns, method='requests'):
+ # Since github has changed something the asset uploading via requests
+ # got instable, so prefer the cURL alternative.
+ # Potential cause:
+ # sigmavirus24/github3.py/issues/779#issuecomment-379470626
+ repo = self.as_github_repo()
+ if not tag_name:
+ raise CrossbowError('Empty tag name')
+ if not target_commitish:
+ raise CrossbowError('Empty target commit for the release tag')
+
+ # remove the whole release if it already exists
+ try:
+ release = repo.release_from_tag(tag_name)
+ except github3.exceptions.NotFoundError:
+ pass
+ else:
+ release.delete()
+
+ release = repo.create_release(tag_name, target_commitish)
+ for pattern in patterns:
+ for path in glob.glob(pattern, recursive=True):
+ name = os.path.basename(path)
+ size = os.path.getsize(path)
+ mime = mimetypes.guess_type(name)[0] or 'application/zip'
+
+ logger.info(
+ 'Uploading asset `{}` with mimetype {} and size {}...'
+ .format(name, mime, size)
+ )
+
+ if method == 'requests':
+ self.github_upload_asset_requests(release, path, name=name,
+ mime=mime)
+ elif method == 'curl':
+ self.github_upload_asset_curl(release, path, name=name,
+ mime=mime)
+ else:
+ raise CrossbowError(
+ 'Unsupported upload method {}'.format(method)
+ )
+
+
+class Queue(Repo):
+
+ def _latest_prefix_id(self, prefix):
+ pattern = re.compile(r'[\w\/-]*{}-(\d+)'.format(prefix))
+ matches = list(filter(None, map(pattern.match, self.repo.branches)))
+ if matches:
+ latest = max(int(m.group(1)) for m in matches)
+ else:
+ latest = -1
+ return latest
+
+ def _next_job_id(self, prefix):
+ """Auto increments the branch's identifier based on the prefix"""
+ latest_id = self._latest_prefix_id(prefix)
+ return '{}-{}'.format(prefix, latest_id + 1)
+
+ def latest_for_prefix(self, prefix):
+ latest_id = self._latest_prefix_id(prefix)
+ if latest_id < 0:
+ raise RuntimeError(
+ 'No job has been submitted with prefix {} yet'.format(prefix)
+ )
+ job_name = '{}-{}'.format(prefix, latest_id)
+ return self.get(job_name)
+
+ def date_of(self, job):
+ # it'd be better to bound to the queue repository on deserialization
+ # and reorganize these methods to Job
+ branch_name = 'origin/{}'.format(job.branch)
+ branch = self.repo.branches[branch_name]
+ commit = self.repo[branch.target]
+ return date.fromtimestamp(commit.commit_time)
+
+ def jobs(self, pattern):
+ """Return jobs sorted by its identifier in reverse order"""
+ job_names = []
+ for name in self.repo.branches.remote:
+ origin, name = name.split('/', 1)
+ result = re.match(pattern, name)
+ if result:
+ job_names.append(name)
+
+ for name in sorted(job_names, reverse=True):
+ yield self.get(name)
+
+ def get(self, job_name):
+ branch_name = 'origin/{}'.format(job_name)
+ branch = self.repo.branches[branch_name]
+ try:
+ content = self.file_contents(branch.target, 'job.yml')
+ except KeyError:
+ raise CrossbowError(
+ 'No job is found with name: {}'.format(job_name)
+ )
+
+ buffer = StringIO(content.decode('utf-8'))
+ job = yaml.load(buffer)
+ job.queue = self
+ return job
+
+ def put(self, job, prefix='build'):
+ if not isinstance(job, Job):
+ raise CrossbowError('`job` must be an instance of Job')
+ if job.branch is not None:
+ raise CrossbowError('`job.branch` is automatically generated, '
+ 'thus it must be blank')
+
+ if job.target.remote is None:
+ raise CrossbowError(
+ 'Cannot determine git remote for the Arrow repository to '
+ 'clone or push to, try to push the `{}` branch first to have '
+ 'a remote tracking counterpart.'.format(job.target.branch)
+ )
+ if job.target.branch is None:
+ raise CrossbowError(
+ 'Cannot determine the current branch of the Arrow repository '
+ 'to clone or push to, perhaps it is in detached HEAD state. '
+ 'Please checkout a branch.'
+ )
+
+ # auto increment and set next job id, e.g. build-85
+ job._queue = self
+ job.branch = self._next_job_id(prefix)
+
+ # create tasks' branches
+ for task_name, task in job.tasks.items():
+ # adding CI's name to the end of the branch in order to use skip
+ # patterns on travis and circleci
+ task.branch = '{}-{}-{}'.format(job.branch, task.ci, task_name)
+ params = {
+ **job.params,
+ "arrow": job.target,
+ "queue_remote_url": self.remote_url
+ }
+ files = task.render_files(job.template_searchpath, params=params)
+ branch = self.create_branch(task.branch, files=files)
+ self.create_tag(task.tag, branch.target)
+ task.commit = str(branch.target)
+
+ # create job's branch with its description
+ return self.create_branch(job.branch, files=job.render_files())
+
+
+def get_version(root, **kwargs):
+ """
+ Parse function for setuptools_scm that ignores tags for non-C++
+ subprojects, e.g. apache-arrow-js-XXX tags.
+ """
+ from setuptools_scm.git import parse as parse_git_version
+
+ # query the calculated version based on the git tags
+ kwargs['describe_command'] = (
+ 'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
+ )
+ version = parse_git_version(root, **kwargs)
+ tag = str(version.tag)
+
+ # We may get a development tag for the next version, such as "5.0.0.dev0",
+ # or the tag of an already released version, such as "4.0.0".
+ # In the latter case, we need to increment the version so that the computed
+ # version comes after any patch release (the next feature version after
+ # 4.0.0 is 5.0.0).
+ pattern = r"^(\d+)\.(\d+)\.(\d+)"
+ match = re.match(pattern, tag)
+ major, minor, patch = map(int, match.groups())
+ if 'dev' not in tag:
+ major += 1
+
+ return "{}.{}.{}.dev{}".format(major, minor, patch, version.distance)
+
+
+class Serializable:
+
+ @classmethod
+ def to_yaml(cls, representer, data):
+ tag = '!{}'.format(cls.__name__)
+ dct = {k: v for k, v in data.__dict__.items() if not k.startswith('_')}
+ return representer.represent_mapping(tag, dct)
+
+
+class Target(Serializable):
+ """
+ Describes target repository and revision the builds run against
+
+ This serializable data container holding information about arrow's
+ git remote, branch, sha and version number as well as some metadata
+ (currently only an email address where the notification should be sent).
+ """
+
+ def __init__(self, head, branch, remote, version, email=None):
+ self.head = head
+ self.email = email
+ self.branch = branch
+ self.remote = remote
+ self.version = version
+ self.no_rc_version = re.sub(r'-rc\d+\Z', '', version)
+ # Semantic Versioning 1.0.0: https://semver.org/spec/v1.0.0.html
+ #
+ # > A pre-release version number MAY be denoted by appending an
+ # > arbitrary string immediately following the patch version and a
+ # > dash. The string MUST be comprised of only alphanumerics plus
+ # > dash [0-9A-Za-z-].
+ #
+ # Example:
+ #
+ # '0.16.1.dev10' ->
+ # '0.16.1-dev10'
+ self.no_rc_semver_version = \
+ re.sub(r'\.(dev\d+)\Z', r'-\1', self.no_rc_version)
+
+ @classmethod
+ def from_repo(cls, repo, head=None, branch=None, remote=None, version=None,
+ email=None):
+ """Initialize from a repository
+
+ Optionally override detected remote, branch, head, and/or version.
+ """
+ assert isinstance(repo, Repo)
+
+ if head is None:
+ head = str(repo.head.target)
+ if branch is None:
+ branch = repo.branch.branch_name
+ if remote is None:
+ remote = repo.remote_url
+ if version is None:
+ version = get_version(repo.path)
+ if email is None:
+ email = repo.user_email
+
+ return cls(head=head, email=email, branch=branch, remote=remote,
+ version=version)
+
+
+class Task(Serializable):
+ """
+ Describes a build task and metadata required to render CI templates
+
+ A task is represented as a single git commit and branch containing jinja2
+ rendered files (currently appveyor.yml or .travis.yml configurations).
+
+ A task can't be directly submitted to a queue, must belong to a job.
+ Each task's unique identifier is its branch name, which is generated after
+ submitting the job to a queue.
+ """
+
+ def __init__(self, ci, template, artifacts=None, params=None):
+ assert ci in {
+ 'circle',
+ 'travis',
+ 'appveyor',
+ 'azure',
+ 'github',
+ 'drone',
+ }
+ self.ci = ci
+ self.template = template
+ self.artifacts = artifacts or []
+ self.params = params or {}
+ self.branch = None # filled after adding to a queue
+ self.commit = None # filled after adding to a queue
+ self._queue = None # set by the queue object after put or get
+ self._status = None # status cache
+ self._assets = None # assets cache
+
+ def render_files(self, searchpath, params=None):
+ params = {**self.params, **(params or {}), "task": self}
+ try:
+ rendered = _render_jinja_template(searchpath, self.template,
+ params=params)
+ except jinja2.TemplateError as e:
+ raise RuntimeError(
+ 'Failed to render template `{}` with {}: {}'.format(
+ self.template, e.__class__.__name__, str(e)
+ )
+ )
+
+ tree = {**_default_tree, self.filename: rendered}
+ return _unflatten_tree(tree)
+
+ @property
+ def tag(self):
+ return self.branch
+
+ @property
+ def filename(self):
+ config_files = {
+ 'circle': '.circleci/config.yml',
+ 'travis': '.travis.yml',
+ 'appveyor': 'appveyor.yml',
+ 'azure': 'azure-pipelines.yml',
+ 'github': '.github/workflows/crossbow.yml',
+ 'drone': '.drone.yml',
+ }
+ return config_files[self.ci]
+
+ def status(self, force_query=False):
+ _status = getattr(self, '_status', None)
+ if force_query or _status is None:
+ github_commit = self._queue.github_commit(self.commit)
+ self._status = TaskStatus(github_commit)
+ return self._status
+
+ def assets(self, force_query=False, validate_patterns=True):
+ _assets = getattr(self, '_assets', None)
+ if force_query or _assets is None:
+ github_release = self._queue.github_release(self.tag)
+ self._assets = TaskAssets(github_release,
+ artifact_patterns=self.artifacts,
+ validate_patterns=validate_patterns)
+ return self._assets
+
+
+class TaskStatus:
+ """
+ Combine the results from status and checks API to a single state.
+
+ Azure pipelines uses checks API which doesn't provide a combined
+ interface like status API does, so we need to manually combine
+ both the commit statuses and the commit checks coming from
+ different API endpoint
+
+ Status.state: error, failure, pending or success, default pending
+ CheckRun.status: queued, in_progress or completed, default: queued
+ CheckRun.conclusion: success, failure, neutral, cancelled, timed_out
+ or action_required, only set if
+ CheckRun.status == 'completed'
+
+ 1. Convert CheckRun's status and conclusion to one of Status.state
+ 2. Merge the states based on the following rules:
+ - failure if any of the contexts report as error or failure
+ - pending if there are no statuses or a context is pending
+ - success if the latest status for all contexts is success
+ error otherwise.
+
+ Parameters
+ ----------
+ commit : github3.Commit
+ Commit to query the combined status for.
+
+ Returns
+ -------
+ TaskStatus(
+ combined_state='error|failure|pending|success',
+ github_status='original github status object',
+ github_check_runs='github checks associated with the commit',
+ total_count='number of statuses and checks'
+ )
+ """
+
+ def __init__(self, commit):
+ status = commit.status()
+ check_runs = list(commit.check_runs())
+ states = [s.state for s in status.statuses]
+
+ for check in check_runs:
+ if check.status == 'completed':
+ if check.conclusion in {'success', 'failure'}:
+ states.append(check.conclusion)
+ elif check.conclusion in {'cancelled', 'timed_out',
+ 'action_required'}:
+ states.append('error')
+ # omit `neutral` conclusion
+ else:
+ states.append('pending')
+
+ # it could be more effective, but the following is more descriptive
+ combined_state = 'error'
+ if len(states):
+ if any(state in {'error', 'failure'} for state in states):
+ combined_state = 'failure'
+ elif any(state == 'pending' for state in states):
+ combined_state = 'pending'
+ elif all(state == 'success' for state in states):
+ combined_state = 'success'
+
+ # show link to the actual build, some of the CI providers implement
+ # the statuses API others implement the checks API, so display both
+ build_links = [s.target_url for s in status.statuses]
+ build_links += [c.html_url for c in check_runs]
+
+ self.combined_state = combined_state
+ self.github_status = status
+ self.github_check_runs = check_runs
+ self.total_count = len(states)
+ self.build_links = build_links
+
+
+class TaskAssets(dict):
+
+ def __init__(self, github_release, artifact_patterns,
+ validate_patterns=True):
+ # HACK(kszucs): don't expect uploaded assets of no atifacts were
+ # defiened for the tasks in order to spare a bit of github rate limit
+ if not artifact_patterns:
+ return
+
+ if github_release is None:
+ github_assets = {} # no assets have been uploaded for the task
+ else:
+ github_assets = {a.name: a for a in github_release.assets()}
+
+ if not validate_patterns:
+ # shortcut to avoid pattern validation and just set all artifacts
+ return self.update(github_assets)
+
+ for pattern in artifact_patterns:
+ # artifact can be a regex pattern
+ compiled = re.compile(f"^{pattern}$")
+ matches = list(
+ filter(None, map(compiled.match, github_assets.keys()))
+ )
+ num_matches = len(matches)
+
+ # validate artifact pattern matches single asset
+ if num_matches == 0:
+ self[pattern] = None
+ elif num_matches == 1:
+ self[pattern] = github_assets[matches[0].group(0)]
+ else:
+ raise CrossbowError(
+ 'Only a single asset should match pattern `{}`, there are '
+ 'multiple ones: {}'.format(pattern, ', '.join(matches))
+ )
+
+ def missing_patterns(self):
+ return [pattern for pattern, asset in self.items() if asset is None]
+
+ def uploaded_assets(self):
+ return [asset for asset in self.values() if asset is not None]
+
+
+class Job(Serializable):
+ """Describes multiple tasks against a single target repository"""
+
+ def __init__(self, target, tasks, params=None, template_searchpath=None):
+ if not tasks:
+ raise ValueError('no tasks were provided for the job')
+ if not all(isinstance(task, Task) for task in tasks.values()):
+ raise ValueError('each `tasks` mus be an instance of Task')
+ if not isinstance(target, Target):
+ raise ValueError('`target` must be an instance of Target')
+ if not isinstance(target, Target):
+ raise ValueError('`target` must be an instance of Target')
+ if not isinstance(params, dict):
+ raise ValueError('`params` must be an instance of dict')
+
+ self.target = target
+ self.tasks = tasks
+ self.params = params or {} # additional parameters for the tasks
+ self.branch = None # filled after adding to a queue
+ self._queue = None # set by the queue object after put or get
+ if template_searchpath is None:
+ self._template_searchpath = ArrowSources.find().path
+ else:
+ self._template_searchpath = template_searchpath
+
+ def render_files(self):
+ with StringIO() as buf:
+ yaml.dump(self, buf)
+ content = buf.getvalue()
+ tree = {**_default_tree, "job.yml": content}
+ return _unflatten_tree(tree)
+
+ def render_tasks(self, params=None):
+ result = {}
+ params = {
+ **self.params,
+ "arrow": self.target,
+ **(params or {})
+ }
+ for task_name, task in self.tasks.items():
+ files = task.render_files(self._template_searchpath, params)
+ result[task_name] = files
+ return result
+
+ @property
+ def template_searchpath(self):
+ return self._template_searchpath
+
+ @property
+ def queue(self):
+ assert isinstance(self._queue, Queue)
+ return self._queue
+
+ @queue.setter
+ def queue(self, queue):
+ assert isinstance(queue, Queue)
+ self._queue = queue
+ for task in self.tasks.values():
+ task._queue = queue
+
+ @property
+ def email(self):
+ return os.environ.get('CROSSBOW_EMAIL', self.target.email)
+
+ @property
+ def date(self):
+ return self.queue.date_of(self)
+
+ def show(self, stream=None):
+ return yaml.dump(self, stream=stream)
+
+ @classmethod
+ def from_config(cls, config, target, tasks=None, groups=None, params=None):
+ """
+ Intantiate a job from based on a config.
+
+ Parameters
+ ----------
+ config : dict
+ Deserialized content of tasks.yml
+ target : Target
+ Describes target repository and revision the builds run against.
+ tasks : Optional[List[str]], default None
+ List of glob patterns for matching task names.
+ groups : Optional[List[str]], default None
+ List of exact group names matching predefined task sets in the
+ config.
+ params : Optional[Dict[str, str]], default None
+ Additional rendering parameters for the task templates.
+
+ Returns
+ -------
+ Job
+
+ Raises
+ ------
+ Exception:
+ If invalid groups or tasks has been passed.
+ """
+ task_definitions = config.select(tasks, groups=groups)
+
+ # instantiate the tasks
+ tasks = {}
+ versions = {'version': target.version,
+ 'no_rc_version': target.no_rc_version,
+ 'no_rc_semver_version': target.no_rc_semver_version}
+ for task_name, task in task_definitions.items():
+ artifacts = task.pop('artifacts', None) or [] # because of yaml
+ artifacts = [fn.format(**versions) for fn in artifacts]
+ tasks[task_name] = Task(artifacts=artifacts, **task)
+
+ return cls(target=target, tasks=tasks, params=params,
+ template_searchpath=config.template_searchpath)
+
+ def is_finished(self):
+ for task in self.tasks.values():
+ status = task.status(force_query=True)
+ if status.combined_state == 'pending':
+ return False
+ return True
+
+ def wait_until_finished(self, poll_max_minutes=120,
+ poll_interval_minutes=10):
+ started_at = time.time()
+ while True:
+ if self.is_finished():
+ break
+
+ waited_for_minutes = (time.time() - started_at) / 60
+ if waited_for_minutes > poll_max_minutes:
+ msg = ('Exceeded the maximum amount of time waiting for job '
+ 'to finish, waited for {} minutes.')
+ raise RuntimeError(msg.format(waited_for_minutes))
+
+ logger.info('Waiting {} minutes and then checking again'
+ .format(poll_interval_minutes))
+ time.sleep(poll_interval_minutes * 60)
+
+
+class Config(dict):
+
+ def __init__(self, tasks, template_searchpath):
+ super().__init__(tasks)
+ self.template_searchpath = template_searchpath
+
+ @classmethod
+ def load_yaml(cls, path):
+ path = Path(path)
+ searchpath = path.parent
+ rendered = _render_jinja_template(searchpath, template=path.name,
+ params={})
+ config = yaml.load(rendered)
+ return cls(config, template_searchpath=searchpath)
+
+ def show(self, stream=None):
+ return yaml.dump(dict(self), stream=stream)
+
+ def select(self, tasks=None, groups=None):
+ config_groups = dict(self['groups'])
+ config_tasks = dict(self['tasks'])
+ valid_groups = set(config_groups.keys())
+ valid_tasks = set(config_tasks.keys())
+ group_whitelist = list(groups or [])
+ task_whitelist = list(tasks or [])
+
+ # validate that the passed groups are defined in the config
+ requested_groups = set(group_whitelist)
+ invalid_groups = requested_groups - valid_groups
+ if invalid_groups:
+ msg = 'Invalid group(s) {!r}. Must be one of {!r}'.format(
+ invalid_groups, valid_groups
+ )
+ raise CrossbowError(msg)
+
+ # merge the tasks defined in the selected groups
+ task_patterns = [list(config_groups[name]) for name in group_whitelist]
+ task_patterns = set(sum(task_patterns, task_whitelist))
+
+ # treat the task names as glob patterns to select tasks more easily
+ requested_tasks = set()
+ for pattern in task_patterns:
+ matches = fnmatch.filter(valid_tasks, pattern)
+ if len(matches):
+ requested_tasks.update(matches)
+ else:
+ raise CrossbowError(
+ "Unable to match any tasks for `{}`".format(pattern)
+ )
+
+ # validate that the passed and matched tasks are defined in the config
+ invalid_tasks = requested_tasks - valid_tasks
+ if invalid_tasks:
+ msg = 'Invalid task(s) {!r}. Must be one of {!r}'.format(
+ invalid_tasks, valid_tasks
+ )
+ raise CrossbowError(msg)
+
+ return {
+ task_name: config_tasks[task_name] for task_name in requested_tasks
+ }
+
+ def validate(self):
+ # validate that the task groups are properly referening the tasks
+ for group_name, group in self['groups'].items():
+ for pattern in group:
+ tasks = self.select(tasks=[pattern])
+ if not tasks:
+ raise CrossbowError(
+ "The pattern `{}` defined for task group `{}` is not "
+ "matching any of the tasks defined in the "
+ "configuration file.".format(pattern, group_name)
+ )
+
+ # validate that the tasks are constructible
+ for task_name, task in self['tasks'].items():
+ try:
+ Task(**task)
+ except Exception as e:
+ raise CrossbowError(
+ 'Unable to construct a task object from the '
+ 'definition of task `{}`. The original error message '
+ 'is: `{}`'.format(task_name, str(e))
+ )
+
+ # validate that the defined tasks are renderable, in order to to that
+ # define the required object with dummy data
+ target = Target(
+ head='e279a7e06e61c14868ca7d71dea795420aea6539',
+ branch='master',
+ remote='https://github.com/apache/arrow',
+ version='1.0.0dev123',
+ email='dummy@example.ltd'
+ )
+
+ for task_name, task in self['tasks'].items():
+ task = Task(**task)
+ files = task.render_files(
+ self.template_searchpath,
+ params=dict(
+ arrow=target,
+ queue_remote_url='https://github.com/org/crossbow'
+ )
+ )
+ if not files:
+ raise CrossbowError('No files have been rendered for task `{}`'
+ .format(task_name))
+
+
+# configure yaml serializer
+yaml = YAML()
+yaml.register_class(Job)
+yaml.register_class(Task)
+yaml.register_class(Target)
diff --git a/src/arrow/dev/archery/archery/crossbow/reports.py b/src/arrow/dev/archery/archery/crossbow/reports.py
new file mode 100644
index 000000000..f86a67a74
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/reports.py
@@ -0,0 +1,315 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import click
+import collections
+import operator
+import fnmatch
+import functools
+from io import StringIO
+import textwrap
+
+
+# TODO(kszucs): use archery.report.JinjaReport instead
+class Report:
+
+ def __init__(self, job, task_filters=None):
+ self.job = job
+
+ tasks = sorted(job.tasks.items())
+ if task_filters:
+ filtered = set()
+ for pattern in task_filters:
+ filtered |= set(fnmatch.filter(job.tasks.keys(), pattern))
+
+ tasks = [(name, task) for name, task in tasks if name in filtered]
+
+ self._tasks = dict(tasks)
+
+ @property
+ def tasks(self):
+ return self._tasks
+
+ def show(self):
+ raise NotImplementedError()
+
+
+class ConsoleReport(Report):
+ """Report the status of a Job to the console using click"""
+
+ # output table's header template
+ HEADER = '[{state:>7}] {branch:<52} {content:>16}'
+ DETAILS = ' â”” {url}'
+
+ # output table's row template for assets
+ ARTIFACT_NAME = '{artifact:>69} '
+ ARTIFACT_STATE = '[{state:>7}]'
+
+ # state color mapping to highlight console output
+ COLORS = {
+ # from CombinedStatus
+ 'error': 'red',
+ 'failure': 'red',
+ 'pending': 'yellow',
+ 'success': 'green',
+ # custom state messages
+ 'ok': 'green',
+ 'missing': 'red'
+ }
+
+ def lead(self, state, branch, n_uploaded, n_expected):
+ line = self.HEADER.format(
+ state=state.upper(),
+ branch=branch,
+ content='uploaded {} / {}'.format(n_uploaded, n_expected)
+ )
+ return click.style(line, fg=self.COLORS[state.lower()])
+
+ def header(self):
+ header = self.HEADER.format(
+ state='state',
+ branch='Task / Branch',
+ content='Artifacts'
+ )
+ delimiter = '-' * len(header)
+ return '{}\n{}'.format(header, delimiter)
+
+ def artifact(self, state, pattern, asset):
+ if asset is None:
+ artifact = pattern
+ state = 'pending' if state == 'pending' else 'missing'
+ else:
+ artifact = asset.name
+ state = 'ok'
+
+ name_ = self.ARTIFACT_NAME.format(artifact=artifact)
+ state_ = click.style(
+ self.ARTIFACT_STATE.format(state=state.upper()),
+ self.COLORS[state]
+ )
+ return name_ + state_
+
+ def show(self, outstream, asset_callback=None, validate_patterns=True):
+ echo = functools.partial(click.echo, file=outstream)
+
+ # write table's header
+ echo(self.header())
+
+ # write table's body
+ for task_name, task in self.tasks.items():
+ # write summary of the uploaded vs total assets
+ status = task.status()
+ assets = task.assets(validate_patterns=validate_patterns)
+
+ # mapping of artifact pattern to asset or None of not uploaded
+ n_expected = len(task.artifacts)
+ n_uploaded = len(assets.uploaded_assets())
+ echo(self.lead(status.combined_state, task_name, n_uploaded,
+ n_expected))
+
+ # show link to the actual build, some of the CI providers implement
+ # the statuses API others implement the checks API, so display both
+ for link in status.build_links:
+ echo(self.DETAILS.format(url=link))
+
+ # write per asset status
+ for artifact_pattern, asset in assets.items():
+ if asset_callback is not None:
+ asset_callback(task_name, task, asset)
+ echo(self.artifact(status.combined_state, artifact_pattern,
+ asset))
+
+
+class EmailReport(Report):
+
+ HEADER = textwrap.dedent("""
+ Arrow Build Report for Job {job_name}
+
+ All tasks: {all_tasks_url}
+ """)
+
+ TASK = textwrap.dedent("""
+ - {name}:
+ URL: {url}
+ """).strip()
+
+ EMAIL = textwrap.dedent("""
+ From: {sender_name} <{sender_email}>
+ To: {recipient_email}
+ Subject: {subject}
+
+ {body}
+ """).strip()
+
+ STATUS_HEADERS = {
+ # from CombinedStatus
+ 'error': 'Errored Tasks:',
+ 'failure': 'Failed Tasks:',
+ 'pending': 'Pending Tasks:',
+ 'success': 'Succeeded Tasks:',
+ }
+
+ def __init__(self, job, sender_name, sender_email, recipient_email):
+ self.sender_name = sender_name
+ self.sender_email = sender_email
+ self.recipient_email = recipient_email
+ super().__init__(job)
+
+ def url(self, query):
+ repo_url = self.job.queue.remote_url.strip('.git')
+ return '{}/branches/all?query={}'.format(repo_url, query)
+
+ def listing(self, tasks):
+ return '\n'.join(
+ sorted(
+ self.TASK.format(name=task_name, url=self.url(task.branch))
+ for task_name, task in tasks.items()
+ )
+ )
+
+ def header(self):
+ url = self.url(self.job.branch)
+ return self.HEADER.format(job_name=self.job.branch, all_tasks_url=url)
+
+ def subject(self):
+ return (
+ "[NIGHTLY] Arrow Build Report for Job {}".format(self.job.branch)
+ )
+
+ def body(self):
+ buffer = StringIO()
+ buffer.write(self.header())
+
+ tasks_by_state = collections.defaultdict(dict)
+ for task_name, task in self.job.tasks.items():
+ state = task.status().combined_state
+ tasks_by_state[state][task_name] = task
+
+ for state in ('failure', 'error', 'pending', 'success'):
+ if state in tasks_by_state:
+ tasks = tasks_by_state[state]
+ buffer.write('\n')
+ buffer.write(self.STATUS_HEADERS[state])
+ buffer.write('\n')
+ buffer.write(self.listing(tasks))
+ buffer.write('\n')
+
+ return buffer.getvalue()
+
+ def email(self):
+ return self.EMAIL.format(
+ sender_name=self.sender_name,
+ sender_email=self.sender_email,
+ recipient_email=self.recipient_email,
+ subject=self.subject(),
+ body=self.body()
+ )
+
+ def show(self, outstream):
+ outstream.write(self.email())
+
+ def send(self, smtp_user, smtp_password, smtp_server, smtp_port):
+ import smtplib
+
+ email = self.email()
+
+ server = smtplib.SMTP_SSL(smtp_server, smtp_port)
+ server.ehlo()
+ server.login(smtp_user, smtp_password)
+ server.sendmail(smtp_user, self.recipient_email, email)
+ server.close()
+
+
+class CommentReport(Report):
+
+ _markdown_badge = '[![{title}]({badge})]({url})'
+
+ badges = {
+ 'github': _markdown_badge.format(
+ title='Github Actions',
+ url='https://github.com/{repo}/actions?query=branch:{branch}',
+ badge=(
+ 'https://github.com/{repo}/workflows/Crossbow/'
+ 'badge.svg?branch={branch}'
+ ),
+ ),
+ 'azure': _markdown_badge.format(
+ title='Azure',
+ url=(
+ 'https://dev.azure.com/{repo}/_build/latest'
+ '?definitionId=1&branchName={branch}'
+ ),
+ badge=(
+ 'https://dev.azure.com/{repo}/_apis/build/status/'
+ '{repo_dotted}?branchName={branch}'
+ )
+ ),
+ 'travis': _markdown_badge.format(
+ title='TravisCI',
+ url='https://travis-ci.com/{repo}/branches',
+ badge='https://img.shields.io/travis/{repo}/{branch}.svg'
+ ),
+ 'circle': _markdown_badge.format(
+ title='CircleCI',
+ url='https://circleci.com/gh/{repo}/tree/{branch}',
+ badge=(
+ 'https://img.shields.io/circleci/build/github'
+ '/{repo}/{branch}.svg'
+ )
+ ),
+ 'appveyor': _markdown_badge.format(
+ title='Appveyor',
+ url='https://ci.appveyor.com/project/{repo}/history',
+ badge='https://img.shields.io/appveyor/ci/{repo}/{branch}.svg'
+ ),
+ 'drone': _markdown_badge.format(
+ title='Drone',
+ url='https://cloud.drone.io/{repo}',
+ badge='https://img.shields.io/drone/build/{repo}/{branch}.svg'
+ ),
+ }
+
+ def __init__(self, job, crossbow_repo):
+ self.crossbow_repo = crossbow_repo
+ super().__init__(job)
+
+ def show(self):
+ url = 'https://github.com/{repo}/branches/all?query={branch}'
+ sha = self.job.target.head
+
+ msg = 'Revision: {}\n\n'.format(sha)
+ msg += 'Submitted crossbow builds: [{repo} @ {branch}]'
+ msg += '({})\n'.format(url)
+ msg += '\n|Task|Status|\n|----|------|'
+
+ tasks = sorted(self.job.tasks.items(), key=operator.itemgetter(0))
+ for key, task in tasks:
+ branch = task.branch
+
+ try:
+ template = self.badges[task.ci]
+ badge = template.format(
+ repo=self.crossbow_repo,
+ repo_dotted=self.crossbow_repo.replace('/', '.'),
+ branch=branch
+ )
+ except KeyError:
+ badge = 'unsupported CI service `{}`'.format(task.ci)
+
+ msg += '\n|{}|{}|'.format(key, badge)
+
+ return msg.format(repo=self.crossbow_repo, branch=self.job.branch)
diff --git a/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml
new file mode 100644
index 000000000..c37c7b553
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-job.yaml
@@ -0,0 +1,51 @@
+!Job
+target: !Target
+ head: f766a1d615dd1b7ee706d05102e579195951a61c
+ email: unkown
+ branch: refs/pull/4435/merge
+ remote: https://github.com/apache/arrow
+ version: 0.13.0.dev306
+ no_rc_version: 0.13.0.dev306
+tasks:
+ docker-cpp-cmake32: !Task
+ ci: circle
+ platform: linux
+ template: docker-tests/circle.linux.yml
+ artifacts: []
+ params:
+ commands:
+ - docker-compose build cpp-cmake32
+ - docker-compose run cpp-cmake32
+ branch: ursabot-1-circle-docker-cpp-cmake32
+ commit: a56b077c8d1b891a7935048e5672bf6fc07599ec
+ wheel-osx-cp37m: !Task
+ ci: travis
+ platform: osx
+ template: python-wheels/travis.osx.yml
+ artifacts:
+ - pyarrow-0.13.0.dev306-cp37-cp37m-macosx_10_6_intel.whl
+ params:
+ python_version: 3.7
+ branch: ursabot-1-travis-wheel-osx-cp37m
+ commit: a56b077c8d1b891a7935048e5672bf6fc07599ec
+ wheel-osx-cp36m: !Task
+ ci: travis
+ platform: osx
+ template: python-wheels/travis.osx.yml
+ artifacts:
+ - pyarrow-0.13.0.dev306-cp36-cp36m-macosx_10_6_intel.whl
+ params:
+ python_version: 3.6
+ branch: ursabot-1-travis-wheel-osx-cp36m
+ commit: a56b077c8d1b891a7935048e5672bf6fc07599ec
+ wheel-win-cp36m: !Task
+ ci: appveyor
+ platform: win
+ template: python-wheels/appveyor.yml
+ artifacts:
+ - pyarrow-0.13.0.dev306-cp36-cp36m-win_amd64.whl
+ params:
+ python_version: 3.6
+ branch: ursabot-1-appveyor-wheel-win-cp36m
+ commit: a56b077c8d1b891a7935048e5672bf6fc07599ec
+branch: ursabot-1
diff --git a/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md
new file mode 100644
index 000000000..15825218c
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md
@@ -0,0 +1,10 @@
+Revision: {revision}
+
+Submitted crossbow builds: [{repo} @ {branch}](https://github.com/{repo}/branches/all?query={branch})
+
+|Task|Status|
+|----|------|
+|docker-cpp-cmake32|[![CircleCI](https://img.shields.io/circleci/build/github/{repo}/{branch}-circle-docker-cpp-cmake32.svg)](https://circleci.com/gh/{repo}/tree/{branch}-circle-docker-cpp-cmake32)|
+|wheel-osx-cp36m|[![TravisCI](https://img.shields.io/travis/{repo}/{branch}-travis-wheel-osx-cp36m.svg)](https://travis-ci.com/{repo}/branches)|
+|wheel-osx-cp37m|[![TravisCI](https://img.shields.io/travis/{repo}/{branch}-travis-wheel-osx-cp37m.svg)](https://travis-ci.com/{repo}/branches)|
+|wheel-win-cp36m|[![Appveyor](https://img.shields.io/appveyor/ci/{repo}/{branch}-appveyor-wheel-win-cp36m.svg)](https://ci.appveyor.com/project/{repo}/history)|
diff --git a/src/arrow/dev/archery/archery/crossbow/tests/test_core.py b/src/arrow/dev/archery/archery/crossbow/tests/test_core.py
new file mode 100644
index 000000000..518474236
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/tests/test_core.py
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from archery.utils.source import ArrowSources
+from archery.crossbow import Config
+
+
+def test_config():
+ src = ArrowSources.find()
+ conf = Config.load_yaml(src.dev / "tasks" / "tasks.yml")
+ conf.validate()
diff --git a/src/arrow/dev/archery/archery/crossbow/tests/test_crossbow_cli.py b/src/arrow/dev/archery/archery/crossbow/tests/test_crossbow_cli.py
new file mode 100644
index 000000000..ee9ba1ee2
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/tests/test_crossbow_cli.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from click.testing import CliRunner
+import pytest
+
+from archery.crossbow.cli import crossbow
+from archery.utils.git import git
+
+
+@pytest.mark.integration
+def test_crossbow_submit(tmp_path):
+ runner = CliRunner()
+
+ def invoke(*args):
+ return runner.invoke(crossbow, ['--queue-path', str(tmp_path), *args])
+
+ # initialize an empty crossbow repository
+ git.run_cmd("init", str(tmp_path))
+ git.run_cmd("-C", str(tmp_path), "remote", "add", "origin",
+ "https://github.com/dummy/repo")
+ git.run_cmd("-C", str(tmp_path), "commit", "-m", "initial",
+ "--allow-empty")
+
+ result = invoke('check-config')
+ assert result.exit_code == 0
+
+ result = invoke('submit', '--no-fetch', '--no-push', '-g', 'wheel')
+ assert result.exit_code == 0
diff --git a/src/arrow/dev/archery/archery/crossbow/tests/test_reports.py b/src/arrow/dev/archery/archery/crossbow/tests/test_reports.py
new file mode 100644
index 000000000..0df292bb5
--- /dev/null
+++ b/src/arrow/dev/archery/archery/crossbow/tests/test_reports.py
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import textwrap
+
+from archery.crossbow.core import yaml
+from archery.crossbow.reports import CommentReport
+
+
+def test_crossbow_comment_formatter(load_fixture):
+ msg = load_fixture('crossbow-success-message.md')
+ job = load_fixture('crossbow-job.yaml', decoder=yaml.load)
+
+ report = CommentReport(job, crossbow_repo='ursa-labs/crossbow')
+ expected = msg.format(
+ repo='ursa-labs/crossbow',
+ branch='ursabot-1',
+ revision='f766a1d615dd1b7ee706d05102e579195951a61c',
+ status='has been succeeded.'
+ )
+ assert report.show() == textwrap.dedent(expected).strip()
diff --git a/src/arrow/dev/archery/archery/docker.py b/src/arrow/dev/archery/archery/docker.py
new file mode 100644
index 000000000..17d4c713a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker.py
@@ -0,0 +1,402 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import subprocess
+from io import StringIO
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from .utils.command import Command, default_bin
+from .compat import _ensure_path
+
+
+def flatten(node, parents=None):
+ parents = list(parents or [])
+ if isinstance(node, str):
+ yield (node, parents)
+ elif isinstance(node, list):
+ for value in node:
+ yield from flatten(value, parents=parents)
+ elif isinstance(node, dict):
+ for key, value in node.items():
+ yield (key, parents)
+ yield from flatten(value, parents=parents + [key])
+ else:
+ raise TypeError(node)
+
+
+def _sanitize_command(cmd):
+ if isinstance(cmd, list):
+ cmd = " ".join(cmd)
+ return re.sub(r"\s+", " ", cmd)
+
+
+class UndefinedImage(Exception):
+ pass
+
+
+class ComposeConfig:
+
+ def __init__(self, config_path, dotenv_path, compose_bin, params=None):
+ config_path = _ensure_path(config_path)
+ if dotenv_path:
+ dotenv_path = _ensure_path(dotenv_path)
+ else:
+ dotenv_path = config_path.parent / '.env'
+ self._read_env(dotenv_path, params)
+ self._read_config(config_path, compose_bin)
+
+ def _read_env(self, dotenv_path, params):
+ """
+ Read .env and merge it with explicitly passed parameters.
+ """
+ self.dotenv = dotenv_values(str(dotenv_path))
+ if params is None:
+ self.params = {}
+ else:
+ self.params = {k: v for k, v in params.items() if k in self.dotenv}
+
+ # forward the process' environment variables
+ self.env = os.environ.copy()
+ # set the defaults from the dotenv files
+ self.env.update(self.dotenv)
+ # override the defaults passed as parameters
+ self.env.update(self.params)
+
+ # translate docker's architecture notation to a more widely used one
+ arch = self.env.get('ARCH', 'amd64')
+ arch_aliases = {
+ 'amd64': 'x86_64',
+ 'arm64v8': 'aarch64',
+ 's390x': 's390x'
+ }
+ arch_short_aliases = {
+ 'amd64': 'x64',
+ 'arm64v8': 'arm64',
+ 's390x': 's390x'
+ }
+ self.env['ARCH_ALIAS'] = arch_aliases.get(arch, arch)
+ self.env['ARCH_SHORT_ALIAS'] = arch_short_aliases.get(arch, arch)
+
+ def _read_config(self, config_path, compose_bin):
+ """
+ Validate and read the docker-compose.yml
+ """
+ yaml = YAML()
+ with config_path.open() as fp:
+ config = yaml.load(fp)
+
+ services = config['services'].keys()
+ self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
+ self.with_gpus = config.get('x-with-gpus', [])
+ nodes = self.hierarchy.keys()
+ errors = []
+
+ for name in self.with_gpus:
+ if name not in services:
+ errors.append(
+ 'Service `{}` defined in `x-with-gpus` bot not in '
+ '`services`'.format(name)
+ )
+ for name in nodes - services:
+ errors.append(
+ 'Service `{}` is defined in `x-hierarchy` bot not in '
+ '`services`'.format(name)
+ )
+ for name in services - nodes:
+ errors.append(
+ 'Service `{}` is defined in `services` but not in '
+ '`x-hierarchy`'.format(name)
+ )
+
+ # trigger docker-compose's own validation
+ compose = Command('docker-compose')
+ args = ['--file', str(config_path), 'config']
+ result = compose.run(*args, env=self.env, check=False,
+ stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+
+ if result.returncode != 0:
+ # strip the intro line of docker-compose errors
+ errors += result.stderr.decode().splitlines()
+
+ if errors:
+ msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+ raise ValueError(
+ 'Found errors with docker-compose:\n{}'.format(msg)
+ )
+
+ rendered_config = StringIO(result.stdout.decode())
+ self.path = config_path
+ self.config = yaml.load(rendered_config)
+
+ def get(self, service_name):
+ try:
+ service = self.config['services'][service_name]
+ except KeyError:
+ raise UndefinedImage(service_name)
+ service['name'] = service_name
+ service['need_gpu'] = service_name in self.with_gpus
+ service['ancestors'] = self.hierarchy[service_name]
+ return service
+
+ def __getitem__(self, service_name):
+ return self.get(service_name)
+
+
+class Docker(Command):
+
+ def __init__(self, docker_bin=None):
+ self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+ def __init__(self, config_path, dotenv_path=None, compose_bin=None,
+ params=None):
+ compose_bin = default_bin(compose_bin, 'docker-compose')
+ self.config = ComposeConfig(config_path, dotenv_path, compose_bin,
+ params)
+ self.bin = compose_bin
+ self.pull_memory = set()
+
+ def clear_pull_memory(self):
+ self.pull_memory = set()
+
+ def _execute_compose(self, *args, **kwargs):
+ # execute as a docker compose command
+ try:
+ result = super().run('--file', str(self.config.path), *args,
+ env=self.config.env, **kwargs)
+ result.check_returncode()
+ except subprocess.CalledProcessError as e:
+ def formatdict(d, template):
+ return '\n'.join(
+ template.format(k, v) for k, v in sorted(d.items())
+ )
+ msg = (
+ "`{cmd}` exited with a non-zero exit code {code}, see the "
+ "process log above.\n\nThe docker-compose command was "
+ "invoked with the following parameters:\n\nDefaults defined "
+ "in .env:\n{dotenv}\n\nArchery was called with:\n{params}"
+ )
+ raise RuntimeError(
+ msg.format(
+ cmd=' '.join(e.cmd),
+ code=e.returncode,
+ dotenv=formatdict(self.config.dotenv, template=' {}: {}'),
+ params=formatdict(
+ self.config.params, template=' export {}={}'
+ )
+ )
+ )
+
+ def _execute_docker(self, *args, **kwargs):
+ # execute as a plain docker cli command
+ try:
+ result = Docker().run(*args, **kwargs)
+ result.check_returncode()
+ except subprocess.CalledProcessError as e:
+ raise RuntimeError(
+ "{} exited with non-zero exit code {}".format(
+ ' '.join(e.cmd), e.returncode
+ )
+ )
+
+ def pull(self, service_name, pull_leaf=True, using_docker=False):
+ def _pull(service):
+ args = ['pull']
+ if service['image'] in self.pull_memory:
+ return
+
+ if using_docker:
+ try:
+ self._execute_docker(*args, service['image'])
+ except Exception as e:
+ # better --ignore-pull-failures handling
+ print(e)
+ else:
+ args.append('--ignore-pull-failures')
+ self._execute_compose(*args, service['name'])
+
+ self.pull_memory.add(service['image'])
+
+ service = self.config.get(service_name)
+ for ancestor in service['ancestors']:
+ _pull(self.config.get(ancestor))
+ if pull_leaf:
+ _pull(service)
+
+ def build(self, service_name, use_cache=True, use_leaf_cache=True,
+ using_docker=False, using_buildx=False):
+ def _build(service, use_cache):
+ if 'build' not in service:
+ # nothing to do
+ return
+
+ args = []
+ cache_from = list(service.get('build', {}).get('cache_from', []))
+ if use_cache:
+ for image in cache_from:
+ if image not in self.pull_memory:
+ try:
+ self._execute_docker('pull', image)
+ except Exception as e:
+ print(e)
+ finally:
+ self.pull_memory.add(image)
+ else:
+ args.append('--no-cache')
+
+ # turn on inline build cache, this is a docker buildx feature
+ # used to bundle the image build cache to the pushed image manifest
+ # so the build cache can be reused across hosts, documented at
+ # https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
+ if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1':
+ args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1'])
+
+ if using_buildx:
+ for k, v in service['build'].get('args', {}).items():
+ args.extend(['--build-arg', '{}={}'.format(k, v)])
+
+ if use_cache:
+ cache_ref = '{}-cache'.format(service['image'])
+ cache_from = 'type=registry,ref={}'.format(cache_ref)
+ cache_to = (
+ 'type=registry,ref={},mode=max'.format(cache_ref)
+ )
+ args.extend([
+ '--cache-from', cache_from,
+ '--cache-to', cache_to,
+ ])
+
+ args.extend([
+ '--output', 'type=docker',
+ '-f', service['build']['dockerfile'],
+ '-t', service['image'],
+ service['build'].get('context', '.')
+ ])
+ self._execute_docker("buildx", "build", *args)
+ elif using_docker:
+ # better for caching
+ for k, v in service['build'].get('args', {}).items():
+ args.extend(['--build-arg', '{}={}'.format(k, v)])
+ for img in cache_from:
+ args.append('--cache-from="{}"'.format(img))
+ args.extend([
+ '-f', service['build']['dockerfile'],
+ '-t', service['image'],
+ service['build'].get('context', '.')
+ ])
+ self._execute_docker("build", *args)
+ else:
+ self._execute_compose("build", *args, service['name'])
+
+ service = self.config.get(service_name)
+ # build ancestor services
+ for ancestor in service['ancestors']:
+ _build(self.config.get(ancestor), use_cache=use_cache)
+ # build the leaf/target service
+ _build(service, use_cache=use_cache and use_leaf_cache)
+
+ def run(self, service_name, command=None, *, env=None, volumes=None,
+ user=None, using_docker=False):
+ service = self.config.get(service_name)
+
+ args = []
+ if user is not None:
+ args.extend(['-u', user])
+
+ if env is not None:
+ for k, v in env.items():
+ args.extend(['-e', '{}={}'.format(k, v)])
+
+ if volumes is not None:
+ for volume in volumes:
+ args.extend(['--volume', volume])
+
+ if using_docker or service['need_gpu']:
+ # use gpus, requires docker>=19.03
+ if service['need_gpu']:
+ args.extend(['--gpus', 'all'])
+
+ if service.get('shm_size'):
+ args.extend(['--shm-size', service['shm_size']])
+
+ # append env variables from the compose conf
+ for k, v in service.get('environment', {}).items():
+ args.extend(['-e', '{}={}'.format(k, v)])
+
+ # append volumes from the compose conf
+ for v in service.get('volumes', []):
+ if not isinstance(v, str):
+ # if not the compact string volume definition
+ v = "{}:{}".format(v['source'], v['target'])
+ args.extend(['-v', v])
+
+ # infer whether an interactive shell is desired or not
+ if command in ['cmd.exe', 'bash', 'sh', 'powershell']:
+ args.append('-it')
+
+ # get the actual docker image name instead of the compose service
+ # name which we refer as image in general
+ args.append(service['image'])
+
+ # add command from compose if it wasn't overridden
+ if command is not None:
+ args.append(command)
+ else:
+ # replace whitespaces from the preformatted compose command
+ cmd = _sanitize_command(service.get('command', ''))
+ if cmd:
+ args.append(cmd)
+
+ # execute as a plain docker cli command
+ self._execute_docker('run', '--rm', *args)
+ else:
+ # execute as a docker-compose command
+ args.append(service_name)
+ if command is not None:
+ args.append(command)
+ self._execute_compose('run', '--rm', *args)
+
+ def push(self, service_name, user=None, password=None, using_docker=False):
+ def _push(service):
+ if using_docker:
+ return self._execute_docker('push', service['image'])
+ else:
+ return self._execute_compose('push', service['name'])
+
+ if user is not None:
+ try:
+ # TODO(kszucs): have an option for a prompt
+ self._execute_docker('login', '-u', user, '-p', password)
+ except subprocess.CalledProcessError:
+ # hide credentials
+ msg = ('Failed to push `{}`, check the passed credentials'
+ .format(service_name))
+ raise RuntimeError(msg) from None
+
+ service = self.config.get(service_name)
+ for ancestor in service['ancestors']:
+ _push(self.config.get(ancestor))
+ _push(service)
+
+ def images(self):
+ return sorted(self.config.hierarchy.keys())
diff --git a/src/arrow/dev/archery/archery/docker/__init__.py b/src/arrow/dev/archery/archery/docker/__init__.py
new file mode 100644
index 000000000..6be29c916
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker/__init__.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .core import DockerCompose, UndefinedImage # noqa
diff --git a/src/arrow/dev/archery/archery/docker/cli.py b/src/arrow/dev/archery/archery/docker/cli.py
new file mode 100644
index 000000000..c6b4a6473
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker/cli.py
@@ -0,0 +1,261 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+import click
+
+from ..utils.cli import validate_arrow_sources
+from .core import DockerCompose, UndefinedImage
+
+
+def _mock_compose_calls(compose):
+ from types import MethodType
+ from subprocess import CompletedProcess
+
+ def _mock(compose, executable):
+ def _execute(self, *args, **kwargs):
+ params = ['{}={}'.format(k, v)
+ for k, v in self.config.params.items()]
+ command = ' '.join(params + [executable] + list(args))
+ click.echo(command)
+ return CompletedProcess([], 0)
+ return MethodType(_execute, compose)
+
+ compose._execute_docker = _mock(compose, executable='docker')
+ compose._execute_compose = _mock(compose, executable='docker-compose')
+
+
+@click.group()
+@click.option("--src", metavar="<arrow_src>", default=None,
+ callback=validate_arrow_sources,
+ help="Specify Arrow source directory.")
+@click.option('--dry-run/--execute', default=False,
+ help="Display the docker-compose commands instead of executing "
+ "them.")
+@click.pass_context
+def docker(ctx, src, dry_run):
+ """
+ Interact with docker-compose based builds.
+ """
+ ctx.ensure_object(dict)
+
+ config_path = src.path / 'docker-compose.yml'
+ if not config_path.exists():
+ raise click.ClickException(
+ "Docker compose configuration cannot be found in directory {}, "
+ "try to pass the arrow source directory explicitly.".format(src)
+ )
+
+ # take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the
+ # environment variables to keep the usage similar to docker-compose
+ compose = DockerCompose(config_path, params=os.environ)
+ if dry_run:
+ _mock_compose_calls(compose)
+ ctx.obj['compose'] = compose
+
+
+@docker.command("check-config")
+@click.pass_obj
+def check_config(obj):
+ """
+ Validate docker-compose configuration.
+ """
+ # executes the body of the docker function above which does the validation
+ # during the configuration loading
+
+
+@docker.command('build')
+@click.argument('image')
+@click.option('--force-pull/--no-pull', default=True,
+ help="Whether to force pull the image and its ancestor images")
+@click.option('--using-docker-cli', default=False, is_flag=True,
+ envvar='ARCHERY_USE_DOCKER_CLI',
+ help="Use docker CLI directly for building instead of calling "
+ "docker-compose. This may help to reuse cached layers.")
+@click.option('--using-docker-buildx', default=False, is_flag=True,
+ envvar='ARCHERY_USE_DOCKER_BUILDX',
+ help="Use buildx with docker CLI directly for building instead "
+ "of calling docker-compose or the plain docker build "
+ "command. This option makes the build cache reusable "
+ "across hosts.")
+@click.option('--use-cache/--no-cache', default=True,
+ help="Whether to use cache when building the image and its "
+ "ancestor images")
+@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
+ help="Whether to use cache when building only the (leaf) image "
+ "passed as the argument. To disable caching for both the "
+ "image and its ancestors use --no-cache option.")
+@click.pass_obj
+def docker_build(obj, image, *, force_pull, using_docker_cli,
+ using_docker_buildx, use_cache, use_leaf_cache):
+ """
+ Execute docker-compose builds.
+ """
+ compose = obj['compose']
+
+ using_docker_cli |= using_docker_buildx
+ try:
+ if force_pull:
+ compose.pull(image, pull_leaf=use_leaf_cache,
+ using_docker=using_docker_cli)
+ compose.build(image, use_cache=use_cache,
+ use_leaf_cache=use_leaf_cache,
+ using_docker=using_docker_cli,
+ using_buildx=using_docker_buildx,
+ pull_parents=force_pull)
+ except UndefinedImage as e:
+ raise click.ClickException(
+ "There is no service/image defined in docker-compose.yml with "
+ "name: {}".format(str(e))
+ )
+ except RuntimeError as e:
+ raise click.ClickException(str(e))
+
+
+@docker.command('run')
+@click.argument('image')
+@click.argument('command', required=False, default=None)
+@click.option('--env', '-e', multiple=True,
+ help="Set environment variable within the container")
+@click.option('--user', '-u', default=None,
+ help="Username or UID to run the container with")
+@click.option('--force-pull/--no-pull', default=True,
+ help="Whether to force pull the image and its ancestor images")
+@click.option('--force-build/--no-build', default=True,
+ help="Whether to force build the image and its ancestor images")
+@click.option('--build-only', default=False, is_flag=True,
+ help="Pull and/or build the image, but do not run it")
+@click.option('--using-docker-cli', default=False, is_flag=True,
+ envvar='ARCHERY_USE_DOCKER_CLI',
+ help="Use docker CLI directly for building instead of calling "
+ "docker-compose. This may help to reuse cached layers.")
+@click.option('--using-docker-buildx', default=False, is_flag=True,
+ envvar='ARCHERY_USE_DOCKER_BUILDX',
+ help="Use buildx with docker CLI directly for building instead "
+ "of calling docker-compose or the plain docker build "
+ "command. This option makes the build cache reusable "
+ "across hosts.")
+@click.option('--use-cache/--no-cache', default=True,
+ help="Whether to use cache when building the image and its "
+ "ancestor images")
+@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
+ help="Whether to use cache when building only the (leaf) image "
+ "passed as the argument. To disable caching for both the "
+ "image and its ancestors use --no-cache option.")
+@click.option('--resource-limit', default=None,
+ help="A CPU/memory limit preset to mimic CI environments like "
+ "GitHub Actions. Implies --using-docker-cli. Note that "
+ "exporting ARCHERY_DOCKER_BIN=\"sudo docker\" is likely "
+ "required, unless Docker is configured with cgroups v2 "
+ "(else Docker will silently ignore the limits).")
+@click.option('--volume', '-v', multiple=True,
+ help="Set volume within the container")
+@click.pass_obj
+def docker_run(obj, image, command, *, env, user, force_pull, force_build,
+ build_only, using_docker_cli, using_docker_buildx, use_cache,
+ use_leaf_cache, resource_limit, volume):
+ """
+ Execute docker-compose builds.
+
+ To see the available builds run `archery docker images`.
+
+ Examples:
+
+ # execute a single build
+ archery docker run conda-python
+
+ # execute the builds but disable the image pulling
+ archery docker run --no-cache conda-python
+
+ # pass a docker-compose parameter, like the python version
+ PYTHON=3.8 archery docker run conda-python
+
+ # disable the cache only for the leaf image
+ PANDAS=master archery docker run --no-leaf-cache conda-python-pandas
+
+ # entirely skip building the image
+ archery docker run --no-pull --no-build conda-python
+
+ # pass runtime parameters via docker environment variables
+ archery docker run -e CMAKE_BUILD_TYPE=release ubuntu-cpp
+
+ # set a volume
+ archery docker run -v $PWD/build:/build ubuntu-cpp
+
+ # starting an interactive bash session for debugging
+ archery docker run ubuntu-cpp bash
+ """
+ compose = obj['compose']
+ using_docker_cli |= using_docker_buildx
+
+ env = dict(kv.split('=', 1) for kv in env)
+ try:
+ if force_pull:
+ compose.pull(image, pull_leaf=use_leaf_cache,
+ using_docker=using_docker_cli)
+ if force_build:
+ compose.build(image, use_cache=use_cache,
+ use_leaf_cache=use_leaf_cache,
+ using_docker=using_docker_cli,
+ using_buildx=using_docker_buildx)
+ if build_only:
+ return
+ compose.run(
+ image,
+ command=command,
+ env=env,
+ user=user,
+ using_docker=using_docker_cli,
+ resource_limit=resource_limit,
+ volumes=volume
+ )
+ except UndefinedImage as e:
+ raise click.ClickException(
+ "There is no service/image defined in docker-compose.yml with "
+ "name: {}".format(str(e))
+ )
+ except RuntimeError as e:
+ raise click.ClickException(str(e))
+
+
+@docker.command('push')
+@click.argument('image')
+@click.option('--user', '-u', required=False, envvar='ARCHERY_DOCKER_USER',
+ help='Docker repository username')
+@click.option('--password', '-p', required=False,
+ envvar='ARCHERY_DOCKER_PASSWORD',
+ help='Docker repository password')
+@click.option('--using-docker-cli', default=False, is_flag=True,
+ help="Use docker CLI directly for building instead of calling "
+ "docker-compose. This may help to reuse cached layers.")
+@click.pass_obj
+def docker_compose_push(obj, image, user, password, using_docker_cli):
+ """Push the generated docker-compose image."""
+ compose = obj['compose']
+ compose.push(image, user=user, password=password,
+ using_docker=using_docker_cli)
+
+
+@docker.command('images')
+@click.pass_obj
+def docker_compose_images(obj):
+ """List the available docker-compose images."""
+ compose = obj['compose']
+ click.echo('Available images:')
+ for image in compose.images():
+ click.echo(f' - {image}')
diff --git a/src/arrow/dev/archery/archery/docker/core.py b/src/arrow/dev/archery/archery/docker/core.py
new file mode 100644
index 000000000..aaf16bdfa
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker/core.py
@@ -0,0 +1,417 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import subprocess
+from io import StringIO
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from ..utils.command import Command, default_bin
+from ..compat import _ensure_path
+
+
+def flatten(node, parents=None):
+ parents = list(parents or [])
+ if isinstance(node, str):
+ yield (node, parents)
+ elif isinstance(node, list):
+ for value in node:
+ yield from flatten(value, parents=parents)
+ elif isinstance(node, dict):
+ for key, value in node.items():
+ yield (key, parents)
+ yield from flatten(value, parents=parents + [key])
+ else:
+ raise TypeError(node)
+
+
+def _sanitize_command(cmd):
+ if isinstance(cmd, list):
+ cmd = " ".join(cmd)
+ return re.sub(r"\s+", " ", cmd)
+
+
+class UndefinedImage(Exception):
+ pass
+
+
+class ComposeConfig:
+
+ def __init__(self, config_path, dotenv_path, compose_bin, params=None):
+ config_path = _ensure_path(config_path)
+ if dotenv_path:
+ dotenv_path = _ensure_path(dotenv_path)
+ else:
+ dotenv_path = config_path.parent / '.env'
+ self._read_env(dotenv_path, params)
+ self._read_config(config_path, compose_bin)
+
+ def _read_env(self, dotenv_path, params):
+ """
+ Read .env and merge it with explicitly passed parameters.
+ """
+ self.dotenv = dotenv_values(str(dotenv_path))
+ if params is None:
+ self.params = {}
+ else:
+ self.params = {k: v for k, v in params.items() if k in self.dotenv}
+
+ # forward the process' environment variables
+ self.env = os.environ.copy()
+ # set the defaults from the dotenv files
+ self.env.update(self.dotenv)
+ # override the defaults passed as parameters
+ self.env.update(self.params)
+
+ # translate docker's architecture notation to a more widely used one
+ arch = self.env.get('ARCH', 'amd64')
+ arch_aliases = {
+ 'amd64': 'x86_64',
+ 'arm64v8': 'aarch64',
+ 's390x': 's390x'
+ }
+ arch_short_aliases = {
+ 'amd64': 'x64',
+ 'arm64v8': 'arm64',
+ 's390x': 's390x'
+ }
+ self.env['ARCH_ALIAS'] = arch_aliases.get(arch, arch)
+ self.env['ARCH_SHORT_ALIAS'] = arch_short_aliases.get(arch, arch)
+
+ def _read_config(self, config_path, compose_bin):
+ """
+ Validate and read the docker-compose.yml
+ """
+ yaml = YAML()
+ with config_path.open() as fp:
+ config = yaml.load(fp)
+
+ services = config['services'].keys()
+ self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
+ self.limit_presets = config.get('x-limit-presets', {})
+ self.with_gpus = config.get('x-with-gpus', [])
+ nodes = self.hierarchy.keys()
+ errors = []
+
+ for name in self.with_gpus:
+ if name not in services:
+ errors.append(
+ 'Service `{}` defined in `x-with-gpus` bot not in '
+ '`services`'.format(name)
+ )
+ for name in nodes - services:
+ errors.append(
+ 'Service `{}` is defined in `x-hierarchy` bot not in '
+ '`services`'.format(name)
+ )
+ for name in services - nodes:
+ errors.append(
+ 'Service `{}` is defined in `services` but not in '
+ '`x-hierarchy`'.format(name)
+ )
+
+ # trigger docker-compose's own validation
+ compose = Command('docker-compose')
+ args = ['--file', str(config_path), 'config']
+ result = compose.run(*args, env=self.env, check=False,
+ stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+
+ if result.returncode != 0:
+ # strip the intro line of docker-compose errors
+ errors += result.stderr.decode().splitlines()
+
+ if errors:
+ msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+ raise ValueError(
+ 'Found errors with docker-compose:\n{}'.format(msg)
+ )
+
+ rendered_config = StringIO(result.stdout.decode())
+ self.path = config_path
+ self.config = yaml.load(rendered_config)
+
+ def get(self, service_name):
+ try:
+ service = self.config['services'][service_name]
+ except KeyError:
+ raise UndefinedImage(service_name)
+ service['name'] = service_name
+ service['need_gpu'] = service_name in self.with_gpus
+ service['ancestors'] = self.hierarchy[service_name]
+ return service
+
+ def __getitem__(self, service_name):
+ return self.get(service_name)
+
+
+class Docker(Command):
+
+ def __init__(self, docker_bin=None):
+ self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+ def __init__(self, config_path, dotenv_path=None, compose_bin=None,
+ params=None):
+ compose_bin = default_bin(compose_bin, 'docker-compose')
+ self.config = ComposeConfig(config_path, dotenv_path, compose_bin,
+ params)
+ self.bin = compose_bin
+ self.pull_memory = set()
+
+ def clear_pull_memory(self):
+ self.pull_memory = set()
+
+ def _execute_compose(self, *args, **kwargs):
+ # execute as a docker compose command
+ try:
+ result = super().run('--file', str(self.config.path), *args,
+ env=self.config.env, **kwargs)
+ result.check_returncode()
+ except subprocess.CalledProcessError as e:
+ def formatdict(d, template):
+ return '\n'.join(
+ template.format(k, v) for k, v in sorted(d.items())
+ )
+ msg = (
+ "`{cmd}` exited with a non-zero exit code {code}, see the "
+ "process log above.\n\nThe docker-compose command was "
+ "invoked with the following parameters:\n\nDefaults defined "
+ "in .env:\n{dotenv}\n\nArchery was called with:\n{params}"
+ )
+ raise RuntimeError(
+ msg.format(
+ cmd=' '.join(e.cmd),
+ code=e.returncode,
+ dotenv=formatdict(self.config.dotenv, template=' {}: {}'),
+ params=formatdict(
+ self.config.params, template=' export {}={}'
+ )
+ )
+ )
+
+ def _execute_docker(self, *args, **kwargs):
+ # execute as a plain docker cli command
+ try:
+ result = Docker().run(*args, **kwargs)
+ result.check_returncode()
+ except subprocess.CalledProcessError as e:
+ raise RuntimeError(
+ "{} exited with non-zero exit code {}".format(
+ ' '.join(e.cmd), e.returncode
+ )
+ )
+
+ def pull(self, service_name, pull_leaf=True, using_docker=False):
+ def _pull(service):
+ args = ['pull']
+ if service['image'] in self.pull_memory:
+ return
+
+ if using_docker:
+ try:
+ self._execute_docker(*args, service['image'])
+ except Exception as e:
+ # better --ignore-pull-failures handling
+ print(e)
+ else:
+ args.append('--ignore-pull-failures')
+ self._execute_compose(*args, service['name'])
+
+ self.pull_memory.add(service['image'])
+
+ service = self.config.get(service_name)
+ for ancestor in service['ancestors']:
+ _pull(self.config.get(ancestor))
+ if pull_leaf:
+ _pull(service)
+
+ def build(self, service_name, use_cache=True, use_leaf_cache=True,
+ using_docker=False, using_buildx=False, pull_parents=True):
+ def _build(service, use_cache):
+ if 'build' not in service:
+ # nothing to do
+ return
+
+ args = []
+ cache_from = list(service.get('build', {}).get('cache_from', []))
+ if pull_parents:
+ for image in cache_from:
+ if image not in self.pull_memory:
+ try:
+ self._execute_docker('pull', image)
+ except Exception as e:
+ print(e)
+ finally:
+ self.pull_memory.add(image)
+
+ if not use_cache:
+ args.append('--no-cache')
+
+ # turn on inline build cache, this is a docker buildx feature
+ # used to bundle the image build cache to the pushed image manifest
+ # so the build cache can be reused across hosts, documented at
+ # https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
+ if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1':
+ args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1'])
+
+ if using_buildx:
+ for k, v in service['build'].get('args', {}).items():
+ args.extend(['--build-arg', '{}={}'.format(k, v)])
+
+ if use_cache:
+ cache_ref = '{}-cache'.format(service['image'])
+ cache_from = 'type=registry,ref={}'.format(cache_ref)
+ cache_to = (
+ 'type=registry,ref={},mode=max'.format(cache_ref)
+ )
+ args.extend([
+ '--cache-from', cache_from,
+ '--cache-to', cache_to,
+ ])
+
+ args.extend([
+ '--output', 'type=docker',
+ '-f', service['build']['dockerfile'],
+ '-t', service['image'],
+ service['build'].get('context', '.')
+ ])
+ self._execute_docker("buildx", "build", *args)
+ elif using_docker:
+ # better for caching
+ for k, v in service['build'].get('args', {}).items():
+ args.extend(['--build-arg', '{}={}'.format(k, v)])
+ for img in cache_from:
+ args.append('--cache-from="{}"'.format(img))
+ args.extend([
+ '-f', service['build']['dockerfile'],
+ '-t', service['image'],
+ service['build'].get('context', '.')
+ ])
+ self._execute_docker("build", *args)
+ else:
+ self._execute_compose("build", *args, service['name'])
+
+ service = self.config.get(service_name)
+ # build ancestor services
+ for ancestor in service['ancestors']:
+ _build(self.config.get(ancestor), use_cache=use_cache)
+ # build the leaf/target service
+ _build(service, use_cache=use_cache and use_leaf_cache)
+
+ def run(self, service_name, command=None, *, env=None, volumes=None,
+ user=None, using_docker=False, resource_limit=None):
+ service = self.config.get(service_name)
+
+ args = []
+ if user is not None:
+ args.extend(['-u', user])
+
+ if env is not None:
+ for k, v in env.items():
+ args.extend(['-e', '{}={}'.format(k, v)])
+
+ if volumes is not None:
+ for volume in volumes:
+ args.extend(['--volume', volume])
+
+ if using_docker or service['need_gpu'] or resource_limit:
+ # use gpus, requires docker>=19.03
+ if service['need_gpu']:
+ args.extend(['--gpus', 'all'])
+
+ if service.get('shm_size'):
+ args.extend(['--shm-size', service['shm_size']])
+
+ # append env variables from the compose conf
+ for k, v in service.get('environment', {}).items():
+ args.extend(['-e', '{}={}'.format(k, v)])
+
+ # append volumes from the compose conf
+ for v in service.get('volumes', []):
+ if not isinstance(v, str):
+ # if not the compact string volume definition
+ v = "{}:{}".format(v['source'], v['target'])
+ args.extend(['-v', v])
+
+ # infer whether an interactive shell is desired or not
+ if command in ['cmd.exe', 'bash', 'sh', 'powershell']:
+ args.append('-it')
+
+ if resource_limit:
+ limits = self.config.limit_presets.get(resource_limit)
+ if not limits:
+ raise ValueError(
+ f"Unknown resource limit preset '{resource_limit}'")
+ cpuset = limits.get('cpuset_cpus', [])
+ if cpuset:
+ args.append(f'--cpuset-cpus={",".join(map(str, cpuset))}')
+ memory = limits.get('memory')
+ if memory:
+ args.append(f'--memory={memory}')
+ args.append(f'--memory-swap={memory}')
+
+ # get the actual docker image name instead of the compose service
+ # name which we refer as image in general
+ args.append(service['image'])
+
+ # add command from compose if it wasn't overridden
+ if command is not None:
+ args.append(command)
+ else:
+ # replace whitespaces from the preformatted compose command
+ cmd = _sanitize_command(service.get('command', ''))
+ if cmd:
+ args.append(cmd)
+
+ # execute as a plain docker cli command
+ self._execute_docker('run', '--rm', *args)
+ else:
+ # execute as a docker-compose command
+ args.append(service_name)
+ if command is not None:
+ args.append(command)
+ self._execute_compose('run', '--rm', *args)
+
+ def push(self, service_name, user=None, password=None, using_docker=False):
+ def _push(service):
+ if using_docker:
+ return self._execute_docker('push', service['image'])
+ else:
+ return self._execute_compose('push', service['name'])
+
+ if user is not None:
+ try:
+ # TODO(kszucs): have an option for a prompt
+ self._execute_docker('login', '-u', user, '-p', password)
+ except subprocess.CalledProcessError:
+ # hide credentials
+ msg = ('Failed to push `{}`, check the passed credentials'
+ .format(service_name))
+ raise RuntimeError(msg) from None
+
+ service = self.config.get(service_name)
+ for ancestor in service['ancestors']:
+ _push(self.config.get(ancestor))
+ _push(service)
+
+ def images(self):
+ return sorted(self.config.hierarchy.keys())
diff --git a/src/arrow/dev/archery/archery/docker/tests/test_docker.py b/src/arrow/dev/archery/archery/docker/tests/test_docker.py
new file mode 100644
index 000000000..982f3bfc1
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker/tests/test_docker.py
@@ -0,0 +1,531 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import collections
+import os
+import re
+import subprocess
+from unittest import mock
+
+import pytest
+
+from archery.docker import DockerCompose
+from archery.testing import assert_subprocess_calls, override_env, PartialEnv
+
+
+missing_service_compose_yml = """
+version: '3.5'
+
+x-hierarchy:
+ - foo:
+ - sub-foo:
+ - sub-sub-foo
+ - another-sub-sub-foo
+ - bar:
+ - sub-bar
+ - baz
+
+services:
+ foo:
+ image: org/foo
+ sub-sub-foo:
+ image: org/sub-sub-foo
+ another-sub-sub-foo:
+ image: org/another-sub-sub-foo
+ bar:
+ image: org/bar
+ sub-bar:
+ image: org/sub-bar
+ baz:
+ image: org/baz
+"""
+
+missing_node_compose_yml = """
+version: '3.5'
+
+x-hierarchy:
+ - foo:
+ - sub-foo:
+ - sub-sub-foo
+ - another-sub-sub-foo
+ - bar
+ - baz
+
+services:
+ foo:
+ image: org/foo
+ sub-foo:
+ image: org/sub-foo
+ sub-sub-foo:
+ image: org/sub-foo-foo
+ another-sub-sub-foo:
+ image: org/another-sub-sub-foo
+ bar:
+ image: org/bar
+ sub-bar:
+ image: org/sub-bar
+ baz:
+ image: org/baz
+"""
+
+ok_compose_yml = """
+version: '3.5'
+
+x-hierarchy:
+ - foo:
+ - sub-foo:
+ - sub-sub-foo
+ - another-sub-sub-foo
+ - bar:
+ - sub-bar
+ - baz
+
+services:
+ foo:
+ image: org/foo
+ sub-foo:
+ image: org/sub-foo
+ sub-sub-foo:
+ image: org/sub-sub-foo
+ another-sub-sub-foo:
+ image: org/another-sub-sub-foo
+ bar:
+ image: org/bar
+ sub-bar:
+ image: org/sub-bar
+ baz:
+ image: org/baz
+"""
+
+arrow_compose_yml = """
+version: '3.5'
+
+x-with-gpus:
+ - ubuntu-cuda
+
+x-hierarchy:
+ - conda-cpp:
+ - conda-python:
+ - conda-python-pandas
+ - conda-python-dask
+ - ubuntu-cpp:
+ - ubuntu-cpp-cmake32
+ - ubuntu-c-glib:
+ - ubuntu-ruby
+ - ubuntu-cuda
+
+x-limit-presets:
+ github:
+ cpuset_cpus: [0, 1]
+ memory: 7g
+
+services:
+ conda-cpp:
+ image: org/conda-cpp
+ build:
+ context: .
+ dockerfile: ci/docker/conda-cpp.dockerfile
+ conda-python:
+ image: org/conda-python
+ build:
+ context: .
+ dockerfile: ci/docker/conda-cpp.dockerfile
+ args:
+ python: 3.6
+ conda-python-pandas:
+ image: org/conda-python-pandas
+ build:
+ context: .
+ dockerfile: ci/docker/conda-python-pandas.dockerfile
+ conda-python-dask:
+ image: org/conda-python-dask
+ ubuntu-cpp:
+ image: org/ubuntu-cpp
+ build:
+ context: .
+ dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile
+ ubuntu-cpp-cmake32:
+ image: org/ubuntu-cpp-cmake32
+ ubuntu-c-glib:
+ image: org/ubuntu-c-glib
+ ubuntu-ruby:
+ image: org/ubuntu-ruby
+ ubuntu-cuda:
+ image: org/ubuntu-cuda
+ environment:
+ CUDA_ENV: 1
+ OTHER_ENV: 2
+ volumes:
+ - /host:/container
+ command: /bin/bash -c "echo 1 > /tmp/dummy && cat /tmp/dummy"
+"""
+
+arrow_compose_env = {
+ 'UBUNTU': '20.04', # overridden below
+ 'PYTHON': '3.6',
+ 'PANDAS': 'latest',
+ 'DASK': 'latest', # overridden below
+}
+
+
+def create_config(directory, yml_content, env_content=None):
+ env_path = directory / '.env'
+ config_path = directory / 'docker-compose.yml'
+
+ with config_path.open('w') as fp:
+ fp.write(yml_content)
+
+ if env_content is not None:
+ with env_path.open('w') as fp:
+ for k, v in env_content.items():
+ fp.write("{}={}\n".format(k, v))
+
+ return config_path
+
+
+def format_run(args):
+ cmd = ["run", "--rm"]
+ if isinstance(args, str):
+ return " ".join(cmd + [args])
+ else:
+ return cmd + args
+
+
+@pytest.fixture
+def arrow_compose_path(tmpdir):
+ return create_config(tmpdir, arrow_compose_yml, arrow_compose_env)
+
+
+def test_config_validation(tmpdir):
+ config_path = create_config(tmpdir, missing_service_compose_yml)
+ msg = "`sub-foo` is defined in `x-hierarchy` bot not in `services`"
+ with pytest.raises(ValueError, match=msg):
+ DockerCompose(config_path)
+
+ config_path = create_config(tmpdir, missing_node_compose_yml)
+ msg = "`sub-bar` is defined in `services` but not in `x-hierarchy`"
+ with pytest.raises(ValueError, match=msg):
+ DockerCompose(config_path)
+
+ config_path = create_config(tmpdir, ok_compose_yml)
+ DockerCompose(config_path) # no issue
+
+
+def assert_docker_calls(compose, expected_args):
+ base_command = ['docker']
+ expected_commands = []
+ for args in expected_args:
+ if isinstance(args, str):
+ args = re.split(r"\s", args)
+ expected_commands.append(base_command + args)
+ return assert_subprocess_calls(expected_commands, check=True)
+
+
+def assert_compose_calls(compose, expected_args, env=mock.ANY):
+ base_command = ['docker-compose', '--file', str(compose.config.path)]
+ expected_commands = []
+ for args in expected_args:
+ if isinstance(args, str):
+ args = re.split(r"\s", args)
+ expected_commands.append(base_command + args)
+ return assert_subprocess_calls(expected_commands, check=True, env=env)
+
+
+def test_arrow_example_validation_passes(arrow_compose_path):
+ DockerCompose(arrow_compose_path)
+
+
+def test_compose_default_params_and_env(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path, params=dict(
+ UBUNTU='18.04',
+ DASK='master'
+ ))
+ assert compose.config.dotenv == arrow_compose_env
+ assert compose.config.params == {
+ 'UBUNTU': '18.04',
+ 'DASK': 'master',
+ }
+
+
+def test_forwarding_env_variables(arrow_compose_path):
+ expected_calls = [
+ "pull --ignore-pull-failures conda-cpp",
+ "build conda-cpp",
+ ]
+ expected_env = PartialEnv(
+ MY_CUSTOM_VAR_A='a',
+ MY_CUSTOM_VAR_B='b'
+ )
+ with override_env({'MY_CUSTOM_VAR_A': 'a', 'MY_CUSTOM_VAR_B': 'b'}):
+ compose = DockerCompose(arrow_compose_path)
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ assert os.environ['MY_CUSTOM_VAR_A'] == 'a'
+ assert os.environ['MY_CUSTOM_VAR_B'] == 'b'
+ compose.pull('conda-cpp')
+ compose.build('conda-cpp')
+
+
+def test_compose_pull(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path)
+
+ expected_calls = [
+ "pull --ignore-pull-failures conda-cpp",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.clear_pull_memory()
+ compose.pull('conda-cpp')
+
+ expected_calls = [
+ "pull --ignore-pull-failures conda-cpp",
+ "pull --ignore-pull-failures conda-python",
+ "pull --ignore-pull-failures conda-python-pandas"
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.clear_pull_memory()
+ compose.pull('conda-python-pandas')
+
+ expected_calls = [
+ "pull --ignore-pull-failures conda-cpp",
+ "pull --ignore-pull-failures conda-python",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.clear_pull_memory()
+ compose.pull('conda-python-pandas', pull_leaf=False)
+
+
+def test_compose_pull_params(arrow_compose_path):
+ expected_calls = [
+ "pull --ignore-pull-failures conda-cpp",
+ "pull --ignore-pull-failures conda-python",
+ ]
+ compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04'))
+ expected_env = PartialEnv(PYTHON='3.6', PANDAS='latest')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.clear_pull_memory()
+ compose.pull('conda-python-pandas', pull_leaf=False)
+
+
+def test_compose_build(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path)
+
+ expected_calls = [
+ "build conda-cpp",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-cpp')
+
+ expected_calls = [
+ "build --no-cache conda-cpp"
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-cpp', use_cache=False)
+
+ expected_calls = [
+ "build conda-cpp",
+ "build conda-python",
+ "build conda-python-pandas"
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-python-pandas')
+
+ expected_calls = [
+ "build --no-cache conda-cpp",
+ "build --no-cache conda-python",
+ "build --no-cache conda-python-pandas",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-python-pandas', use_cache=False)
+
+ expected_calls = [
+ "build conda-cpp",
+ "build conda-python",
+ "build --no-cache conda-python-pandas",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-python-pandas', use_cache=True,
+ use_leaf_cache=False)
+
+
+@mock.patch.dict(os.environ, {"BUILDKIT_INLINE_CACHE": "1"})
+def test_compose_buildkit_inline_cache(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path)
+
+ expected_calls = [
+ "build --build-arg BUILDKIT_INLINE_CACHE=1 conda-cpp",
+ ]
+ with assert_compose_calls(compose, expected_calls):
+ compose.build('conda-cpp')
+
+
+def test_compose_build_params(arrow_compose_path):
+ expected_calls = [
+ "build ubuntu-cpp",
+ ]
+
+ compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04'))
+ expected_env = PartialEnv(UBUNTU="18.04")
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.build('ubuntu-cpp')
+
+ compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='16.04'))
+ expected_env = PartialEnv(UBUNTU="16.04")
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.build('ubuntu-cpp')
+
+ expected_calls = [
+ "build --no-cache conda-cpp",
+ "build --no-cache conda-python",
+ "build --no-cache conda-python-pandas",
+ ]
+ compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04'))
+ expected_env = PartialEnv(PYTHON='3.6', PANDAS='latest')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.build('conda-python-pandas', use_cache=False)
+
+
+def test_compose_run(arrow_compose_path):
+ expected_calls = [
+ format_run("conda-cpp"),
+ ]
+ compose = DockerCompose(arrow_compose_path)
+ with assert_compose_calls(compose, expected_calls):
+ compose.run('conda-cpp')
+
+ expected_calls = [
+ format_run("conda-python")
+ ]
+ expected_env = PartialEnv(PYTHON='3.6')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.run('conda-python')
+
+ compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8'))
+ expected_env = PartialEnv(PYTHON='3.8')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.run('conda-python')
+
+ compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8'))
+ for command in ["bash", "echo 1"]:
+ expected_calls = [
+ format_run(["conda-python", command]),
+ ]
+ expected_env = PartialEnv(PYTHON='3.8')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ compose.run('conda-python', command)
+
+ expected_calls = [
+ (
+ format_run("-e CONTAINER_ENV_VAR_A=a -e CONTAINER_ENV_VAR_B=b "
+ "conda-python")
+ )
+ ]
+ compose = DockerCompose(arrow_compose_path)
+ expected_env = PartialEnv(PYTHON='3.6')
+ with assert_compose_calls(compose, expected_calls, env=expected_env):
+ env = collections.OrderedDict([
+ ("CONTAINER_ENV_VAR_A", "a"),
+ ("CONTAINER_ENV_VAR_B", "b")
+ ])
+ compose.run('conda-python', env=env)
+
+ expected_calls = [
+ (
+ format_run("--volume /host/build:/build --volume "
+ "/host/ccache:/ccache:delegated conda-python")
+ )
+ ]
+ compose = DockerCompose(arrow_compose_path)
+ with assert_compose_calls(compose, expected_calls):
+ volumes = ("/host/build:/build", "/host/ccache:/ccache:delegated")
+ compose.run('conda-python', volumes=volumes)
+
+
+def test_compose_run_with_resource_limits(arrow_compose_path):
+ expected_calls = [
+ format_run([
+ "--cpuset-cpus=0,1",
+ "--memory=7g",
+ "--memory-swap=7g",
+ "org/conda-cpp"
+ ]),
+ ]
+ compose = DockerCompose(arrow_compose_path)
+ with assert_docker_calls(compose, expected_calls):
+ compose.run('conda-cpp', resource_limit="github")
+
+
+def test_compose_push(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8'))
+ expected_env = PartialEnv(PYTHON="3.8")
+ expected_calls = [
+ mock.call(["docker", "login", "-u", "user", "-p", "pass"], check=True),
+ ]
+ for image in ["conda-cpp", "conda-python", "conda-python-pandas"]:
+ expected_calls.append(
+ mock.call(["docker-compose", "--file", str(compose.config.path),
+ "push", image], check=True, env=expected_env)
+ )
+ with assert_subprocess_calls(expected_calls):
+ compose.push('conda-python-pandas', user='user', password='pass')
+
+
+def test_compose_error(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path, params=dict(
+ PYTHON='3.8',
+ PANDAS='master'
+ ))
+
+ error = subprocess.CalledProcessError(99, [])
+ with mock.patch('subprocess.run', side_effect=error):
+ with pytest.raises(RuntimeError) as exc:
+ compose.run('conda-cpp')
+
+ exception_message = str(exc.value)
+ assert "exited with a non-zero exit code 99" in exception_message
+ assert "PANDAS: latest" in exception_message
+ assert "export PANDAS=master" in exception_message
+
+
+def test_image_with_gpu(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path)
+
+ expected_calls = [
+ [
+ "run", "--rm", "--gpus", "all",
+ "-e", "CUDA_ENV=1",
+ "-e", "OTHER_ENV=2",
+ "-v", "/host:/container:rw",
+ "org/ubuntu-cuda",
+ '/bin/bash -c "echo 1 > /tmp/dummy && cat /tmp/dummy"'
+ ]
+ ]
+ with assert_docker_calls(compose, expected_calls):
+ compose.run('ubuntu-cuda')
+
+
+def test_listing_images(arrow_compose_path):
+ compose = DockerCompose(arrow_compose_path)
+ assert sorted(compose.images()) == [
+ 'conda-cpp',
+ 'conda-python',
+ 'conda-python-dask',
+ 'conda-python-pandas',
+ 'ubuntu-c-glib',
+ 'ubuntu-cpp',
+ 'ubuntu-cpp-cmake32',
+ 'ubuntu-cuda',
+ 'ubuntu-ruby',
+ ]
diff --git a/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py b/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py
new file mode 100644
index 000000000..ab39c7b9d
--- /dev/null
+++ b/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py
@@ -0,0 +1,201 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from unittest.mock import patch
+
+from click.testing import CliRunner
+
+from archery.docker import DockerCompose
+from archery.docker.cli import docker
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_with_custom_command(run, build, pull):
+ # with custom command
+ args = ["run", "ubuntu-cpp", "bash"]
+ result = CliRunner().invoke(docker, args)
+
+ assert result.exit_code == 0
+ pull.assert_called_once_with(
+ "ubuntu-cpp", pull_leaf=True, using_docker=False
+ )
+ build.assert_called_once_with(
+ "ubuntu-cpp",
+ use_cache=True,
+ use_leaf_cache=True,
+ using_docker=False,
+ using_buildx=False
+ )
+ run.assert_called_once_with(
+ "ubuntu-cpp",
+ command="bash",
+ env={},
+ resource_limit=None,
+ user=None,
+ using_docker=False,
+ volumes=(),
+ )
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_options(run, build, pull):
+ # environment variables and volumes
+ args = [
+ "run",
+ "-e",
+ "ARROW_GANDIVA=OFF",
+ "-e",
+ "ARROW_FLIGHT=ON",
+ "--volume",
+ "./build:/build",
+ "-v",
+ "./ccache:/ccache:delegated",
+ "-u",
+ "root",
+ "ubuntu-cpp",
+ ]
+ result = CliRunner().invoke(docker, args)
+ assert result.exit_code == 0
+ pull.assert_called_once_with(
+ "ubuntu-cpp", pull_leaf=True, using_docker=False
+ )
+ build.assert_called_once_with(
+ "ubuntu-cpp",
+ use_cache=True,
+ use_leaf_cache=True,
+ using_docker=False,
+ using_buildx=False
+ )
+ run.assert_called_once_with(
+ "ubuntu-cpp",
+ command=None,
+ env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"},
+ resource_limit=None,
+ user="root",
+ using_docker=False,
+ volumes=(
+ "./build:/build",
+ "./ccache:/ccache:delegated",
+ ),
+ )
+
+
+@patch.object(DockerCompose, "run")
+def test_docker_limit_options(run):
+ # environment variables and volumes
+ args = [
+ "run",
+ "-e",
+ "ARROW_GANDIVA=OFF",
+ "-e",
+ "ARROW_FLIGHT=ON",
+ "--volume",
+ "./build:/build",
+ "-v",
+ "./ccache:/ccache:delegated",
+ "-u",
+ "root",
+ "--resource-limit=github",
+ "--no-build",
+ "--no-pull",
+ "ubuntu-cpp",
+ ]
+ result = CliRunner().invoke(docker, args)
+ assert result.exit_code == 0
+ run.assert_called_once_with(
+ "ubuntu-cpp",
+ command=None,
+ env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"},
+ resource_limit="github",
+ user="root",
+ using_docker=False,
+ volumes=(
+ "./build:/build",
+ "./ccache:/ccache:delegated",
+ ),
+ )
+
+
+@patch.object(DockerCompose, "run")
+def test_docker_run_without_pulling_or_building(run):
+ args = ["run", "--no-pull", "--no-build", "ubuntu-cpp"]
+ result = CliRunner().invoke(docker, args)
+ assert result.exit_code == 0
+ run.assert_called_once_with(
+ "ubuntu-cpp",
+ command=None,
+ env={},
+ resource_limit=None,
+ user=None,
+ using_docker=False,
+ volumes=(),
+ )
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+def test_docker_run_only_pulling_and_building(build, pull):
+ args = ["run", "ubuntu-cpp", "--build-only"]
+ result = CliRunner().invoke(docker, args)
+ assert result.exit_code == 0
+ pull.assert_called_once_with(
+ "ubuntu-cpp", pull_leaf=True, using_docker=False
+ )
+ build.assert_called_once_with(
+ "ubuntu-cpp",
+ use_cache=True,
+ use_leaf_cache=True,
+ using_docker=False,
+ using_buildx=False
+ )
+
+
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_without_build_cache(run, build):
+ args = [
+ "run",
+ "--no-pull",
+ "--force-build",
+ "--user",
+ "me",
+ "--no-cache",
+ "--no-leaf-cache",
+ "ubuntu-cpp",
+ ]
+ result = CliRunner().invoke(docker, args)
+ assert result.exit_code == 0
+ build.assert_called_once_with(
+ "ubuntu-cpp",
+ use_cache=False,
+ use_leaf_cache=False,
+ using_docker=False,
+ using_buildx=False
+ )
+ run.assert_called_once_with(
+ "ubuntu-cpp",
+ command=None,
+ env={},
+ resource_limit=None,
+ user="me",
+ using_docker=False,
+ volumes=(),
+ )
diff --git a/src/arrow/dev/archery/archery/integration/__init__.py b/src/arrow/dev/archery/archery/integration/__init__.py
new file mode 100644
index 000000000..13a83393a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/src/arrow/dev/archery/archery/integration/datagen.py b/src/arrow/dev/archery/archery/integration/datagen.py
new file mode 100644
index 000000000..b764982bd
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/datagen.py
@@ -0,0 +1,1662 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import namedtuple, OrderedDict
+import binascii
+import json
+import os
+import random
+import tempfile
+
+import numpy as np
+
+from .util import frombytes, tobytes, random_bytes, random_utf8
+
+
+def metadata_key_values(pairs):
+ return [{'key': k, 'value': v} for k, v in pairs]
+
+
+class Field(object):
+
+ def __init__(self, name, *, nullable=True, metadata=None):
+ self.name = name
+ self.nullable = nullable
+ self.metadata = metadata or []
+
+ def get_json(self):
+ entries = [
+ ('name', self.name),
+ ('type', self._get_type()),
+ ('nullable', self.nullable),
+ ('children', self._get_children()),
+ ]
+
+ dct = self._get_dictionary()
+ if dct:
+ entries.append(('dictionary', dct))
+
+ if self.metadata is not None and len(self.metadata) > 0:
+ entries.append(('metadata', metadata_key_values(self.metadata)))
+
+ return OrderedDict(entries)
+
+ def _get_dictionary(self):
+ return None
+
+ def _make_is_valid(self, size, null_probability=0.4):
+ if self.nullable:
+ return (np.random.random_sample(size) > null_probability
+ ).astype(np.int8)
+ else:
+ return np.ones(size, dtype=np.int8)
+
+
+class Column(object):
+
+ def __init__(self, name, count):
+ self.name = name
+ self.count = count
+
+ def __len__(self):
+ return self.count
+
+ def _get_children(self):
+ return []
+
+ def _get_buffers(self):
+ return []
+
+ def get_json(self):
+ entries = [
+ ('name', self.name),
+ ('count', self.count)
+ ]
+
+ buffers = self._get_buffers()
+ entries.extend(buffers)
+
+ children = self._get_children()
+ if len(children) > 0:
+ entries.append(('children', children))
+
+ return OrderedDict(entries)
+
+
+class PrimitiveField(Field):
+
+ def _get_children(self):
+ return []
+
+
+class PrimitiveColumn(Column):
+
+ def __init__(self, name, count, is_valid, values):
+ super().__init__(name, count)
+ self.is_valid = is_valid
+ self.values = values
+
+ def _encode_value(self, x):
+ return x
+
+ def _get_buffers(self):
+ return [
+ ('VALIDITY', [int(v) for v in self.is_valid]),
+ ('DATA', list([self._encode_value(x) for x in self.values]))
+ ]
+
+
+class NullColumn(Column):
+ # This subclass is for readability only
+ pass
+
+
+class NullField(PrimitiveField):
+
+ def __init__(self, name, metadata=None):
+ super().__init__(name, nullable=True,
+ metadata=metadata)
+
+ def _get_type(self):
+ return OrderedDict([('name', 'null')])
+
+ def generate_column(self, size, name=None):
+ return NullColumn(name or self.name, size)
+
+
+TEST_INT_MAX = 2 ** 31 - 1
+TEST_INT_MIN = ~TEST_INT_MAX
+
+
+class IntegerField(PrimitiveField):
+
+ def __init__(self, name, is_signed, bit_width, *, nullable=True,
+ metadata=None,
+ min_value=TEST_INT_MIN,
+ max_value=TEST_INT_MAX):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ self.is_signed = is_signed
+ self.bit_width = bit_width
+ self.min_value = min_value
+ self.max_value = max_value
+
+ def _get_generated_data_bounds(self):
+ if self.is_signed:
+ signed_iinfo = np.iinfo('int' + str(self.bit_width))
+ min_value, max_value = signed_iinfo.min, signed_iinfo.max
+ else:
+ unsigned_iinfo = np.iinfo('uint' + str(self.bit_width))
+ min_value, max_value = 0, unsigned_iinfo.max
+
+ lower_bound = max(min_value, self.min_value)
+ upper_bound = min(max_value, self.max_value)
+ return lower_bound, upper_bound
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'int'),
+ ('isSigned', self.is_signed),
+ ('bitWidth', self.bit_width)
+ ])
+
+ def generate_column(self, size, name=None):
+ lower_bound, upper_bound = self._get_generated_data_bounds()
+ return self.generate_range(size, lower_bound, upper_bound,
+ name=name, include_extremes=True)
+
+ def generate_range(self, size, lower, upper, name=None,
+ include_extremes=False):
+ values = np.random.randint(lower, upper, size=size, dtype=np.int64)
+ if include_extremes and size >= 2:
+ values[:2] = [lower, upper]
+ values = list(map(int if self.bit_width < 64 else str, values))
+
+ is_valid = self._make_is_valid(size)
+
+ if name is None:
+ name = self.name
+ return PrimitiveColumn(name, size, is_valid, values)
+
+
+class DateField(IntegerField):
+
+ DAY = 0
+ MILLISECOND = 1
+
+ # 1/1/1 to 12/31/9999
+ _ranges = {
+ DAY: [-719162, 2932896],
+ MILLISECOND: [-62135596800000, 253402214400000]
+ }
+
+ def __init__(self, name, unit, *, nullable=True, metadata=None):
+ bit_width = 32 if unit == self.DAY else 64
+
+ min_value, max_value = self._ranges[unit]
+ super().__init__(
+ name, True, bit_width,
+ nullable=nullable, metadata=metadata,
+ min_value=min_value, max_value=max_value
+ )
+ self.unit = unit
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'date'),
+ ('unit', 'DAY' if self.unit == self.DAY else 'MILLISECOND')
+ ])
+
+
+TIMEUNIT_NAMES = {
+ 's': 'SECOND',
+ 'ms': 'MILLISECOND',
+ 'us': 'MICROSECOND',
+ 'ns': 'NANOSECOND'
+}
+
+
+class TimeField(IntegerField):
+
+ BIT_WIDTHS = {
+ 's': 32,
+ 'ms': 32,
+ 'us': 64,
+ 'ns': 64
+ }
+
+ _ranges = {
+ 's': [0, 86400],
+ 'ms': [0, 86400000],
+ 'us': [0, 86400000000],
+ 'ns': [0, 86400000000000]
+ }
+
+ def __init__(self, name, unit='s', *, nullable=True,
+ metadata=None):
+ min_val, max_val = self._ranges[unit]
+ super().__init__(name, True, self.BIT_WIDTHS[unit],
+ nullable=nullable, metadata=metadata,
+ min_value=min_val, max_value=max_val)
+ self.unit = unit
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'time'),
+ ('unit', TIMEUNIT_NAMES[self.unit]),
+ ('bitWidth', self.bit_width)
+ ])
+
+
+class TimestampField(IntegerField):
+
+ # 1/1/1 to 12/31/9999
+ _ranges = {
+ 's': [-62135596800, 253402214400],
+ 'ms': [-62135596800000, 253402214400000],
+ 'us': [-62135596800000000, 253402214400000000],
+
+ # Physical range for int64, ~584 years and change
+ 'ns': [np.iinfo('int64').min, np.iinfo('int64').max]
+ }
+
+ def __init__(self, name, unit='s', tz=None, *, nullable=True,
+ metadata=None):
+ min_val, max_val = self._ranges[unit]
+ super().__init__(name, True, 64,
+ nullable=nullable,
+ metadata=metadata,
+ min_value=min_val,
+ max_value=max_val)
+ self.unit = unit
+ self.tz = tz
+
+ def _get_type(self):
+ fields = [
+ ('name', 'timestamp'),
+ ('unit', TIMEUNIT_NAMES[self.unit])
+ ]
+
+ if self.tz is not None:
+ fields.append(('timezone', self.tz))
+
+ return OrderedDict(fields)
+
+
+class DurationIntervalField(IntegerField):
+
+ def __init__(self, name, unit='s', *, nullable=True,
+ metadata=None):
+ min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max,
+ super().__init__(
+ name, True, 64,
+ nullable=nullable, metadata=metadata,
+ min_value=min_val, max_value=max_val)
+ self.unit = unit
+
+ def _get_type(self):
+ fields = [
+ ('name', 'duration'),
+ ('unit', TIMEUNIT_NAMES[self.unit])
+ ]
+
+ return OrderedDict(fields)
+
+
+class YearMonthIntervalField(IntegerField):
+ def __init__(self, name, *, nullable=True, metadata=None):
+ min_val, max_val = [-10000*12, 10000*12] # +/- 10000 years.
+ super().__init__(
+ name, True, 32,
+ nullable=nullable, metadata=metadata,
+ min_value=min_val, max_value=max_val)
+
+ def _get_type(self):
+ fields = [
+ ('name', 'interval'),
+ ('unit', 'YEAR_MONTH'),
+ ]
+
+ return OrderedDict(fields)
+
+
+class DayTimeIntervalField(PrimitiveField):
+ def __init__(self, name, *, nullable=True, metadata=None):
+ super().__init__(name,
+ nullable=True,
+ metadata=metadata)
+
+ @property
+ def numpy_type(self):
+ return object
+
+ def _get_type(self):
+
+ return OrderedDict([
+ ('name', 'interval'),
+ ('unit', 'DAY_TIME'),
+ ])
+
+ def generate_column(self, size, name=None):
+ min_day_value, max_day_value = -10000*366, 10000*366
+ values = [{'days': random.randint(min_day_value, max_day_value),
+ 'milliseconds': random.randint(-86400000, +86400000)}
+ for _ in range(size)]
+
+ is_valid = self._make_is_valid(size)
+ if name is None:
+ name = self.name
+ return PrimitiveColumn(name, size, is_valid, values)
+
+
+class MonthDayNanoIntervalField(PrimitiveField):
+ def __init__(self, name, *, nullable=True, metadata=None):
+ super().__init__(name,
+ nullable=True,
+ metadata=metadata)
+
+ @property
+ def numpy_type(self):
+ return object
+
+ def _get_type(self):
+
+ return OrderedDict([
+ ('name', 'interval'),
+ ('unit', 'MONTH_DAY_NANO'),
+ ])
+
+ def generate_column(self, size, name=None):
+ I32 = 'int32'
+ min_int_value, max_int_value = np.iinfo(I32).min, np.iinfo(I32).max
+ I64 = 'int64'
+ min_nano_val, max_nano_val = np.iinfo(I64).min, np.iinfo(I64).max,
+ values = [{'months': random.randint(min_int_value, max_int_value),
+ 'days': random.randint(min_int_value, max_int_value),
+ 'nanoseconds': random.randint(min_nano_val, max_nano_val)}
+ for _ in range(size)]
+
+ is_valid = self._make_is_valid(size)
+ if name is None:
+ name = self.name
+ return PrimitiveColumn(name, size, is_valid, values)
+
+
+class FloatingPointField(PrimitiveField):
+
+ def __init__(self, name, bit_width, *, nullable=True,
+ metadata=None):
+ super().__init__(name,
+ nullable=nullable,
+ metadata=metadata)
+
+ self.bit_width = bit_width
+ self.precision = {
+ 16: 'HALF',
+ 32: 'SINGLE',
+ 64: 'DOUBLE'
+ }[self.bit_width]
+
+ @property
+ def numpy_type(self):
+ return 'float' + str(self.bit_width)
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'floatingpoint'),
+ ('precision', self.precision)
+ ])
+
+ def generate_column(self, size, name=None):
+ values = np.random.randn(size) * 1000
+ values = np.round(values, 3)
+
+ is_valid = self._make_is_valid(size)
+ if name is None:
+ name = self.name
+ return PrimitiveColumn(name, size, is_valid, values)
+
+
+DECIMAL_PRECISION_TO_VALUE = {
+ key: (1 << (8 * i - 1)) - 1 for i, key in enumerate(
+ [1, 3, 5, 7, 10, 12, 15, 17, 19, 22, 24, 27, 29, 32, 34, 36,
+ 40, 42, 44, 50, 60, 70],
+ start=1,
+ )
+}
+
+
+def decimal_range_from_precision(precision):
+ assert 1 <= precision <= 76
+ try:
+ max_value = DECIMAL_PRECISION_TO_VALUE[precision]
+ except KeyError:
+ return decimal_range_from_precision(precision - 1)
+ else:
+ return ~max_value, max_value
+
+
+class DecimalField(PrimitiveField):
+ def __init__(self, name, precision, scale, bit_width, *,
+ nullable=True, metadata=None):
+ super().__init__(name, nullable=True,
+ metadata=metadata)
+ self.precision = precision
+ self.scale = scale
+ self.bit_width = bit_width
+
+ @property
+ def numpy_type(self):
+ return object
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'decimal'),
+ ('precision', self.precision),
+ ('scale', self.scale),
+ ('bitWidth', self.bit_width),
+ ])
+
+ def generate_column(self, size, name=None):
+ min_value, max_value = decimal_range_from_precision(self.precision)
+ values = [random.randint(min_value, max_value) for _ in range(size)]
+
+ is_valid = self._make_is_valid(size)
+ if name is None:
+ name = self.name
+ return DecimalColumn(name, size, is_valid, values, self.bit_width)
+
+
+class DecimalColumn(PrimitiveColumn):
+
+ def __init__(self, name, count, is_valid, values, bit_width):
+ super().__init__(name, count, is_valid, values)
+ self.bit_width = bit_width
+
+ def _encode_value(self, x):
+ return str(x)
+
+
+class BooleanField(PrimitiveField):
+ bit_width = 1
+
+ def _get_type(self):
+ return OrderedDict([('name', 'bool')])
+
+ @property
+ def numpy_type(self):
+ return 'bool'
+
+ def generate_column(self, size, name=None):
+ values = list(map(bool, np.random.randint(0, 2, size=size)))
+ is_valid = self._make_is_valid(size)
+ if name is None:
+ name = self.name
+ return PrimitiveColumn(name, size, is_valid, values)
+
+
+class FixedSizeBinaryField(PrimitiveField):
+
+ def __init__(self, name, byte_width, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ self.byte_width = byte_width
+
+ @property
+ def numpy_type(self):
+ return object
+
+ @property
+ def column_class(self):
+ return FixedSizeBinaryColumn
+
+ def _get_type(self):
+ return OrderedDict([('name', 'fixedsizebinary'),
+ ('byteWidth', self.byte_width)])
+
+ def generate_column(self, size, name=None):
+ is_valid = self._make_is_valid(size)
+ values = []
+
+ for i in range(size):
+ values.append(random_bytes(self.byte_width))
+
+ if name is None:
+ name = self.name
+ return self.column_class(name, size, is_valid, values)
+
+
+class BinaryField(PrimitiveField):
+
+ @property
+ def numpy_type(self):
+ return object
+
+ @property
+ def column_class(self):
+ return BinaryColumn
+
+ def _get_type(self):
+ return OrderedDict([('name', 'binary')])
+
+ def _random_sizes(self, size):
+ return np.random.exponential(scale=4, size=size).astype(np.int32)
+
+ def generate_column(self, size, name=None):
+ is_valid = self._make_is_valid(size)
+ values = []
+
+ sizes = self._random_sizes(size)
+
+ for i, nbytes in enumerate(sizes):
+ if is_valid[i]:
+ values.append(random_bytes(nbytes))
+ else:
+ values.append(b"")
+
+ if name is None:
+ name = self.name
+ return self.column_class(name, size, is_valid, values)
+
+
+class StringField(BinaryField):
+
+ @property
+ def column_class(self):
+ return StringColumn
+
+ def _get_type(self):
+ return OrderedDict([('name', 'utf8')])
+
+ def generate_column(self, size, name=None):
+ K = 7
+ is_valid = self._make_is_valid(size)
+ values = []
+
+ for i in range(size):
+ if is_valid[i]:
+ values.append(tobytes(random_utf8(K)))
+ else:
+ values.append(b"")
+
+ if name is None:
+ name = self.name
+ return self.column_class(name, size, is_valid, values)
+
+
+class LargeBinaryField(BinaryField):
+
+ @property
+ def column_class(self):
+ return LargeBinaryColumn
+
+ def _get_type(self):
+ return OrderedDict([('name', 'largebinary')])
+
+
+class LargeStringField(StringField):
+
+ @property
+ def column_class(self):
+ return LargeStringColumn
+
+ def _get_type(self):
+ return OrderedDict([('name', 'largeutf8')])
+
+
+class Schema(object):
+
+ def __init__(self, fields, metadata=None):
+ self.fields = fields
+ self.metadata = metadata
+
+ def get_json(self):
+ entries = [
+ ('fields', [field.get_json() for field in self.fields])
+ ]
+
+ if self.metadata is not None and len(self.metadata) > 0:
+ entries.append(('metadata', metadata_key_values(self.metadata)))
+
+ return OrderedDict(entries)
+
+
+class _NarrowOffsetsMixin:
+
+ def _encode_offsets(self, offsets):
+ return list(map(int, offsets))
+
+
+class _LargeOffsetsMixin:
+
+ def _encode_offsets(self, offsets):
+ # 64-bit offsets have to be represented as strings to roundtrip
+ # through JSON.
+ return list(map(str, offsets))
+
+
+class _BaseBinaryColumn(PrimitiveColumn):
+
+ def _encode_value(self, x):
+ return frombytes(binascii.hexlify(x).upper())
+
+ def _get_buffers(self):
+ offset = 0
+ offsets = [0]
+
+ data = []
+ for i, v in enumerate(self.values):
+ if self.is_valid[i]:
+ offset += len(v)
+ else:
+ v = b""
+
+ offsets.append(offset)
+ data.append(self._encode_value(v))
+
+ return [
+ ('VALIDITY', [int(x) for x in self.is_valid]),
+ ('OFFSET', self._encode_offsets(offsets)),
+ ('DATA', data)
+ ]
+
+
+class _BaseStringColumn(_BaseBinaryColumn):
+
+ def _encode_value(self, x):
+ return frombytes(x)
+
+
+class BinaryColumn(_BaseBinaryColumn, _NarrowOffsetsMixin):
+ pass
+
+
+class StringColumn(_BaseStringColumn, _NarrowOffsetsMixin):
+ pass
+
+
+class LargeBinaryColumn(_BaseBinaryColumn, _LargeOffsetsMixin):
+ pass
+
+
+class LargeStringColumn(_BaseStringColumn, _LargeOffsetsMixin):
+ pass
+
+
+class FixedSizeBinaryColumn(PrimitiveColumn):
+
+ def _encode_value(self, x):
+ return frombytes(binascii.hexlify(x).upper())
+
+ def _get_buffers(self):
+ data = []
+ for i, v in enumerate(self.values):
+ data.append(self._encode_value(v))
+
+ return [
+ ('VALIDITY', [int(x) for x in self.is_valid]),
+ ('DATA', data)
+ ]
+
+
+class ListField(Field):
+
+ def __init__(self, name, value_field, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ self.value_field = value_field
+
+ @property
+ def column_class(self):
+ return ListColumn
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'list')
+ ])
+
+ def _get_children(self):
+ return [self.value_field.get_json()]
+
+ def generate_column(self, size, name=None):
+ MAX_LIST_SIZE = 4
+
+ is_valid = self._make_is_valid(size)
+ list_sizes = np.random.randint(0, MAX_LIST_SIZE + 1, size=size)
+ offsets = [0]
+
+ offset = 0
+ for i in range(size):
+ if is_valid[i]:
+ offset += int(list_sizes[i])
+ offsets.append(offset)
+
+ # The offset now is the total number of elements in the child array
+ values = self.value_field.generate_column(offset)
+
+ if name is None:
+ name = self.name
+ return self.column_class(name, size, is_valid, offsets, values)
+
+
+class LargeListField(ListField):
+
+ @property
+ def column_class(self):
+ return LargeListColumn
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'largelist')
+ ])
+
+
+class _BaseListColumn(Column):
+
+ def __init__(self, name, count, is_valid, offsets, values):
+ super().__init__(name, count)
+ self.is_valid = is_valid
+ self.offsets = offsets
+ self.values = values
+
+ def _get_buffers(self):
+ return [
+ ('VALIDITY', [int(v) for v in self.is_valid]),
+ ('OFFSET', self._encode_offsets(self.offsets))
+ ]
+
+ def _get_children(self):
+ return [self.values.get_json()]
+
+
+class ListColumn(_BaseListColumn, _NarrowOffsetsMixin):
+ pass
+
+
+class LargeListColumn(_BaseListColumn, _LargeOffsetsMixin):
+ pass
+
+
+class MapField(Field):
+
+ def __init__(self, name, key_field, item_field, *, nullable=True,
+ metadata=None, keys_sorted=False, entries_name='entries'):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+
+ assert not key_field.nullable
+ self.key_field = key_field
+ self.item_field = item_field
+ self.pair_field = StructField(entries_name, [key_field, item_field],
+ nullable=False)
+ self.keys_sorted = keys_sorted
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'map'),
+ ('keysSorted', self.keys_sorted)
+ ])
+
+ def _get_children(self):
+ return [self.pair_field.get_json()]
+
+ def generate_column(self, size, name=None):
+ MAX_MAP_SIZE = 4
+
+ is_valid = self._make_is_valid(size)
+ map_sizes = np.random.randint(0, MAX_MAP_SIZE + 1, size=size)
+ offsets = [0]
+
+ offset = 0
+ for i in range(size):
+ if is_valid[i]:
+ offset += int(map_sizes[i])
+ offsets.append(offset)
+
+ # The offset now is the total number of elements in the child array
+ pairs = self.pair_field.generate_column(offset)
+ if name is None:
+ name = self.name
+
+ return MapColumn(name, size, is_valid, offsets, pairs)
+
+
+class MapColumn(Column):
+
+ def __init__(self, name, count, is_valid, offsets, pairs):
+ super().__init__(name, count)
+ self.is_valid = is_valid
+ self.offsets = offsets
+ self.pairs = pairs
+
+ def _get_buffers(self):
+ return [
+ ('VALIDITY', [int(v) for v in self.is_valid]),
+ ('OFFSET', list(self.offsets))
+ ]
+
+ def _get_children(self):
+ return [self.pairs.get_json()]
+
+
+class FixedSizeListField(Field):
+
+ def __init__(self, name, value_field, list_size, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ self.value_field = value_field
+ self.list_size = list_size
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'fixedsizelist'),
+ ('listSize', self.list_size)
+ ])
+
+ def _get_children(self):
+ return [self.value_field.get_json()]
+
+ def generate_column(self, size, name=None):
+ is_valid = self._make_is_valid(size)
+ values = self.value_field.generate_column(size * self.list_size)
+
+ if name is None:
+ name = self.name
+ return FixedSizeListColumn(name, size, is_valid, values)
+
+
+class FixedSizeListColumn(Column):
+
+ def __init__(self, name, count, is_valid, values):
+ super().__init__(name, count)
+ self.is_valid = is_valid
+ self.values = values
+
+ def _get_buffers(self):
+ return [
+ ('VALIDITY', [int(v) for v in self.is_valid])
+ ]
+
+ def _get_children(self):
+ return [self.values.get_json()]
+
+
+class StructField(Field):
+
+ def __init__(self, name, fields, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ self.fields = fields
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'struct')
+ ])
+
+ def _get_children(self):
+ return [field.get_json() for field in self.fields]
+
+ def generate_column(self, size, name=None):
+ is_valid = self._make_is_valid(size)
+
+ field_values = [field.generate_column(size) for field in self.fields]
+ if name is None:
+ name = self.name
+ return StructColumn(name, size, is_valid, field_values)
+
+
+class _BaseUnionField(Field):
+
+ def __init__(self, name, fields, type_ids=None, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable, metadata=metadata)
+ if type_ids is None:
+ type_ids = list(range(fields))
+ else:
+ assert len(fields) == len(type_ids)
+ self.fields = fields
+ self.type_ids = type_ids
+ assert all(x >= 0 for x in self.type_ids)
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'union'),
+ ('mode', self.mode),
+ ('typeIds', self.type_ids),
+ ])
+
+ def _get_children(self):
+ return [field.get_json() for field in self.fields]
+
+ def _make_type_ids(self, size):
+ return np.random.choice(self.type_ids, size)
+
+
+class SparseUnionField(_BaseUnionField):
+ mode = 'SPARSE'
+
+ def generate_column(self, size, name=None):
+ array_type_ids = self._make_type_ids(size)
+ field_values = [field.generate_column(size) for field in self.fields]
+
+ if name is None:
+ name = self.name
+ return SparseUnionColumn(name, size, array_type_ids, field_values)
+
+
+class DenseUnionField(_BaseUnionField):
+ mode = 'DENSE'
+
+ def generate_column(self, size, name=None):
+ # Reverse mapping {logical type id => physical child id}
+ child_ids = [None] * (max(self.type_ids) + 1)
+ for i, type_id in enumerate(self.type_ids):
+ child_ids[type_id] = i
+
+ array_type_ids = self._make_type_ids(size)
+ offsets = []
+ child_sizes = [0] * len(self.fields)
+
+ for i in range(size):
+ child_id = child_ids[array_type_ids[i]]
+ offset = child_sizes[child_id]
+ offsets.append(offset)
+ child_sizes[child_id] = offset + 1
+
+ field_values = [
+ field.generate_column(child_size)
+ for field, child_size in zip(self.fields, child_sizes)]
+
+ if name is None:
+ name = self.name
+ return DenseUnionColumn(name, size, array_type_ids, offsets,
+ field_values)
+
+
+class Dictionary(object):
+
+ def __init__(self, id_, field, size, name=None, ordered=False):
+ self.id_ = id_
+ self.field = field
+ self.values = field.generate_column(size=size, name=name)
+ self.ordered = ordered
+
+ def __len__(self):
+ return len(self.values)
+
+ def get_json(self):
+ dummy_batch = RecordBatch(len(self.values), [self.values])
+ return OrderedDict([
+ ('id', self.id_),
+ ('data', dummy_batch.get_json())
+ ])
+
+
+class DictionaryField(Field):
+
+ def __init__(self, name, index_field, dictionary, *, nullable=True,
+ metadata=None):
+ super().__init__(name, nullable=nullable,
+ metadata=metadata)
+ assert index_field.name == ''
+ assert isinstance(index_field, IntegerField)
+ assert isinstance(dictionary, Dictionary)
+
+ self.index_field = index_field
+ self.dictionary = dictionary
+
+ def _get_type(self):
+ return self.dictionary.field._get_type()
+
+ def _get_children(self):
+ return self.dictionary.field._get_children()
+
+ def _get_dictionary(self):
+ return OrderedDict([
+ ('id', self.dictionary.id_),
+ ('indexType', self.index_field._get_type()),
+ ('isOrdered', self.dictionary.ordered)
+ ])
+
+ def generate_column(self, size, name=None):
+ if name is None:
+ name = self.name
+ return self.index_field.generate_range(size, 0, len(self.dictionary),
+ name=name)
+
+
+ExtensionType = namedtuple(
+ 'ExtensionType', ['extension_name', 'serialized', 'storage_field'])
+
+
+class ExtensionField(Field):
+
+ def __init__(self, name, extension_type, *, nullable=True, metadata=None):
+ metadata = (metadata or []) + [
+ ('ARROW:extension:name', extension_type.extension_name),
+ ('ARROW:extension:metadata', extension_type.serialized),
+ ]
+ super().__init__(name, nullable=nullable, metadata=metadata)
+ self.extension_type = extension_type
+
+ def _get_type(self):
+ return self.extension_type.storage_field._get_type()
+
+ def _get_children(self):
+ return self.extension_type.storage_field._get_children()
+
+ def _get_dictionary(self):
+ return self.extension_type.storage_field._get_dictionary()
+
+ def generate_column(self, size, name=None):
+ if name is None:
+ name = self.name
+ return self.extension_type.storage_field.generate_column(size, name)
+
+
+class StructColumn(Column):
+
+ def __init__(self, name, count, is_valid, field_values):
+ super().__init__(name, count)
+ self.is_valid = is_valid
+ self.field_values = field_values
+
+ def _get_buffers(self):
+ return [
+ ('VALIDITY', [int(v) for v in self.is_valid])
+ ]
+
+ def _get_children(self):
+ return [field.get_json() for field in self.field_values]
+
+
+class SparseUnionColumn(Column):
+
+ def __init__(self, name, count, type_ids, field_values):
+ super().__init__(name, count)
+ self.type_ids = type_ids
+ self.field_values = field_values
+
+ def _get_buffers(self):
+ return [
+ ('TYPE_ID', [int(v) for v in self.type_ids])
+ ]
+
+ def _get_children(self):
+ return [field.get_json() for field in self.field_values]
+
+
+class DenseUnionColumn(Column):
+
+ def __init__(self, name, count, type_ids, offsets, field_values):
+ super().__init__(name, count)
+ self.type_ids = type_ids
+ self.offsets = offsets
+ self.field_values = field_values
+
+ def _get_buffers(self):
+ return [
+ ('TYPE_ID', [int(v) for v in self.type_ids]),
+ ('OFFSET', [int(v) for v in self.offsets]),
+ ]
+
+ def _get_children(self):
+ return [field.get_json() for field in self.field_values]
+
+
+class RecordBatch(object):
+
+ def __init__(self, count, columns):
+ self.count = count
+ self.columns = columns
+
+ def get_json(self):
+ return OrderedDict([
+ ('count', self.count),
+ ('columns', [col.get_json() for col in self.columns])
+ ])
+
+
+class File(object):
+
+ def __init__(self, name, schema, batches, dictionaries=None,
+ skip=None, path=None):
+ self.name = name
+ self.schema = schema
+ self.dictionaries = dictionaries or []
+ self.batches = batches
+ self.skip = set()
+ self.path = path
+ if skip:
+ self.skip.update(skip)
+
+ def get_json(self):
+ entries = [
+ ('schema', self.schema.get_json())
+ ]
+
+ if len(self.dictionaries) > 0:
+ entries.append(('dictionaries',
+ [dictionary.get_json()
+ for dictionary in self.dictionaries]))
+
+ entries.append(('batches', [batch.get_json()
+ for batch in self.batches]))
+ return OrderedDict(entries)
+
+ def write(self, path):
+ with open(path, 'wb') as f:
+ f.write(json.dumps(self.get_json(), indent=2).encode('utf-8'))
+ self.path = path
+
+ def skip_category(self, category):
+ """Skip this test for the given category.
+
+ Category should be SKIP_ARROW or SKIP_FLIGHT.
+ """
+ self.skip.add(category)
+ return self
+
+
+def get_field(name, type_, **kwargs):
+ if type_ == 'binary':
+ return BinaryField(name, **kwargs)
+ elif type_ == 'utf8':
+ return StringField(name, **kwargs)
+ elif type_ == 'largebinary':
+ return LargeBinaryField(name, **kwargs)
+ elif type_ == 'largeutf8':
+ return LargeStringField(name, **kwargs)
+ elif type_.startswith('fixedsizebinary_'):
+ byte_width = int(type_.split('_')[1])
+ return FixedSizeBinaryField(name, byte_width=byte_width, **kwargs)
+
+ dtype = np.dtype(type_)
+
+ if dtype.kind in ('i', 'u'):
+ signed = dtype.kind == 'i'
+ bit_width = dtype.itemsize * 8
+ return IntegerField(name, signed, bit_width, **kwargs)
+ elif dtype.kind == 'f':
+ bit_width = dtype.itemsize * 8
+ return FloatingPointField(name, bit_width, **kwargs)
+ elif dtype.kind == 'b':
+ return BooleanField(name, **kwargs)
+ else:
+ raise TypeError(dtype)
+
+
+def _generate_file(name, fields, batch_sizes, dictionaries=None, skip=None,
+ metadata=None):
+ schema = Schema(fields, metadata=metadata)
+ batches = []
+ for size in batch_sizes:
+ columns = []
+ for field in fields:
+ col = field.generate_column(size)
+ columns.append(col)
+
+ batches.append(RecordBatch(size, columns))
+
+ return File(name, schema, batches, dictionaries, skip=skip)
+
+
+def generate_custom_metadata_case():
+ def meta(items):
+ # Generate a simple block of metadata where each value is '{}'.
+ # Keys are delimited by whitespace in `items`.
+ return [(k, '{}') for k in items.split()]
+
+ fields = [
+ get_field('sort_of_pandas', 'int8', metadata=meta('pandas')),
+
+ get_field('lots_of_meta', 'int8', metadata=meta('a b c d .. w x y z')),
+
+ get_field(
+ 'unregistered_extension', 'int8',
+ metadata=[
+ ('ARROW:extension:name', '!nonexistent'),
+ ('ARROW:extension:metadata', ''),
+ ('ARROW:integration:allow_unregistered_extension', 'true'),
+ ]),
+
+ ListField('list_with_odd_values',
+ get_field('item', 'int32', metadata=meta('odd_values'))),
+ ]
+
+ batch_sizes = [1]
+ return _generate_file('custom_metadata', fields, batch_sizes,
+ metadata=meta('schema_custom_0 schema_custom_1'))
+
+
+def generate_duplicate_fieldnames_case():
+ fields = [
+ get_field('ints', 'int8'),
+ get_field('ints', 'int32'),
+
+ StructField('struct', [get_field('', 'int32'), get_field('', 'utf8')]),
+ ]
+
+ batch_sizes = [1]
+ return _generate_file('duplicate_fieldnames', fields, batch_sizes)
+
+
+def generate_primitive_case(batch_sizes, name='primitive'):
+ types = ['bool', 'int8', 'int16', 'int32', 'int64',
+ 'uint8', 'uint16', 'uint32', 'uint64',
+ 'float32', 'float64', 'binary', 'utf8',
+ 'fixedsizebinary_19', 'fixedsizebinary_120']
+
+ fields = []
+
+ for type_ in types:
+ fields.append(get_field(type_ + "_nullable", type_, nullable=True))
+ fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))
+
+ return _generate_file(name, fields, batch_sizes)
+
+
+def generate_primitive_large_offsets_case(batch_sizes):
+ types = ['largebinary', 'largeutf8']
+
+ fields = []
+
+ for type_ in types:
+ fields.append(get_field(type_ + "_nullable", type_, nullable=True))
+ fields.append(get_field(type_ + "_nonnullable", type_, nullable=False))
+
+ return _generate_file('primitive_large_offsets', fields, batch_sizes)
+
+
+def generate_null_case(batch_sizes):
+ # Interleave null with non-null types to ensure the appropriate number of
+ # buffers (0) is read and written
+ fields = [
+ NullField(name='f0'),
+ get_field('f1', 'int32'),
+ NullField(name='f2'),
+ get_field('f3', 'float64'),
+ NullField(name='f4')
+ ]
+ return _generate_file('null', fields, batch_sizes)
+
+
+def generate_null_trivial_case(batch_sizes):
+ # Generate a case with no buffers
+ fields = [
+ NullField(name='f0'),
+ ]
+ return _generate_file('null_trivial', fields, batch_sizes)
+
+
+def generate_decimal128_case():
+ fields = [
+ DecimalField(name='f{}'.format(i), precision=precision, scale=2,
+ bit_width=128)
+ for i, precision in enumerate(range(3, 39))
+ ]
+
+ possible_batch_sizes = 7, 10
+ batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
+ # 'decimal' is the original name for the test, and it must match
+ # provide "gold" files that test backwards compatibility, so they
+ # can be appropriately skipped.
+ return _generate_file('decimal', fields, batch_sizes)
+
+
+def generate_decimal256_case():
+ fields = [
+ DecimalField(name='f{}'.format(i), precision=precision, scale=5,
+ bit_width=256)
+ for i, precision in enumerate(range(37, 70))
+ ]
+
+ possible_batch_sizes = 7, 10
+ batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
+ return _generate_file('decimal256', fields, batch_sizes)
+
+
+def generate_datetime_case():
+ fields = [
+ DateField('f0', DateField.DAY),
+ DateField('f1', DateField.MILLISECOND),
+ TimeField('f2', 's'),
+ TimeField('f3', 'ms'),
+ TimeField('f4', 'us'),
+ TimeField('f5', 'ns'),
+ TimestampField('f6', 's'),
+ TimestampField('f7', 'ms'),
+ TimestampField('f8', 'us'),
+ TimestampField('f9', 'ns'),
+ TimestampField('f10', 'ms', tz=None),
+ TimestampField('f11', 's', tz='UTC'),
+ TimestampField('f12', 'ms', tz='US/Eastern'),
+ TimestampField('f13', 'us', tz='Europe/Paris'),
+ TimestampField('f14', 'ns', tz='US/Pacific'),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("datetime", fields, batch_sizes)
+
+
+def generate_interval_case():
+ fields = [
+ DurationIntervalField('f1', 's'),
+ DurationIntervalField('f2', 'ms'),
+ DurationIntervalField('f3', 'us'),
+ DurationIntervalField('f4', 'ns'),
+ YearMonthIntervalField('f5'),
+ DayTimeIntervalField('f6'),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("interval", fields, batch_sizes)
+
+
+def generate_month_day_nano_interval_case():
+ fields = [
+ MonthDayNanoIntervalField('f1'),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("interval_mdn", fields, batch_sizes)
+
+
+def generate_map_case():
+ fields = [
+ MapField('map_nullable', get_field('key', 'utf8', nullable=False),
+ get_field('value', 'int32')),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("map", fields, batch_sizes)
+
+
+def generate_non_canonical_map_case():
+ fields = [
+ MapField('map_other_names',
+ get_field('some_key', 'utf8', nullable=False),
+ get_field('some_value', 'int32'),
+ entries_name='some_entries'),
+ ]
+
+ batch_sizes = [7]
+ return _generate_file("map_non_canonical", fields, batch_sizes)
+
+
+def generate_nested_case():
+ fields = [
+ ListField('list_nullable', get_field('item', 'int32')),
+ FixedSizeListField('fixedsizelist_nullable',
+ get_field('item', 'int32'), 4),
+ StructField('struct_nullable', [get_field('f1', 'int32'),
+ get_field('f2', 'utf8')]),
+ # Fails on Go (ARROW-8452)
+ # ListField('list_nonnullable', get_field('item', 'int32'),
+ # nullable=False),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("nested", fields, batch_sizes)
+
+
+def generate_recursive_nested_case():
+ fields = [
+ ListField('lists_list',
+ ListField('inner_list', get_field('item', 'int16'))),
+ ListField('structs_list',
+ StructField('inner_struct',
+ [get_field('f1', 'int32'),
+ get_field('f2', 'utf8')])),
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file("recursive_nested", fields, batch_sizes)
+
+
+def generate_nested_large_offsets_case():
+ fields = [
+ LargeListField('large_list_nullable', get_field('item', 'int32')),
+ LargeListField('large_list_nonnullable',
+ get_field('item', 'int32'), nullable=False),
+ LargeListField('large_list_nested',
+ ListField('inner_list', get_field('item', 'int16'))),
+ ]
+
+ batch_sizes = [0, 13]
+ return _generate_file("nested_large_offsets", fields, batch_sizes)
+
+
+def generate_unions_case():
+ fields = [
+ SparseUnionField('sparse', [get_field('f1', 'int32'),
+ get_field('f2', 'utf8')],
+ type_ids=[5, 7]),
+ DenseUnionField('dense', [get_field('f1', 'int16'),
+ get_field('f2', 'binary')],
+ type_ids=[10, 20]),
+ SparseUnionField('sparse', [get_field('f1', 'float32', nullable=False),
+ get_field('f2', 'bool')],
+ type_ids=[5, 7], nullable=False),
+ DenseUnionField('dense', [get_field('f1', 'uint8', nullable=False),
+ get_field('f2', 'uint16'),
+ NullField('f3')],
+ type_ids=[42, 43, 44], nullable=False),
+ ]
+
+ batch_sizes = [0, 11]
+ return _generate_file("union", fields, batch_sizes)
+
+
+def generate_dictionary_case():
+ dict0 = Dictionary(0, StringField('dictionary1'), size=10, name='DICT0')
+ dict1 = Dictionary(1, StringField('dictionary1'), size=5, name='DICT1')
+ dict2 = Dictionary(2, get_field('dictionary2', 'int64'),
+ size=50, name='DICT2')
+
+ fields = [
+ DictionaryField('dict0', get_field('', 'int8'), dict0),
+ DictionaryField('dict1', get_field('', 'int32'), dict1),
+ DictionaryField('dict2', get_field('', 'int16'), dict2)
+ ]
+ batch_sizes = [7, 10]
+ return _generate_file("dictionary", fields, batch_sizes,
+ dictionaries=[dict0, dict1, dict2])
+
+
+def generate_dictionary_unsigned_case():
+ dict0 = Dictionary(0, StringField('dictionary0'), size=5, name='DICT0')
+ dict1 = Dictionary(1, StringField('dictionary1'), size=5, name='DICT1')
+ dict2 = Dictionary(2, StringField('dictionary2'), size=5, name='DICT2')
+
+ # TODO: JavaScript does not support uint64 dictionary indices, so disabled
+ # for now
+
+ # dict3 = Dictionary(3, StringField('dictionary3'), size=5, name='DICT3')
+ fields = [
+ DictionaryField('f0', get_field('', 'uint8'), dict0),
+ DictionaryField('f1', get_field('', 'uint16'), dict1),
+ DictionaryField('f2', get_field('', 'uint32'), dict2),
+ # DictionaryField('f3', get_field('', 'uint64'), dict3)
+ ]
+ batch_sizes = [7, 10]
+ return _generate_file("dictionary_unsigned", fields, batch_sizes,
+ dictionaries=[dict0, dict1, dict2])
+
+
+def generate_nested_dictionary_case():
+ dict0 = Dictionary(0, StringField('str'), size=10, name='DICT0')
+
+ list_of_dict = ListField(
+ 'list',
+ DictionaryField('str_dict', get_field('', 'int8'), dict0))
+ dict1 = Dictionary(1, list_of_dict, size=30, name='DICT1')
+
+ struct_of_dict = StructField('struct', [
+ DictionaryField('str_dict_a', get_field('', 'int8'), dict0),
+ DictionaryField('str_dict_b', get_field('', 'int8'), dict0)
+ ])
+ dict2 = Dictionary(2, struct_of_dict, size=30, name='DICT2')
+
+ fields = [
+ DictionaryField('list_dict', get_field('', 'int8'), dict1),
+ DictionaryField('struct_dict', get_field('', 'int8'), dict2)
+ ]
+
+ batch_sizes = [10, 13]
+ return _generate_file("nested_dictionary", fields, batch_sizes,
+ dictionaries=[dict0, dict1, dict2])
+
+
+def generate_extension_case():
+ dict0 = Dictionary(0, StringField('dictionary0'), size=5, name='DICT0')
+
+ uuid_type = ExtensionType('uuid', 'uuid-serialized',
+ FixedSizeBinaryField('', 16))
+ dict_ext_type = ExtensionType(
+ 'dict-extension', 'dict-extension-serialized',
+ DictionaryField('str_dict', get_field('', 'int8'), dict0))
+
+ fields = [
+ ExtensionField('uuids', uuid_type),
+ ExtensionField('dict_exts', dict_ext_type),
+ ]
+
+ batch_sizes = [0, 13]
+ return _generate_file("extension", fields, batch_sizes,
+ dictionaries=[dict0])
+
+
+def get_generated_json_files(tempdir=None):
+ tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')
+
+ def _temp_path():
+ return
+
+ file_objs = [
+ generate_primitive_case([], name='primitive_no_batches'),
+ generate_primitive_case([17, 20], name='primitive'),
+ generate_primitive_case([0, 0, 0], name='primitive_zerolength'),
+
+ generate_primitive_large_offsets_case([17, 20])
+ .skip_category('C#')
+ .skip_category('Go')
+ .skip_category('JS'),
+
+ generate_null_case([10, 0])
+ .skip_category('C#')
+ .skip_category('JS'), # TODO(ARROW-7900)
+
+ generate_null_trivial_case([0, 0])
+ .skip_category('C#')
+ .skip_category('JS'), # TODO(ARROW-7900)
+
+ generate_decimal128_case()
+ .skip_category('Rust'),
+
+ generate_decimal256_case()
+ .skip_category('Go') # TODO(ARROW-7948): Decimal + Go
+ .skip_category('JS')
+ .skip_category('Rust'),
+
+ generate_datetime_case()
+ .skip_category('C#'),
+
+ generate_interval_case()
+ .skip_category('C#')
+ .skip_category('JS') # TODO(ARROW-5239): Intervals + JS
+ .skip_category('Rust'),
+
+ generate_month_day_nano_interval_case()
+ .skip_category('C#')
+ .skip_category('JS')
+ .skip_category('Rust'),
+
+
+ generate_map_case()
+ .skip_category('C#')
+ .skip_category('Rust'),
+
+ generate_non_canonical_map_case()
+ .skip_category('C#')
+ .skip_category('Java') # TODO(ARROW-8715)
+ .skip_category('JS') # TODO(ARROW-8716)
+ .skip_category('Rust'),
+
+ generate_nested_case()
+ .skip_category('C#'),
+
+ generate_recursive_nested_case()
+ .skip_category('C#'),
+
+ generate_nested_large_offsets_case()
+ .skip_category('C#')
+ .skip_category('Go')
+ .skip_category('JS')
+ .skip_category('Rust'),
+
+ generate_unions_case()
+ .skip_category('C#')
+ .skip_category('Go')
+ .skip_category('JS')
+ .skip_category('Rust'),
+
+ generate_custom_metadata_case()
+ .skip_category('C#')
+ .skip_category('JS'),
+
+ generate_duplicate_fieldnames_case()
+ .skip_category('C#')
+ .skip_category('Go')
+ .skip_category('JS'),
+
+ # TODO(ARROW-3039, ARROW-5267): Dictionaries in GO
+ generate_dictionary_case()
+ .skip_category('C#')
+ .skip_category('Go'),
+
+ generate_dictionary_unsigned_case()
+ .skip_category('C#')
+ .skip_category('Go') # TODO(ARROW-9378)
+ .skip_category('Java'), # TODO(ARROW-9377)
+
+ generate_nested_dictionary_case()
+ .skip_category('C#')
+ .skip_category('Go')
+ .skip_category('Java') # TODO(ARROW-7779)
+ .skip_category('JS')
+ .skip_category('Rust'),
+
+ generate_extension_case()
+ .skip_category('C#')
+ .skip_category('Go') # TODO(ARROW-3039): requires dictionaries
+ .skip_category('JS')
+ .skip_category('Rust'),
+ ]
+
+ generated_paths = []
+ for file_obj in file_objs:
+ out_path = os.path.join(tempdir, 'generated_' +
+ file_obj.name + '.json')
+ file_obj.write(out_path)
+ generated_paths.append(file_obj)
+
+ return generated_paths
diff --git a/src/arrow/dev/archery/archery/integration/runner.py b/src/arrow/dev/archery/archery/integration/runner.py
new file mode 100644
index 000000000..463917b81
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/runner.py
@@ -0,0 +1,429 @@
+# licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import namedtuple
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+import glob
+import gzip
+import itertools
+import os
+import sys
+import tempfile
+import traceback
+
+from .scenario import Scenario
+from .tester_cpp import CPPTester
+from .tester_go import GoTester
+from .tester_rust import RustTester
+from .tester_java import JavaTester
+from .tester_js import JSTester
+from .tester_csharp import CSharpTester
+from .util import (ARROW_ROOT_DEFAULT, guid, SKIP_ARROW, SKIP_FLIGHT,
+ printer)
+from . import datagen
+
+
+Failure = namedtuple('Failure',
+ ('test_case', 'producer', 'consumer', 'exc_info'))
+
+log = printer.print
+
+
+class Outcome:
+ def __init__(self):
+ self.failure = None
+ self.skipped = False
+
+
+class IntegrationRunner(object):
+
+ def __init__(self, json_files, flight_scenarios, testers, tempdir=None,
+ debug=False, stop_on_error=True, gold_dirs=None,
+ serial=False, match=None, **unused_kwargs):
+ self.json_files = json_files
+ self.flight_scenarios = flight_scenarios
+ self.testers = testers
+ self.temp_dir = tempdir or tempfile.mkdtemp()
+ self.debug = debug
+ self.stop_on_error = stop_on_error
+ self.serial = serial
+ self.gold_dirs = gold_dirs
+ self.failures = []
+ self.match = match
+
+ if self.match is not None:
+ print("-- Only running tests with {} in their name"
+ .format(self.match))
+ self.json_files = [json_file for json_file in self.json_files
+ if self.match in json_file.name]
+
+ def run(self):
+ """
+ Run Arrow IPC integration tests for the matrix of enabled
+ implementations.
+ """
+ for producer, consumer in itertools.product(
+ filter(lambda t: t.PRODUCER, self.testers),
+ filter(lambda t: t.CONSUMER, self.testers)):
+ self._compare_implementations(
+ producer, consumer, self._produce_consume,
+ self.json_files)
+ if self.gold_dirs:
+ for gold_dir, consumer in itertools.product(
+ self.gold_dirs,
+ filter(lambda t: t.CONSUMER, self.testers)):
+ log('\n\n\n\n')
+ log('******************************************************')
+ log('Tests against golden files in {}'.format(gold_dir))
+ log('******************************************************')
+
+ def run_gold(producer, consumer, outcome, test_case):
+ self._run_gold(gold_dir, producer, consumer, outcome,
+ test_case)
+ self._compare_implementations(
+ consumer, consumer, run_gold,
+ self._gold_tests(gold_dir))
+
+ def run_flight(self):
+ """
+ Run Arrow Flight integration tests for the matrix of enabled
+ implementations.
+ """
+ servers = filter(lambda t: t.FLIGHT_SERVER, self.testers)
+ clients = filter(lambda t: (t.FLIGHT_CLIENT and t.CONSUMER),
+ self.testers)
+ for server, client in itertools.product(servers, clients):
+ self._compare_flight_implementations(server, client)
+
+ def _gold_tests(self, gold_dir):
+ prefix = os.path.basename(os.path.normpath(gold_dir))
+ SUFFIX = ".json.gz"
+ golds = [jf for jf in os.listdir(gold_dir) if jf.endswith(SUFFIX)]
+ for json_path in golds:
+ name = json_path[json_path.index('_')+1: -len(SUFFIX)]
+ base_name = prefix + "_" + name + ".gold.json"
+ out_path = os.path.join(self.temp_dir, base_name)
+ with gzip.open(os.path.join(gold_dir, json_path)) as i:
+ with open(out_path, "wb") as out:
+ out.write(i.read())
+
+ try:
+ skip = next(f for f in self.json_files
+ if f.name == name).skip
+ except StopIteration:
+ skip = set()
+ if name == 'union' and prefix == '0.17.1':
+ skip.add("Java")
+ if prefix == '1.0.0-bigendian' or prefix == '1.0.0-littleendian':
+ skip.add("C#")
+ skip.add("Go")
+ skip.add("Java")
+ skip.add("JS")
+ skip.add("Rust")
+ if prefix == '2.0.0-compression':
+ skip.add("C#")
+ skip.add("JS")
+ skip.add("Rust")
+
+ # See https://github.com/apache/arrow/pull/9822 for how to
+ # disable specific compression type tests.
+
+ if prefix == '4.0.0-shareddict':
+ skip.add("C#")
+ skip.add("Go")
+
+ yield datagen.File(name, None, None, skip=skip, path=out_path)
+
+ def _run_test_cases(self, producer, consumer, case_runner,
+ test_cases):
+ def case_wrapper(test_case):
+ with printer.cork():
+ return case_runner(test_case)
+
+ if self.failures and self.stop_on_error:
+ return
+
+ if self.serial:
+ for outcome in map(case_wrapper, test_cases):
+ if outcome.failure is not None:
+ self.failures.append(outcome.failure)
+ if self.stop_on_error:
+ break
+
+ else:
+ with ThreadPoolExecutor() as executor:
+ for outcome in executor.map(case_wrapper, test_cases):
+ if outcome.failure is not None:
+ self.failures.append(outcome.failure)
+ if self.stop_on_error:
+ break
+
+ def _compare_implementations(
+ self, producer, consumer, run_binaries, test_cases):
+ """
+ Compare Arrow IPC for two implementations (one producer, one consumer).
+ """
+ log('##########################################################')
+ log('IPC: {0} producing, {1} consuming'
+ .format(producer.name, consumer.name))
+ log('##########################################################')
+
+ case_runner = partial(self._run_ipc_test_case,
+ producer, consumer, run_binaries)
+ self._run_test_cases(producer, consumer, case_runner, test_cases)
+
+ def _run_ipc_test_case(self, producer, consumer, run_binaries, test_case):
+ """
+ Run one IPC test case.
+ """
+ outcome = Outcome()
+
+ json_path = test_case.path
+ log('==========================================================')
+ log('Testing file {0}'.format(json_path))
+ log('==========================================================')
+
+ if producer.name in test_case.skip:
+ log('-- Skipping test because producer {0} does '
+ 'not support'.format(producer.name))
+ outcome.skipped = True
+
+ elif consumer.name in test_case.skip:
+ log('-- Skipping test because consumer {0} does '
+ 'not support'.format(consumer.name))
+ outcome.skipped = True
+
+ elif SKIP_ARROW in test_case.skip:
+ log('-- Skipping test')
+ outcome.skipped = True
+
+ else:
+ try:
+ run_binaries(producer, consumer, outcome, test_case)
+ except Exception:
+ traceback.print_exc(file=printer.stdout)
+ outcome.failure = Failure(test_case, producer, consumer,
+ sys.exc_info())
+
+ return outcome
+
+ def _produce_consume(self, producer, consumer, outcome, test_case):
+ # Make the random access file
+ json_path = test_case.path
+ file_id = guid()[:8]
+ name = os.path.splitext(os.path.basename(json_path))[0]
+
+ producer_file_path = os.path.join(self.temp_dir, file_id + '_' +
+ name + '.json_as_file')
+ producer_stream_path = os.path.join(self.temp_dir, file_id + '_' +
+ name + '.producer_file_as_stream')
+ consumer_file_path = os.path.join(self.temp_dir, file_id + '_' +
+ name + '.consumer_stream_as_file')
+
+ log('-- Creating binary inputs')
+ producer.json_to_file(json_path, producer_file_path)
+
+ # Validate the file
+ log('-- Validating file')
+ consumer.validate(json_path, producer_file_path)
+
+ log('-- Validating stream')
+ producer.file_to_stream(producer_file_path, producer_stream_path)
+ consumer.stream_to_file(producer_stream_path, consumer_file_path)
+ consumer.validate(json_path, consumer_file_path)
+
+ def _run_gold(self, gold_dir, producer, consumer, outcome, test_case):
+ json_path = test_case.path
+
+ # Validate the file
+ log('-- Validating file')
+ producer_file_path = os.path.join(
+ gold_dir, "generated_" + test_case.name + ".arrow_file")
+ consumer.validate(json_path, producer_file_path)
+
+ log('-- Validating stream')
+ consumer_stream_path = os.path.join(
+ gold_dir, "generated_" + test_case.name + ".stream")
+ file_id = guid()[:8]
+ name = os.path.splitext(os.path.basename(json_path))[0]
+
+ consumer_file_path = os.path.join(self.temp_dir, file_id + '_' +
+ name + '.consumer_stream_as_file')
+
+ consumer.stream_to_file(consumer_stream_path, consumer_file_path)
+ consumer.validate(json_path, consumer_file_path)
+
+ def _compare_flight_implementations(self, producer, consumer):
+ log('##########################################################')
+ log('Flight: {0} serving, {1} requesting'
+ .format(producer.name, consumer.name))
+ log('##########################################################')
+
+ case_runner = partial(self._run_flight_test_case, producer, consumer)
+ self._run_test_cases(producer, consumer, case_runner,
+ self.json_files + self.flight_scenarios)
+
+ def _run_flight_test_case(self, producer, consumer, test_case):
+ """
+ Run one Flight test case.
+ """
+ outcome = Outcome()
+
+ log('=' * 58)
+ log('Testing file {0}'.format(test_case.name))
+ log('=' * 58)
+
+ if producer.name in test_case.skip:
+ log('-- Skipping test because producer {0} does '
+ 'not support'.format(producer.name))
+ outcome.skipped = True
+
+ elif consumer.name in test_case.skip:
+ log('-- Skipping test because consumer {0} does '
+ 'not support'.format(consumer.name))
+ outcome.skipped = True
+
+ elif SKIP_FLIGHT in test_case.skip:
+ log('-- Skipping test')
+ outcome.skipped = True
+
+ else:
+ try:
+ if isinstance(test_case, Scenario):
+ server = producer.flight_server(test_case.name)
+ client_args = {'scenario_name': test_case.name}
+ else:
+ server = producer.flight_server()
+ client_args = {'json_path': test_case.path}
+
+ with server as port:
+ # Have the client upload the file, then download and
+ # compare
+ consumer.flight_request(port, **client_args)
+ except Exception:
+ traceback.print_exc(file=printer.stdout)
+ outcome.failure = Failure(test_case, producer, consumer,
+ sys.exc_info())
+
+ return outcome
+
+
+def get_static_json_files():
+ glob_pattern = os.path.join(ARROW_ROOT_DEFAULT,
+ 'integration', 'data', '*.json')
+ return [
+ datagen.File(name=os.path.basename(p), path=p, skip=set(),
+ schema=None, batches=None)
+ for p in glob.glob(glob_pattern)
+ ]
+
+
+def run_all_tests(with_cpp=True, with_java=True, with_js=True,
+ with_csharp=True, with_go=True, with_rust=False,
+ run_flight=False, tempdir=None, **kwargs):
+ tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')
+
+ testers = []
+
+ if with_cpp:
+ testers.append(CPPTester(**kwargs))
+
+ if with_java:
+ testers.append(JavaTester(**kwargs))
+
+ if with_js:
+ testers.append(JSTester(**kwargs))
+
+ if with_csharp:
+ testers.append(CSharpTester(**kwargs))
+
+ if with_go:
+ testers.append(GoTester(**kwargs))
+
+ if with_rust:
+ testers.append(RustTester(**kwargs))
+
+ static_json_files = get_static_json_files()
+ generated_json_files = datagen.get_generated_json_files(tempdir=tempdir)
+ json_files = static_json_files + generated_json_files
+
+ # Additional integration test cases for Arrow Flight.
+ flight_scenarios = [
+ Scenario(
+ "auth:basic_proto",
+ description="Authenticate using the BasicAuth protobuf."),
+ Scenario(
+ "middleware",
+ description="Ensure headers are propagated via middleware.",
+ skip={"Rust"} # TODO(ARROW-10961): tonic upgrade needed
+ ),
+ ]
+
+ runner = IntegrationRunner(json_files, flight_scenarios, testers, **kwargs)
+ runner.run()
+ if run_flight:
+ runner.run_flight()
+
+ fail_count = 0
+ if runner.failures:
+ log("################# FAILURES #################")
+ for test_case, producer, consumer, exc_info in runner.failures:
+ fail_count += 1
+ log("FAILED TEST:", end=" ")
+ log(test_case.name, producer.name, "producing, ",
+ consumer.name, "consuming")
+ if exc_info:
+ traceback.print_exception(*exc_info)
+ log()
+
+ log(fail_count, "failures")
+ if fail_count > 0:
+ sys.exit(1)
+
+
+def write_js_test_json(directory):
+ datagen.generate_map_case().write(
+ os.path.join(directory, 'map.json')
+ )
+ datagen.generate_nested_case().write(
+ os.path.join(directory, 'nested.json')
+ )
+ datagen.generate_decimal128_case().write(
+ os.path.join(directory, 'decimal.json')
+ )
+ datagen.generate_decimal256_case().write(
+ os.path.join(directory, 'decimal256.json')
+ )
+ datagen.generate_datetime_case().write(
+ os.path.join(directory, 'datetime.json')
+ )
+ datagen.generate_dictionary_case().write(
+ os.path.join(directory, 'dictionary.json')
+ )
+ datagen.generate_dictionary_unsigned_case().write(
+ os.path.join(directory, 'dictionary_unsigned.json')
+ )
+ datagen.generate_primitive_case([]).write(
+ os.path.join(directory, 'primitive_no_batches.json')
+ )
+ datagen.generate_primitive_case([7, 10]).write(
+ os.path.join(directory, 'primitive.json')
+ )
+ datagen.generate_primitive_case([0, 0, 0]).write(
+ os.path.join(directory, 'primitive-empty.json')
+ )
diff --git a/src/arrow/dev/archery/archery/integration/scenario.py b/src/arrow/dev/archery/archery/integration/scenario.py
new file mode 100644
index 000000000..1fcbca64e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/scenario.py
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+class Scenario:
+ """
+ An integration test scenario for Arrow Flight.
+
+ Does not correspond to a particular IPC JSON file.
+ """
+
+ def __init__(self, name, description, skip=None):
+ self.name = name
+ self.description = description
+ self.skip = skip or set()
diff --git a/src/arrow/dev/archery/archery/integration/tester.py b/src/arrow/dev/archery/archery/integration/tester.py
new file mode 100644
index 000000000..122e4f2e4
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Base class for language-specific integration test harnesses
+
+import subprocess
+
+from .util import log
+
+
+class Tester(object):
+ PRODUCER = False
+ CONSUMER = False
+ FLIGHT_SERVER = False
+ FLIGHT_CLIENT = False
+
+ def __init__(self, debug=False, **args):
+ self.args = args
+ self.debug = debug
+
+ def run_shell_command(self, cmd):
+ cmd = ' '.join(cmd)
+ if self.debug:
+ log(cmd)
+ subprocess.check_call(cmd, shell=True)
+
+ def json_to_file(self, json_path, arrow_path):
+ raise NotImplementedError
+
+ def stream_to_file(self, stream_path, file_path):
+ raise NotImplementedError
+
+ def file_to_stream(self, file_path, stream_path):
+ raise NotImplementedError
+
+ def validate(self, json_path, arrow_path):
+ raise NotImplementedError
+
+ def flight_server(self, scenario_name=None):
+ """Start the Flight server on a free port.
+
+ This should be a context manager that returns the port as the
+ managed object, and cleans up the server on exit.
+ """
+ raise NotImplementedError
+
+ def flight_request(self, port, json_path=None, scenario_name=None):
+ raise NotImplementedError
diff --git a/src/arrow/dev/archery/archery/integration/tester_cpp.py b/src/arrow/dev/archery/archery/integration/tester_cpp.py
new file mode 100644
index 000000000..d35c9550e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_cpp.py
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import os
+import subprocess
+
+from .tester import Tester
+from .util import run_cmd, ARROW_ROOT_DEFAULT, log
+
+
+class CPPTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+ FLIGHT_SERVER = True
+ FLIGHT_CLIENT = True
+
+ EXE_PATH = os.environ.get(
+ 'ARROW_CPP_EXE_PATH',
+ os.path.join(ARROW_ROOT_DEFAULT, 'cpp/build/debug'))
+
+ CPP_INTEGRATION_EXE = os.path.join(EXE_PATH, 'arrow-json-integration-test')
+ STREAM_TO_FILE = os.path.join(EXE_PATH, 'arrow-stream-to-file')
+ FILE_TO_STREAM = os.path.join(EXE_PATH, 'arrow-file-to-stream')
+
+ FLIGHT_SERVER_CMD = [
+ os.path.join(EXE_PATH, 'flight-test-integration-server')]
+ FLIGHT_CLIENT_CMD = [
+ os.path.join(EXE_PATH, 'flight-test-integration-client'),
+ "-host", "localhost"]
+
+ name = 'C++'
+
+ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
+ cmd = [self.CPP_INTEGRATION_EXE, '--integration']
+
+ if arrow_path is not None:
+ cmd.append('--arrow=' + arrow_path)
+
+ if json_path is not None:
+ cmd.append('--json=' + json_path)
+
+ cmd.append('--mode=' + command)
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'VALIDATE')
+
+ def json_to_file(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'JSON_TO_ARROW')
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = [self.STREAM_TO_FILE, '<', stream_path, '>', file_path]
+ self.run_shell_command(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = [self.FILE_TO_STREAM, file_path, '>', stream_path]
+ self.run_shell_command(cmd)
+
+ @contextlib.contextmanager
+ def flight_server(self, scenario_name=None):
+ cmd = self.FLIGHT_SERVER_CMD + ['-port=0']
+ if scenario_name:
+ cmd = cmd + ["-scenario", scenario_name]
+ if self.debug:
+ log(' '.join(cmd))
+ server = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ try:
+ output = server.stdout.readline().decode()
+ if not output.startswith("Server listening on localhost:"):
+ server.kill()
+ out, err = server.communicate()
+ raise RuntimeError(
+ "Flight-C++ server did not start properly, "
+ "stdout:\n{}\n\nstderr:\n{}\n"
+ .format(output + out.decode(), err.decode()))
+ port = int(output.split(":")[1])
+ yield port
+ finally:
+ server.kill()
+ server.wait(5)
+
+ def flight_request(self, port, json_path=None, scenario_name=None):
+ cmd = self.FLIGHT_CLIENT_CMD + [
+ '-port=' + str(port),
+ ]
+ if json_path:
+ cmd.extend(('-path', json_path))
+ elif scenario_name:
+ cmd.extend(('-scenario', scenario_name))
+ else:
+ raise TypeError("Must provide one of json_path or scenario_name")
+
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
diff --git a/src/arrow/dev/archery/archery/integration/tester_csharp.py b/src/arrow/dev/archery/archery/integration/tester_csharp.py
new file mode 100644
index 000000000..130c49cfe
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_csharp.py
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from .tester import Tester
+from .util import run_cmd, ARROW_ROOT_DEFAULT, log
+
+
+class CSharpTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+
+ EXE_PATH = os.path.join(
+ ARROW_ROOT_DEFAULT,
+ 'csharp/artifacts/Apache.Arrow.IntegrationTest',
+ 'Debug/netcoreapp3.1/Apache.Arrow.IntegrationTest')
+
+ name = 'C#'
+
+ def _run(self, json_path=None, arrow_path=None, command='validate'):
+ cmd = [self.EXE_PATH]
+
+ cmd.extend(['--mode', command])
+
+ if json_path is not None:
+ cmd.extend(['-j', json_path])
+
+ if arrow_path is not None:
+ cmd.extend(['-a', arrow_path])
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(json_path, arrow_path, 'validate')
+
+ def json_to_file(self, json_path, arrow_path):
+ return self._run(json_path, arrow_path, 'json-to-arrow')
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = [self.EXE_PATH]
+ cmd.extend(['--mode', 'stream-to-file', '-a', file_path])
+ cmd.extend(['<', stream_path])
+ self.run_shell_command(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = [self.EXE_PATH]
+ cmd.extend(['--mode', 'file-to-stream'])
+ cmd.extend(['-a', file_path, '>', stream_path])
+ self.run_shell_command(cmd)
diff --git a/src/arrow/dev/archery/archery/integration/tester_go.py b/src/arrow/dev/archery/archery/integration/tester_go.py
new file mode 100644
index 000000000..eeba38fe5
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_go.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import os
+import subprocess
+
+from .tester import Tester
+from .util import run_cmd, log
+
+
+class GoTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+ FLIGHT_SERVER = True
+ FLIGHT_CLIENT = True
+
+ # FIXME(sbinet): revisit for Go modules
+ HOME = os.getenv('HOME', '~')
+ GOPATH = os.getenv('GOPATH', os.path.join(HOME, 'go'))
+ GOBIN = os.environ.get('GOBIN', os.path.join(GOPATH, 'bin'))
+
+ GO_INTEGRATION_EXE = os.path.join(GOBIN, 'arrow-json-integration-test')
+ STREAM_TO_FILE = os.path.join(GOBIN, 'arrow-stream-to-file')
+ FILE_TO_STREAM = os.path.join(GOBIN, 'arrow-file-to-stream')
+
+ FLIGHT_SERVER_CMD = [
+ os.path.join(GOBIN, 'arrow-flight-integration-server')]
+ FLIGHT_CLIENT_CMD = [
+ os.path.join(GOBIN, 'arrow-flight-integration-client'),
+ '-host', 'localhost']
+
+ name = 'Go'
+
+ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
+ cmd = [self.GO_INTEGRATION_EXE]
+
+ if arrow_path is not None:
+ cmd.extend(['-arrow', arrow_path])
+
+ if json_path is not None:
+ cmd.extend(['-json', json_path])
+
+ cmd.extend(['-mode', command])
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'VALIDATE')
+
+ def json_to_file(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'JSON_TO_ARROW')
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = [self.STREAM_TO_FILE, '<', stream_path, '>', file_path]
+ self.run_shell_command(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = [self.FILE_TO_STREAM, file_path, '>', stream_path]
+ self.run_shell_command(cmd)
+
+ @contextlib.contextmanager
+ def flight_server(self, scenario_name=None):
+ cmd = self.FLIGHT_SERVER_CMD + ['-port=0']
+ if scenario_name:
+ cmd = cmd + ['-scenario', scenario_name]
+ if self.debug:
+ log(' '.join(cmd))
+ server = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ try:
+ output = server.stdout.readline().decode()
+ if not output.startswith("Server listening on localhost:"):
+ server.kill()
+ out, err = server.communicate()
+ raise RuntimeError(
+ "Flight-Go server did not start properly, "
+ "stdout: \n{}\n\nstderr:\n{}\n"
+ .format(output + out.decode(), err.decode())
+ )
+ port = int(output.split(":")[1])
+ yield port
+ finally:
+ server.kill()
+ server.wait(5)
+
+ def flight_request(self, port, json_path=None, scenario_name=None):
+ cmd = self.FLIGHT_CLIENT_CMD + [
+ '-port=' + str(port),
+ ]
+ if json_path:
+ cmd.extend(('-path', json_path))
+ elif scenario_name:
+ cmd.extend(('-scenario', scenario_name))
+ else:
+ raise TypeError("Must provide one of json_path or scenario_name")
+
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
diff --git a/src/arrow/dev/archery/archery/integration/tester_java.py b/src/arrow/dev/archery/archery/integration/tester_java.py
new file mode 100644
index 000000000..f283f6cd2
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_java.py
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import os
+import subprocess
+
+from .tester import Tester
+from .util import run_cmd, ARROW_ROOT_DEFAULT, log
+
+
+def load_version_from_pom():
+ import xml.etree.ElementTree as ET
+ tree = ET.parse(os.path.join(ARROW_ROOT_DEFAULT, 'java', 'pom.xml'))
+ tag_pattern = '{http://maven.apache.org/POM/4.0.0}version'
+ version_tag = list(tree.getroot().findall(tag_pattern))[0]
+ return version_tag.text
+
+
+class JavaTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+ FLIGHT_SERVER = True
+ FLIGHT_CLIENT = True
+
+ JAVA_OPTS = ['-Dio.netty.tryReflectionSetAccessible=true',
+ '-Darrow.struct.conflict.policy=CONFLICT_APPEND']
+
+ _arrow_version = load_version_from_pom()
+ ARROW_TOOLS_JAR = os.environ.get(
+ 'ARROW_JAVA_INTEGRATION_JAR',
+ os.path.join(ARROW_ROOT_DEFAULT,
+ 'java/tools/target/arrow-tools-{}-'
+ 'jar-with-dependencies.jar'.format(_arrow_version)))
+ ARROW_FLIGHT_JAR = os.environ.get(
+ 'ARROW_FLIGHT_JAVA_INTEGRATION_JAR',
+ os.path.join(ARROW_ROOT_DEFAULT,
+ 'java/flight/flight-core/target/flight-core-{}-'
+ 'jar-with-dependencies.jar'.format(_arrow_version)))
+ ARROW_FLIGHT_SERVER = ('org.apache.arrow.flight.example.integration.'
+ 'IntegrationTestServer')
+ ARROW_FLIGHT_CLIENT = ('org.apache.arrow.flight.example.integration.'
+ 'IntegrationTestClient')
+
+ name = 'Java'
+
+ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
+ cmd = ['java'] + self.JAVA_OPTS + \
+ ['-cp', self.ARROW_TOOLS_JAR, 'org.apache.arrow.tools.Integration']
+
+ if arrow_path is not None:
+ cmd.extend(['-a', arrow_path])
+
+ if json_path is not None:
+ cmd.extend(['-j', json_path])
+
+ cmd.extend(['-c', command])
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'VALIDATE')
+
+ def json_to_file(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'JSON_TO_ARROW')
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = ['java'] + self.JAVA_OPTS + \
+ ['-cp', self.ARROW_TOOLS_JAR,
+ 'org.apache.arrow.tools.StreamToFile', stream_path, file_path]
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = ['java'] + self.JAVA_OPTS + \
+ ['-cp', self.ARROW_TOOLS_JAR,
+ 'org.apache.arrow.tools.FileToStream', file_path, stream_path]
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
+
+ def flight_request(self, port, json_path=None, scenario_name=None):
+ cmd = ['java'] + self.JAVA_OPTS + \
+ ['-cp', self.ARROW_FLIGHT_JAR, self.ARROW_FLIGHT_CLIENT,
+ '-port', str(port)]
+
+ if json_path:
+ cmd.extend(('-j', json_path))
+ elif scenario_name:
+ cmd.extend(('-scenario', scenario_name))
+ else:
+ raise TypeError("Must provide one of json_path or scenario_name")
+
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
+
+ @contextlib.contextmanager
+ def flight_server(self, scenario_name=None):
+ cmd = ['java'] + self.JAVA_OPTS + \
+ ['-cp', self.ARROW_FLIGHT_JAR, self.ARROW_FLIGHT_SERVER,
+ '-port', '0']
+ if scenario_name:
+ cmd.extend(('-scenario', scenario_name))
+ if self.debug:
+ log(' '.join(cmd))
+ server = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ try:
+ output = server.stdout.readline().decode()
+ if not output.startswith("Server listening on localhost:"):
+ server.kill()
+ out, err = server.communicate()
+ raise RuntimeError(
+ "Flight-Java server did not start properly, "
+ "stdout:\n{}\n\nstderr:\n{}\n"
+ .format(output + out.decode(), err.decode()))
+ port = int(output.split(":")[1])
+ yield port
+ finally:
+ server.kill()
+ server.wait(5)
diff --git a/src/arrow/dev/archery/archery/integration/tester_js.py b/src/arrow/dev/archery/archery/integration/tester_js.py
new file mode 100644
index 000000000..e24eec0ca
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_js.py
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from .tester import Tester
+from .util import run_cmd, ARROW_ROOT_DEFAULT, log
+
+
+class JSTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+
+ EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, 'js/bin')
+ VALIDATE = os.path.join(EXE_PATH, 'integration.js')
+ JSON_TO_ARROW = os.path.join(EXE_PATH, 'json-to-arrow.js')
+ STREAM_TO_FILE = os.path.join(EXE_PATH, 'stream-to-file.js')
+ FILE_TO_STREAM = os.path.join(EXE_PATH, 'file-to-stream.js')
+
+ name = 'JS'
+
+ def _run(self, exe_cmd, arrow_path=None, json_path=None,
+ command='VALIDATE'):
+ cmd = [exe_cmd]
+
+ if arrow_path is not None:
+ cmd.extend(['-a', arrow_path])
+
+ if json_path is not None:
+ cmd.extend(['-j', json_path])
+
+ cmd.extend(['--mode', command])
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(self.VALIDATE, arrow_path, json_path, 'VALIDATE')
+
+ def json_to_file(self, json_path, arrow_path):
+ cmd = ['node',
+ '--no-warnings', self.JSON_TO_ARROW,
+ '-a', arrow_path,
+ '-j', json_path]
+ self.run_shell_command(cmd)
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = ['node', '--no-warnings', self.STREAM_TO_FILE,
+ '<', stream_path,
+ '>', file_path]
+ self.run_shell_command(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = ['node', '--no-warnings', self.FILE_TO_STREAM,
+ '<', file_path,
+ '>', stream_path]
+ self.run_shell_command(cmd)
diff --git a/src/arrow/dev/archery/archery/integration/tester_rust.py b/src/arrow/dev/archery/archery/integration/tester_rust.py
new file mode 100644
index 000000000..bca80ebae
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/tester_rust.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import os
+import subprocess
+
+from .tester import Tester
+from .util import run_cmd, ARROW_ROOT_DEFAULT, log
+
+
+class RustTester(Tester):
+ PRODUCER = True
+ CONSUMER = True
+ FLIGHT_SERVER = True
+ FLIGHT_CLIENT = True
+
+ EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, 'rust/target/debug')
+
+ RUST_INTEGRATION_EXE = os.path.join(EXE_PATH,
+ 'arrow-json-integration-test')
+ STREAM_TO_FILE = os.path.join(EXE_PATH, 'arrow-stream-to-file')
+ FILE_TO_STREAM = os.path.join(EXE_PATH, 'arrow-file-to-stream')
+
+ FLIGHT_SERVER_CMD = [
+ os.path.join(EXE_PATH, 'flight-test-integration-server')]
+ FLIGHT_CLIENT_CMD = [
+ os.path.join(EXE_PATH, 'flight-test-integration-client'),
+ "--host", "localhost"]
+
+ name = 'Rust'
+
+ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
+ cmd = [self.RUST_INTEGRATION_EXE, '--integration']
+
+ if arrow_path is not None:
+ cmd.append('--arrow=' + arrow_path)
+
+ if json_path is not None:
+ cmd.append('--json=' + json_path)
+
+ cmd.append('--mode=' + command)
+
+ if self.debug:
+ log(' '.join(cmd))
+
+ run_cmd(cmd)
+
+ def validate(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'VALIDATE')
+
+ def json_to_file(self, json_path, arrow_path):
+ return self._run(arrow_path, json_path, 'JSON_TO_ARROW')
+
+ def stream_to_file(self, stream_path, file_path):
+ cmd = [self.STREAM_TO_FILE, '<', stream_path, '>', file_path]
+ self.run_shell_command(cmd)
+
+ def file_to_stream(self, file_path, stream_path):
+ cmd = [self.FILE_TO_STREAM, file_path, '>', stream_path]
+ self.run_shell_command(cmd)
+
+ @contextlib.contextmanager
+ def flight_server(self, scenario_name=None):
+ cmd = self.FLIGHT_SERVER_CMD + ['--port=0']
+ if scenario_name:
+ cmd = cmd + ["--scenario", scenario_name]
+ if self.debug:
+ log(' '.join(cmd))
+ server = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ try:
+ output = server.stdout.readline().decode()
+ if not output.startswith("Server listening on localhost:"):
+ server.kill()
+ out, err = server.communicate()
+ raise RuntimeError(
+ "Flight-Rust server did not start properly, "
+ "stdout:\n{}\n\nstderr:\n{}\n"
+ .format(output + out.decode(), err.decode()))
+ port = int(output.split(":")[1])
+ yield port
+ finally:
+ server.kill()
+ server.wait(5)
+
+ def flight_request(self, port, json_path=None, scenario_name=None):
+ cmd = self.FLIGHT_CLIENT_CMD + [
+ '--port=' + str(port),
+ ]
+ if json_path:
+ cmd.extend(('--path', json_path))
+ elif scenario_name:
+ cmd.extend(('--scenario', scenario_name))
+ else:
+ raise TypeError("Must provide one of json_path or scenario_name")
+
+ if self.debug:
+ log(' '.join(cmd))
+ run_cmd(cmd)
diff --git a/src/arrow/dev/archery/archery/integration/util.py b/src/arrow/dev/archery/archery/integration/util.py
new file mode 100644
index 000000000..a4c4982ec
--- /dev/null
+++ b/src/arrow/dev/archery/archery/integration/util.py
@@ -0,0 +1,166 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import io
+import os
+import random
+import socket
+import subprocess
+import sys
+import threading
+import uuid
+
+import numpy as np
+
+
+def guid():
+ return uuid.uuid4().hex
+
+
+# SKIP categories
+SKIP_ARROW = 'arrow'
+SKIP_FLIGHT = 'flight'
+
+ARROW_ROOT_DEFAULT = os.environ.get(
+ 'ARROW_ROOT',
+ os.path.abspath(__file__).rsplit("/", 5)[0]
+)
+
+
+class _Printer:
+ """
+ A print()-providing object that can override the stream output on
+ a per-thread basis.
+ """
+
+ def __init__(self):
+ self._tls = threading.local()
+
+ def _get_stdout(self):
+ try:
+ return self._tls.stdout
+ except AttributeError:
+ self._tls.stdout = sys.stdout
+ self._tls.corked = False
+ return self._tls.stdout
+
+ def print(self, *args, **kwargs):
+ """
+ A variant of print() that writes to a thread-local stream.
+ """
+ print(*args, file=self._get_stdout(), **kwargs)
+
+ @property
+ def stdout(self):
+ """
+ A thread-local stdout wrapper that may be temporarily buffered
+ using `cork()`.
+ """
+ return self._get_stdout()
+
+ @contextlib.contextmanager
+ def cork(self):
+ """
+ Temporarily buffer this thread's stream and write out its contents
+ at the end of the context manager. Useful to avoid interleaved
+ output when multiple threads output progress information.
+ """
+ outer_stdout = self._get_stdout()
+ assert not self._tls.corked, "reentrant call"
+ inner_stdout = self._tls.stdout = io.StringIO()
+ self._tls.corked = True
+ try:
+ yield
+ finally:
+ self._tls.stdout = outer_stdout
+ self._tls.corked = False
+ outer_stdout.write(inner_stdout.getvalue())
+ outer_stdout.flush()
+
+
+printer = _Printer()
+log = printer.print
+
+
+_RAND_CHARS = np.array(list("abcdefghijklmnop123456Ârrôwµ£°€矢"), dtype="U")
+
+
+def random_utf8(nchars):
+ """
+ Generate one random UTF8 string.
+ """
+ return ''.join(np.random.choice(_RAND_CHARS, nchars))
+
+
+def random_bytes(nbytes):
+ """
+ Generate one random binary string.
+ """
+ # NOTE getrandbits(0) fails
+ if nbytes > 0:
+ return random.getrandbits(nbytes * 8).to_bytes(nbytes,
+ byteorder='little')
+ else:
+ return b""
+
+
+def tobytes(o):
+ if isinstance(o, str):
+ return o.encode('utf8')
+ return o
+
+
+def frombytes(o):
+ if isinstance(o, bytes):
+ return o.decode('utf8')
+ return o
+
+
+def run_cmd(cmd):
+ if isinstance(cmd, str):
+ cmd = cmd.split(' ')
+
+ try:
+ output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+ except subprocess.CalledProcessError as e:
+ # this avoids hiding the stdout / stderr of failed processes
+ sio = io.StringIO()
+ print('Command failed:', " ".join(cmd), file=sio)
+ print('With output:', file=sio)
+ print('--------------', file=sio)
+ print(frombytes(e.output), file=sio)
+ print('--------------', file=sio)
+ raise RuntimeError(sio.getvalue())
+
+ return frombytes(output)
+
+
+# Adapted from CPython
+def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM):
+ """Returns an unused port that should be suitable for binding. This is
+ achieved by creating a temporary socket with the same family and type as
+ the 'sock' parameter (default is AF_INET, SOCK_STREAM), and binding it to
+ the specified host address (defaults to 0.0.0.0) with the port set to 0,
+ eliciting an unused ephemeral port from the OS. The temporary socket is
+ then closed and deleted, and the ephemeral port is returned.
+ """
+ with socket.socket(family, socktype) as tempsock:
+ tempsock.bind(('', 0))
+ port = tempsock.getsockname()[1]
+ del tempsock
+ return port
diff --git a/src/arrow/dev/archery/archery/lang/__init__.py b/src/arrow/dev/archery/archery/lang/__init__.py
new file mode 100644
index 000000000..13a83393a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/lang/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/src/arrow/dev/archery/archery/lang/cpp.py b/src/arrow/dev/archery/archery/lang/cpp.py
new file mode 100644
index 000000000..c2b1ca680
--- /dev/null
+++ b/src/arrow/dev/archery/archery/lang/cpp.py
@@ -0,0 +1,296 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from ..utils.cmake import CMakeDefinition
+
+
+def truthifier(value):
+ return "ON" if value else "OFF"
+
+
+def or_else(value, default):
+ return value if value else default
+
+
+def coalesce(value, fallback):
+ return fallback if value is None else value
+
+
+LLVM_VERSION = 7
+
+
+class CppConfiguration:
+ def __init__(self,
+
+ # toolchain
+ cc=None, cxx=None, cxx_flags=None,
+ build_type=None, warn_level=None,
+ cpp_package_prefix=None, install_prefix=None, use_conda=None,
+ build_static=False, build_shared=True, build_unity=True,
+ # tests & examples
+ with_tests=None, with_benchmarks=None, with_examples=None,
+ with_integration=None,
+ # static checks
+ use_asan=None, use_tsan=None, use_ubsan=None,
+ with_fuzzing=None,
+ # Components
+ with_compute=None, with_csv=None, with_cuda=None,
+ with_dataset=None, with_filesystem=None, with_flight=None,
+ with_gandiva=None, with_hdfs=None, with_hiveserver2=None,
+ with_ipc=True, with_json=None, with_jni=None,
+ with_mimalloc=None,
+ with_parquet=None, with_plasma=None, with_python=True,
+ with_r=None, with_s3=None,
+ # Compressions
+ with_brotli=None, with_bz2=None, with_lz4=None,
+ with_snappy=None, with_zlib=None, with_zstd=None,
+ # extras
+ with_lint_only=False,
+ use_gold_linker=True,
+ simd_level="SSE4_2",
+ cmake_extras=None):
+ self._cc = cc
+ self._cxx = cxx
+ self.cxx_flags = cxx_flags
+
+ self._build_type = build_type
+ self.warn_level = warn_level
+ self._install_prefix = install_prefix
+ self._package_prefix = cpp_package_prefix
+ self._use_conda = use_conda
+ self.build_static = build_static
+ self.build_shared = build_shared
+ self.build_unity = build_unity
+
+ self.with_tests = with_tests
+ self.with_benchmarks = with_benchmarks
+ self.with_examples = with_examples
+ self.with_integration = with_integration
+
+ self.use_asan = use_asan
+ self.use_tsan = use_tsan
+ self.use_ubsan = use_ubsan
+ self.with_fuzzing = with_fuzzing
+
+ self.with_compute = with_compute
+ self.with_csv = with_csv
+ self.with_cuda = with_cuda
+ self.with_dataset = with_dataset
+ self.with_filesystem = with_filesystem
+ self.with_flight = with_flight
+ self.with_gandiva = with_gandiva
+ self.with_hdfs = with_hdfs
+ self.with_hiveserver2 = with_hiveserver2
+ self.with_ipc = with_ipc
+ self.with_json = with_json
+ self.with_jni = with_jni
+ self.with_mimalloc = with_mimalloc
+ self.with_parquet = with_parquet
+ self.with_plasma = with_plasma
+ self.with_python = with_python
+ self.with_r = with_r
+ self.with_s3 = with_s3
+
+ self.with_brotli = with_brotli
+ self.with_bz2 = with_bz2
+ self.with_lz4 = with_lz4
+ self.with_snappy = with_snappy
+ self.with_zlib = with_zlib
+ self.with_zstd = with_zstd
+
+ self.with_lint_only = with_lint_only
+ self.use_gold_linker = use_gold_linker
+ self.simd_level = simd_level
+
+ self.cmake_extras = cmake_extras
+
+ # Fixup required dependencies by providing sane defaults if the caller
+ # didn't specify the option.
+ if self.with_r:
+ self.with_csv = coalesce(with_csv, True)
+ self.with_dataset = coalesce(with_dataset, True)
+ self.with_filesystem = coalesce(with_filesystem, True)
+ self.with_ipc = coalesce(with_ipc, True)
+ self.with_json = coalesce(with_json, True)
+ self.with_parquet = coalesce(with_parquet, True)
+
+ if self.with_python:
+ self.with_zlib = coalesce(with_zlib, True)
+ self.with_lz4 = coalesce(with_lz4, True)
+
+ if self.with_dataset:
+ self.with_filesystem = coalesce(with_filesystem, True)
+ self.with_parquet = coalesce(with_parquet, True)
+
+ if self.with_parquet:
+ self.with_snappy = coalesce(with_snappy, True)
+
+ @property
+ def build_type(self):
+ if self._build_type:
+ return self._build_type
+
+ if self.with_fuzzing:
+ return "relwithdebinfo"
+
+ return "release"
+
+ @property
+ def cc(self):
+ if self._cc:
+ return self._cc
+
+ if self.with_fuzzing:
+ return "clang-{}".format(LLVM_VERSION)
+
+ return None
+
+ @property
+ def cxx(self):
+ if self._cxx:
+ return self._cxx
+
+ if self.with_fuzzing:
+ return "clang++-{}".format(LLVM_VERSION)
+
+ return None
+
+ def _gen_defs(self):
+ if self.cxx_flags:
+ yield ("ARROW_CXXFLAGS", self.cxx_flags)
+
+ yield ("CMAKE_EXPORT_COMPILE_COMMANDS", truthifier(True))
+ yield ("CMAKE_BUILD_TYPE", self.build_type)
+
+ if not self.with_lint_only:
+ yield ("BUILD_WARNING_LEVEL",
+ or_else(self.warn_level, "production"))
+
+ # if not ctx.quiet:
+ # yield ("ARROW_VERBOSE_THIRDPARTY_BUILD", "ON")
+
+ maybe_prefix = self.install_prefix
+ if maybe_prefix:
+ yield ("CMAKE_INSTALL_PREFIX", maybe_prefix)
+
+ if self._package_prefix is not None:
+ yield ("ARROW_DEPENDENCY_SOURCE", "SYSTEM")
+ yield ("ARROW_PACKAGE_PREFIX", self._package_prefix)
+
+ yield ("ARROW_BUILD_STATIC", truthifier(self.build_static))
+ yield ("ARROW_BUILD_SHARED", truthifier(self.build_shared))
+ yield ("CMAKE_UNITY_BUILD", truthifier(self.build_unity))
+
+ # Tests and benchmarks
+ yield ("ARROW_BUILD_TESTS", truthifier(self.with_tests))
+ yield ("ARROW_BUILD_BENCHMARKS", truthifier(self.with_benchmarks))
+ yield ("ARROW_BUILD_EXAMPLES", truthifier(self.with_examples))
+ yield ("ARROW_BUILD_INTEGRATION", truthifier(self.with_integration))
+
+ # Static checks
+ yield ("ARROW_USE_ASAN", truthifier(self.use_asan))
+ yield ("ARROW_USE_TSAN", truthifier(self.use_tsan))
+ yield ("ARROW_USE_UBSAN", truthifier(self.use_ubsan))
+ yield ("ARROW_FUZZING", truthifier(self.with_fuzzing))
+
+ # Components
+ yield ("ARROW_COMPUTE", truthifier(self.with_compute))
+ yield ("ARROW_CSV", truthifier(self.with_csv))
+ yield ("ARROW_CUDA", truthifier(self.with_cuda))
+ yield ("ARROW_DATASET", truthifier(self.with_dataset))
+ yield ("ARROW_FILESYSTEM", truthifier(self.with_filesystem))
+ yield ("ARROW_FLIGHT", truthifier(self.with_flight))
+ yield ("ARROW_GANDIVA", truthifier(self.with_gandiva))
+ yield ("ARROW_PARQUET", truthifier(self.with_parquet))
+ yield ("ARROW_HDFS", truthifier(self.with_hdfs))
+ yield ("ARROW_HIVESERVER2", truthifier(self.with_hiveserver2))
+ yield ("ARROW_IPC", truthifier(self.with_ipc))
+ yield ("ARROW_JSON", truthifier(self.with_json))
+ yield ("ARROW_JNI", truthifier(self.with_jni))
+ yield ("ARROW_MIMALLOC", truthifier(self.with_mimalloc))
+ yield ("ARROW_PLASMA", truthifier(self.with_plasma))
+ yield ("ARROW_PYTHON", truthifier(self.with_python))
+ yield ("ARROW_S3", truthifier(self.with_s3))
+
+ # Compressions
+ yield ("ARROW_WITH_BROTLI", truthifier(self.with_brotli))
+ yield ("ARROW_WITH_BZ2", truthifier(self.with_bz2))
+ yield ("ARROW_WITH_LZ4", truthifier(self.with_lz4))
+ yield ("ARROW_WITH_SNAPPY", truthifier(self.with_snappy))
+ yield ("ARROW_WITH_ZLIB", truthifier(self.with_zlib))
+ yield ("ARROW_WITH_ZSTD", truthifier(self.with_zstd))
+
+ yield ("ARROW_LINT_ONLY", truthifier(self.with_lint_only))
+
+ # Some configurations don't like gnu gold linker.
+ broken_with_gold_ld = [self.with_fuzzing, self.with_gandiva]
+ if self.use_gold_linker and not any(broken_with_gold_ld):
+ yield ("ARROW_USE_LD_GOLD", truthifier(self.use_gold_linker))
+ yield ("ARROW_SIMD_LEVEL", or_else(self.simd_level, "SSE4_2"))
+
+ # Detect custom conda toolchain
+ if self.use_conda:
+ for d, v in [('CMAKE_AR', 'AR'), ('CMAKE_RANLIB', 'RANLIB')]:
+ v = os.environ.get(v)
+ if v:
+ yield (d, v)
+
+ @property
+ def install_prefix(self):
+ if self._install_prefix:
+ return self._install_prefix
+
+ if self.use_conda:
+ return os.environ.get("CONDA_PREFIX")
+
+ return None
+
+ @property
+ def use_conda(self):
+ # If the user didn't specify a preference, guess via environment
+ if self._use_conda is None:
+ return os.environ.get("CONDA_PREFIX") is not None
+
+ return self._use_conda
+
+ @property
+ def definitions(self):
+ extras = list(self.cmake_extras) if self.cmake_extras else []
+ definitions = ["-D{}={}".format(d[0], d[1]) for d in self._gen_defs()]
+ return definitions + extras
+
+ @property
+ def environment(self):
+ env = os.environ.copy()
+
+ if self.cc:
+ env["CC"] = self.cc
+
+ if self.cxx:
+ env["CXX"] = self.cxx
+
+ return env
+
+
+class CppCMakeDefinition(CMakeDefinition):
+ def __init__(self, source, conf, **kwargs):
+ self.configuration = conf
+ super().__init__(source, **kwargs,
+ definitions=conf.definitions, env=conf.environment,
+ build_type=conf.build_type)
diff --git a/src/arrow/dev/archery/archery/lang/java.py b/src/arrow/dev/archery/archery/lang/java.py
new file mode 100644
index 000000000..bc169adf6
--- /dev/null
+++ b/src/arrow/dev/archery/archery/lang/java.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from ..utils.command import Command, CommandStackMixin, default_bin
+from ..utils.maven import MavenDefinition
+
+
+class Java(Command):
+ def __init__(self, java_bin=None):
+ self.bin = default_bin(java_bin, "java")
+
+
+class Jar(CommandStackMixin, Java):
+ def __init__(self, jar, *args, **kwargs):
+ self.jar = jar
+ self.argv = ("-jar", jar)
+ Java.__init__(self, *args, **kwargs)
+
+
+class JavaConfiguration:
+ def __init__(self,
+
+ # toolchain
+ java_home=None, java_options=None,
+ # build & benchmark
+ build_extras=None, benchmark_extras=None):
+ self.java_home = java_home
+ self.java_options = java_options
+
+ self.build_extras = list(build_extras) if build_extras else []
+ self.benchmark_extras = list(
+ benchmark_extras) if benchmark_extras else []
+
+ @property
+ def build_definitions(self):
+ return self.build_extras
+
+ @property
+ def benchmark_definitions(self):
+ return self.benchmark_extras
+
+ @property
+ def environment(self):
+ env = os.environ.copy()
+
+ if self.java_home:
+ env["JAVA_HOME"] = self.java_home
+
+ if self.java_options:
+ env["JAVA_OPTIONS"] = self.java_options
+
+ return env
+
+
+class JavaMavenDefinition(MavenDefinition):
+ def __init__(self, source, conf, **kwargs):
+ self.configuration = conf
+ super().__init__(source, **kwargs,
+ build_definitions=conf.build_definitions,
+ benchmark_definitions=conf.benchmark_definitions,
+ env=conf.environment)
diff --git a/src/arrow/dev/archery/archery/lang/python.py b/src/arrow/dev/archery/archery/lang/python.py
new file mode 100644
index 000000000..c6ebbe650
--- /dev/null
+++ b/src/arrow/dev/archery/archery/lang/python.py
@@ -0,0 +1,223 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import inspect
+import tokenize
+from contextlib import contextmanager
+
+try:
+ from numpydoc.validate import Docstring, validate
+except ImportError:
+ have_numpydoc = False
+else:
+ have_numpydoc = True
+
+from ..utils.logger import logger
+from ..utils.command import Command, capture_stdout, default_bin
+
+
+class Flake8(Command):
+ def __init__(self, flake8_bin=None):
+ self.bin = default_bin(flake8_bin, "flake8")
+
+
+class Autopep8(Command):
+ def __init__(self, autopep8_bin=None):
+ self.bin = default_bin(autopep8_bin, "autopep8")
+
+ @capture_stdout()
+ def run_captured(self, *args, **kwargs):
+ return self.run(*args, **kwargs)
+
+
+def _tokenize_signature(s):
+ lines = s.encode('ascii').splitlines()
+ generator = iter(lines).__next__
+ return tokenize.tokenize(generator)
+
+
+def _convert_typehint(tokens):
+ names = []
+ opening_bracket_reached = False
+ for token in tokens:
+ # omit the tokens before the opening bracket
+ if not opening_bracket_reached:
+ if token.string == '(':
+ opening_bracket_reached = True
+ else:
+ continue
+
+ if token.type == 1: # type 1 means NAME token
+ names.append(token)
+ else:
+ if len(names) == 1:
+ yield (names[0].type, names[0].string)
+ elif len(names) == 2:
+ # two "NAME" tokens follow each other which means a cython
+ # typehint like `bool argument`, so remove the typehint
+ # note that we could convert it to python typehints, but hints
+ # are not supported by _signature_fromstr
+ yield (names[1].type, names[1].string)
+ elif len(names) > 2:
+ raise ValueError('More than two NAME tokens follow each other')
+ names = []
+ yield (token.type, token.string)
+
+
+def inspect_signature(obj):
+ """
+ Custom signature inspection primarily for cython generated callables.
+
+ Cython puts the signatures to the first line of the docstrings, which we
+ can reuse to parse the python signature from, but some gymnastics are
+ required, like removing the cython typehints.
+
+ It converts the cython signature:
+ array(obj, type=None, mask=None, size=None, from_pandas=None,
+ bool safe=True, MemoryPool memory_pool=None)
+ To:
+ <Signature (obj, type=None, mask=None, size=None, from_pandas=None,
+ safe=True, memory_pool=None)>
+ """
+ cython_signature = obj.__doc__.splitlines()[0]
+ cython_tokens = _tokenize_signature(cython_signature)
+ python_tokens = _convert_typehint(cython_tokens)
+ python_signature = tokenize.untokenize(python_tokens)
+ return inspect._signature_fromstr(inspect.Signature, obj, python_signature)
+
+
+class NumpyDoc:
+
+ def __init__(self, symbols=None):
+ if not have_numpydoc:
+ raise RuntimeError(
+ 'Numpydoc is not available, install the development version '
+ 'with command: pip install numpydoc==1.1.0'
+ )
+ self.symbols = set(symbols or {'pyarrow'})
+
+ def traverse(self, fn, obj, from_package):
+ """Apply a function on publicly exposed API components.
+
+ Recursively iterates over the members of the passed object. It omits
+ any '_' prefixed and thirdparty (non pyarrow) symbols.
+
+ Parameters
+ ----------
+ obj : Any
+ from_package : string, default 'pyarrow'
+ Predicate to only consider objects from this package.
+ """
+ todo = [obj]
+ seen = set()
+
+ while todo:
+ obj = todo.pop()
+ if obj in seen:
+ continue
+ else:
+ seen.add(obj)
+
+ fn(obj)
+
+ for name in dir(obj):
+ if name.startswith('_'):
+ continue
+
+ member = getattr(obj, name)
+ module = getattr(member, '__module__', None)
+ if not (module and module.startswith(from_package)):
+ continue
+
+ todo.append(member)
+
+ @contextmanager
+ def _apply_patches(self):
+ """
+ Patch Docstring class to bypass loading already loaded python objects.
+ """
+ orig_load_obj = Docstring._load_obj
+ orig_signature = inspect.signature
+
+ @staticmethod
+ def _load_obj(obj):
+ # By default it expects a qualname and import the object, but we
+ # have already loaded object after the API traversal.
+ if isinstance(obj, str):
+ return orig_load_obj(obj)
+ else:
+ return obj
+
+ def signature(obj):
+ # inspect.signature tries to parse __text_signature__ if other
+ # properties like __signature__ doesn't exists, but cython
+ # doesn't set that property despite that embedsignature cython
+ # directive is set. The only way to inspect a cython compiled
+ # callable's signature to parse it from __doc__ while
+ # embedsignature directive is set during the build phase.
+ # So path inspect.signature function to attempt to parse the first
+ # line of callable.__doc__ as a signature.
+ try:
+ return orig_signature(obj)
+ except Exception as orig_error:
+ try:
+ return inspect_signature(obj)
+ except Exception:
+ raise orig_error
+
+ try:
+ Docstring._load_obj = _load_obj
+ inspect.signature = signature
+ yield
+ finally:
+ Docstring._load_obj = orig_load_obj
+ inspect.signature = orig_signature
+
+ def validate(self, from_package='', allow_rules=None,
+ disallow_rules=None):
+ results = []
+
+ def callback(obj):
+ try:
+ result = validate(obj)
+ except OSError as e:
+ symbol = f"{obj.__module__}.{obj.__name__}"
+ logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
+ return
+
+ errors = []
+ for errcode, errmsg in result.get('errors', []):
+ if allow_rules and errcode not in allow_rules:
+ continue
+ if disallow_rules and errcode in disallow_rules:
+ continue
+ errors.append((errcode, errmsg))
+
+ if len(errors):
+ result['errors'] = errors
+ results.append((obj, result))
+
+ with self._apply_patches():
+ for symbol in self.symbols:
+ try:
+ obj = Docstring._load_obj(symbol)
+ except (ImportError, AttributeError):
+ print('{} is not available for import'.format(symbol))
+ else:
+ self.traverse(callback, obj, from_package=from_package)
+
+ return results
diff --git a/src/arrow/dev/archery/archery/linking.py b/src/arrow/dev/archery/archery/linking.py
new file mode 100644
index 000000000..c2e6f1772
--- /dev/null
+++ b/src/arrow/dev/archery/archery/linking.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import platform
+import subprocess
+
+from .utils.command import Command
+
+
+_ldd = Command("ldd")
+_otool = Command("otool")
+
+
+class DependencyError(Exception):
+ pass
+
+
+class DynamicLibrary:
+
+ def __init__(self, path):
+ self.path = path
+
+ def list_dependencies(self):
+ """
+ List the full name of the library dependencies.
+ """
+ system = platform.system()
+ if system == "Linux":
+ result = _ldd.run(self.path, stdout=subprocess.PIPE)
+ lines = result.stdout.splitlines()
+ return [ll.split(None, 1)[0].decode() for ll in lines]
+ elif system == "Darwin":
+ result = _otool.run("-L", self.path, stdout=subprocess.PIPE)
+ lines = result.stdout.splitlines()
+ return [dl.split(None, 1)[0].decode() for dl in lines]
+ else:
+ raise ValueError(f"{platform} is not supported")
+
+ def list_dependency_names(self):
+ """
+ List the truncated names of the dynamic library dependencies.
+ """
+ names = []
+ for dependency in self.list_dependencies():
+ *_, library = dependency.rsplit("/", 1)
+ name, *_ = library.split(".", 1)
+ names.append(name)
+ return names
+
+
+def check_dynamic_library_dependencies(path, allowed, disallowed):
+ dylib = DynamicLibrary(path)
+ for dep in dylib.list_dependency_names():
+ if allowed and dep not in allowed:
+ raise DependencyError(
+ f"Unexpected shared dependency found in {dylib.path}: `{dep}`"
+ )
+ if disallowed and dep in disallowed:
+ raise DependencyError(
+ f"Disallowed shared dependency found in {dylib.path}: `{dep}`"
+ )
diff --git a/src/arrow/dev/archery/archery/release.py b/src/arrow/dev/archery/archery/release.py
new file mode 100644
index 000000000..6baeabc9d
--- /dev/null
+++ b/src/arrow/dev/archery/archery/release.py
@@ -0,0 +1,535 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import defaultdict
+import functools
+import os
+import re
+import pathlib
+import shelve
+import warnings
+
+from git import Repo
+from jira import JIRA
+from semver import VersionInfo as SemVer
+
+from .utils.source import ArrowSources
+from .utils.report import JinjaReport
+
+
+def cached_property(fn):
+ return property(functools.lru_cache(maxsize=1)(fn))
+
+
+class Version(SemVer):
+
+ __slots__ = ('released', 'release_date')
+
+ def __init__(self, released=False, release_date=None, **kwargs):
+ super().__init__(**kwargs)
+ self.released = released
+ self.release_date = release_date
+
+ @classmethod
+ def parse(cls, version, **kwargs):
+ return cls(**SemVer.parse(version).to_dict(), **kwargs)
+
+ @classmethod
+ def from_jira(cls, jira_version):
+ return cls.parse(
+ jira_version.name,
+ released=jira_version.released,
+ release_date=getattr(jira_version, 'releaseDate', None)
+ )
+
+
+class Issue:
+
+ def __init__(self, key, type, summary):
+ self.key = key
+ self.type = type
+ self.summary = summary
+
+ @classmethod
+ def from_jira(cls, jira_issue):
+ return cls(
+ key=jira_issue.key,
+ type=jira_issue.fields.issuetype.name,
+ summary=jira_issue.fields.summary
+ )
+
+ @property
+ def project(self):
+ return self.key.split('-')[0]
+
+ @property
+ def number(self):
+ return int(self.key.split('-')[1])
+
+
+class Jira(JIRA):
+
+ def __init__(self, user=None, password=None,
+ url='https://issues.apache.org/jira'):
+ user = user or os.environ.get('APACHE_JIRA_USER')
+ password = password or os.environ.get('APACHE_JIRA_PASSWORD')
+ super().__init__(url, basic_auth=(user, password))
+
+ def project_version(self, version_string, project='ARROW'):
+ # query version from jira to populated with additional metadata
+ versions = {str(v): v for v in self.project_versions(project)}
+ return versions[version_string]
+
+ def project_versions(self, project):
+ versions = []
+ for v in super().project_versions(project):
+ try:
+ versions.append(Version.from_jira(v))
+ except ValueError:
+ # ignore invalid semantic versions like JS-0.4.0
+ continue
+ return sorted(versions, reverse=True)
+
+ def issue(self, key):
+ return Issue.from_jira(super().issue(key))
+
+ def project_issues(self, version, project='ARROW'):
+ query = "project={} AND fixVersion={}".format(project, version)
+ issues = super().search_issues(query, maxResults=False)
+ return list(map(Issue.from_jira, issues))
+
+
+class CachedJira:
+
+ def __init__(self, cache_path, jira=None):
+ self.jira = jira or Jira()
+ self.cache_path = cache_path
+
+ def __getattr__(self, name):
+ attr = getattr(self.jira, name)
+ return self._cached(name, attr) if callable(attr) else attr
+
+ def _cached(self, name, method):
+ def wrapper(*args, **kwargs):
+ key = str((name, args, kwargs))
+ with shelve.open(self.cache_path) as cache:
+ try:
+ result = cache[key]
+ except KeyError:
+ cache[key] = result = method(*args, **kwargs)
+ return result
+ return wrapper
+
+
+_TITLE_REGEX = re.compile(
+ r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*"
+ r"(?P<components>\[.*\])?\s*(?P<summary>.*)"
+)
+_COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]")
+
+
+class CommitTitle:
+
+ def __init__(self, summary, project=None, issue=None, components=None):
+ self.project = project
+ self.issue = issue
+ self.components = components or []
+ self.summary = summary
+
+ def __str__(self):
+ out = ""
+ if self.issue:
+ out += "{}: ".format(self.issue)
+ if self.components:
+ for component in self.components:
+ out += "[{}]".format(component)
+ out += " "
+ out += self.summary
+ return out
+
+ def __eq__(self, other):
+ return (
+ self.summary == other.summary and
+ self.project == other.project and
+ self.issue == other.issue and
+ self.components == other.components
+ )
+
+ def __hash__(self):
+ return hash(
+ (self.summary, self.project, self.issue, tuple(self.components))
+ )
+
+ @classmethod
+ def parse(cls, headline):
+ matches = _TITLE_REGEX.match(headline)
+ if matches is None:
+ warnings.warn(
+ "Unable to parse commit message `{}`".format(headline)
+ )
+ return CommitTitle(headline)
+
+ values = matches.groupdict()
+ components = values.get('components') or ''
+ components = _COMPONENT_REGEX.findall(components)
+
+ return CommitTitle(
+ values['summary'],
+ project=values.get('project'),
+ issue=values.get('issue'),
+ components=components
+ )
+
+
+class Commit:
+
+ def __init__(self, wrapped):
+ self._title = CommitTitle.parse(wrapped.summary)
+ self._wrapped = wrapped
+
+ def __getattr__(self, attr):
+ if hasattr(self._title, attr):
+ return getattr(self._title, attr)
+ else:
+ return getattr(self._wrapped, attr)
+
+ def __repr__(self):
+ template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>'
+ return template.format(self.hexsha, self.issue, self.components,
+ self.summary)
+
+ @property
+ def url(self):
+ return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha)
+
+ @property
+ def title(self):
+ return self._title
+
+
+class ReleaseCuration(JinjaReport):
+ templates = {
+ 'console': 'release_curation.txt.j2'
+ }
+ fields = [
+ 'release',
+ 'within',
+ 'outside',
+ 'nojira',
+ 'parquet',
+ 'nopatch'
+ ]
+
+
+class JiraChangelog(JinjaReport):
+ templates = {
+ 'markdown': 'release_changelog.md.j2',
+ 'html': 'release_changelog.html.j2'
+ }
+ fields = [
+ 'release',
+ 'categories'
+ ]
+
+
+class Release:
+
+ def __init__(self):
+ raise TypeError("Do not initialize Release class directly, use "
+ "Release.from_jira(version) instead.")
+
+ def __repr__(self):
+ if self.version.released:
+ status = "released_at={!r}".format(self.version.release_date)
+ else:
+ status = "pending"
+ return "<{} {!r} {}>".format(self.__class__.__name__,
+ str(self.version), status)
+
+ @staticmethod
+ def from_jira(version, jira=None, repo=None):
+ if jira is None:
+ jira = Jira()
+ elif isinstance(jira, str):
+ jira = Jira(jira)
+ elif not isinstance(jira, (Jira, CachedJira)):
+ raise TypeError("`jira` argument must be a server url or a valid "
+ "Jira instance")
+
+ if repo is None:
+ arrow = ArrowSources.find()
+ repo = Repo(arrow.path)
+ elif isinstance(repo, (str, pathlib.Path)):
+ repo = Repo(repo)
+ elif not isinstance(repo, Repo):
+ raise TypeError("`repo` argument must be a path or a valid Repo "
+ "instance")
+
+ if isinstance(version, str):
+ version = jira.project_version(version, project='ARROW')
+ elif not isinstance(version, Version):
+ raise TypeError(version)
+
+ # decide the type of the release based on the version number
+ if version.patch == 0:
+ if version.minor == 0:
+ klass = MajorRelease
+ elif version.major == 0:
+ # handle minor releases before 1.0 as major releases
+ klass = MajorRelease
+ else:
+ klass = MinorRelease
+ else:
+ klass = PatchRelease
+
+ # prevent instantiating release object directly
+ obj = klass.__new__(klass)
+ obj.version = version
+ obj.jira = jira
+ obj.repo = repo
+
+ return obj
+
+ @property
+ def is_released(self):
+ return self.version.released
+
+ @property
+ def tag(self):
+ return "apache-arrow-{}".format(str(self.version))
+
+ @property
+ def branch(self):
+ raise NotImplementedError()
+
+ @property
+ def siblings(self):
+ """
+ Releases to consider when calculating previous and next releases.
+ """
+ raise NotImplementedError()
+
+ @cached_property
+ def previous(self):
+ # select all non-patch releases
+ position = self.siblings.index(self.version)
+ try:
+ previous = self.siblings[position + 1]
+ except IndexError:
+ # first release doesn't have a previous one
+ return None
+ else:
+ return Release.from_jira(previous, jira=self.jira, repo=self.repo)
+
+ @cached_property
+ def next(self):
+ # select all non-patch releases
+ position = self.siblings.index(self.version)
+ if position <= 0:
+ raise ValueError("There is no upcoming release set in JIRA after "
+ "version {}".format(self.version))
+ upcoming = self.siblings[position - 1]
+ return Release.from_jira(upcoming, jira=self.jira, repo=self.repo)
+
+ @cached_property
+ def issues(self):
+ issues = self.jira.project_issues(self.version, project='ARROW')
+ return {i.key: i for i in issues}
+
+ @cached_property
+ def commits(self):
+ """
+ All commits applied between two versions.
+ """
+ if self.previous is None:
+ # first release
+ lower = ''
+ else:
+ lower = self.repo.tags[self.previous.tag]
+
+ if self.version.released:
+ upper = self.repo.tags[self.tag]
+ else:
+ try:
+ upper = self.repo.branches[self.branch]
+ except IndexError:
+ warnings.warn("Release branch `{}` doesn't exist."
+ .format(self.branch))
+ return []
+
+ commit_range = "{}..{}".format(lower, upper)
+ return list(map(Commit, self.repo.iter_commits(commit_range)))
+
+ def curate(self):
+ # handle commits with parquet issue key specially and query them from
+ # jira and add it to the issues
+ release_issues = self.issues
+
+ within, outside, nojira, parquet = [], [], [], []
+ for c in self.commits:
+ if c.issue is None:
+ nojira.append(c)
+ elif c.issue in release_issues:
+ within.append((release_issues[c.issue], c))
+ elif c.project == 'PARQUET':
+ parquet.append((self.jira.issue(c.issue), c))
+ else:
+ outside.append((self.jira.issue(c.issue), c))
+
+ # remaining jira tickets
+ within_keys = {i.key for i, c in within}
+ nopatch = [issue for key, issue in release_issues.items()
+ if key not in within_keys]
+
+ return ReleaseCuration(release=self, within=within, outside=outside,
+ nojira=nojira, parquet=parquet, nopatch=nopatch)
+
+ def changelog(self):
+ release_issues = []
+
+ # get organized report for the release
+ curation = self.curate()
+
+ # jira tickets having patches in the release
+ for issue, _ in curation.within:
+ release_issues.append(issue)
+
+ # jira tickets without patches
+ for issue in curation.nopatch:
+ release_issues.append(issue)
+
+ # parquet patches in the release
+ for issue, _ in curation.parquet:
+ release_issues.append(issue)
+
+ # organize issues into categories
+ issue_types = {
+ 'Bug': 'Bug Fixes',
+ 'Improvement': 'New Features and Improvements',
+ 'New Feature': 'New Features and Improvements',
+ 'Sub-task': 'New Features and Improvements',
+ 'Task': 'New Features and Improvements',
+ 'Test': 'Bug Fixes',
+ 'Wish': 'New Features and Improvements',
+ }
+ categories = defaultdict(list)
+ for issue in release_issues:
+ categories[issue_types[issue.type]].append(issue)
+
+ # sort issues by the issue key in ascending order
+ for name, issues in categories.items():
+ issues.sort(key=lambda issue: (issue.project, issue.number))
+
+ return JiraChangelog(release=self, categories=categories)
+
+
+class MaintenanceMixin:
+ """
+ Utility methods for cherry-picking commits from the main branch.
+ """
+
+ def commits_to_pick(self, exclude_already_applied=True):
+ # collect commits applied on the main branch since the root of the
+ # maintenance branch (the previous major release)
+ if self.version.major == 0:
+ # treat minor releases as major releases preceeding 1.0.0 release
+ commit_range = "apache-arrow-0.{}.0..master".format(
+ self.version.minor
+ )
+ else:
+ commit_range = "apache-arrow-{}.0.0..master".format(
+ self.version.major
+ )
+
+ # keeping the original order of the commits helps to minimize the merge
+ # conflicts during cherry-picks
+ commits = map(Commit, self.repo.iter_commits(commit_range))
+
+ # exclude patches that have been already applied to the maintenance
+ # branch, we cannot identify patches based on sha because it changes
+ # after the cherry pick so use commit title instead
+ if exclude_already_applied:
+ already_applied = {c.title for c in self.commits}
+ else:
+ already_applied = set()
+
+ # iterate over the commits applied on the main branch and filter out
+ # the ones that are included in the jira release
+ patches_to_pick = [c for c in commits if
+ c.issue in self.issues and
+ c.title not in already_applied]
+
+ return reversed(patches_to_pick)
+
+ def cherry_pick_commits(self, recreate_branch=True):
+ if recreate_branch:
+ # delete, create and checkout the maintenance branch based off of
+ # the previous tag
+ if self.branch in self.repo.branches:
+ self.repo.git.branch('-D', self.branch)
+ self.repo.git.checkout(self.previous.tag, b=self.branch)
+ else:
+ # just checkout the already existing maintenance branch
+ self.repo.git.checkout(self.branch)
+
+ # cherry pick the commits based on the jira tickets
+ for commit in self.commits_to_pick():
+ self.repo.git.cherry_pick(commit.hexsha)
+
+
+class MajorRelease(Release):
+
+ @property
+ def branch(self):
+ return "master"
+
+ @cached_property
+ def siblings(self):
+ """
+ Filter only the major releases.
+ """
+ # handle minor releases before 1.0 as major releases
+ return [v for v in self.jira.project_versions('ARROW')
+ if v.patch == 0 and (v.major == 0 or v.minor == 0)]
+
+
+class MinorRelease(Release, MaintenanceMixin):
+
+ @property
+ def branch(self):
+ return "maint-{}.x.x".format(self.version.major)
+
+ @cached_property
+ def siblings(self):
+ """
+ Filter the major and minor releases.
+ """
+ return [v for v in self.jira.project_versions('ARROW') if v.patch == 0]
+
+
+class PatchRelease(Release, MaintenanceMixin):
+
+ @property
+ def branch(self):
+ return "maint-{}.{}.x".format(self.version.major, self.version.minor)
+
+ @cached_property
+ def siblings(self):
+ """
+ No filtering, consider all releases.
+ """
+ return self.jira.project_versions('ARROW')
diff --git a/src/arrow/dev/archery/archery/templates/release_changelog.md.j2 b/src/arrow/dev/archery/archery/templates/release_changelog.md.j2
new file mode 100644
index 000000000..c0406ddf4
--- /dev/null
+++ b/src/arrow/dev/archery/archery/templates/release_changelog.md.j2
@@ -0,0 +1,29 @@
+{#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#}
+# Apache Arrow {{ release.version }} ({{ release.version.release_date or today() }})
+
+{% for category, issues in categories.items() -%}
+
+## {{ category }}
+
+{% for issue in issues -%}
+* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ issue.summary | md }}
+{% endfor %}
+
+{% endfor %}
diff --git a/src/arrow/dev/archery/archery/templates/release_curation.txt.j2 b/src/arrow/dev/archery/archery/templates/release_curation.txt.j2
new file mode 100644
index 000000000..a5d11e9d4
--- /dev/null
+++ b/src/arrow/dev/archery/archery/templates/release_curation.txt.j2
@@ -0,0 +1,41 @@
+{#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#}
+Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }}
+
+Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }}
+
+Patches with assigned issue in version {{ release.version }}:
+{% for issue, commit in within -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches with assigned issue outside of version {{ release.version }}:
+{% for issue, commit in outside -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+Patches in version {{ release.version }} without a linked issue:
+{% for commit in nojira -%}
+ - {{ commit.url }} {{ commit.title }}
+{% endfor %}
+
+JIRA issues in version {{ release.version }} without a linked patch:
+{% for issue in nopatch -%}
+ - https://issues.apache.org/jira/browse/{{ issue.key }}
+{% endfor %}
diff --git a/src/arrow/dev/archery/archery/testing.py b/src/arrow/dev/archery/archery/testing.py
new file mode 100644
index 000000000..471a54d4c
--- /dev/null
+++ b/src/arrow/dev/archery/archery/testing.py
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from contextlib import contextmanager
+import os
+from unittest import mock
+import re
+
+
+class DotDict(dict):
+
+ def __getattr__(self, key):
+ try:
+ item = self[key]
+ except KeyError:
+ raise AttributeError(key)
+ if isinstance(item, dict):
+ return DotDict(item)
+ else:
+ return item
+
+
+class PartialEnv(dict):
+
+ def __eq__(self, other):
+ return self.items() <= other.items()
+
+
+_mock_call_type = type(mock.call())
+
+
+def _ensure_mock_call_object(obj, **kwargs):
+ if isinstance(obj, _mock_call_type):
+ return obj
+ elif isinstance(obj, str):
+ cmd = re.split(r"\s+", obj)
+ return mock.call(cmd, **kwargs)
+ elif isinstance(obj, list):
+ return mock.call(obj, **kwargs)
+ else:
+ raise TypeError(obj)
+
+
+class SuccessfulSubprocessResult:
+
+ def check_returncode(self):
+ return
+
+
+@contextmanager
+def assert_subprocess_calls(expected_commands_or_calls, **kwargs):
+ calls = [
+ _ensure_mock_call_object(obj, **kwargs)
+ for obj in expected_commands_or_calls
+ ]
+ with mock.patch('subprocess.run', autospec=True) as run:
+ run.return_value = SuccessfulSubprocessResult()
+ yield run
+ run.assert_has_calls(calls)
+
+
+@contextmanager
+def override_env(mapping):
+ original = os.environ
+ try:
+ os.environ = dict(os.environ, **mapping)
+ yield os.environ
+ finally:
+ os.environ = original
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff-empty-lines.jsonl b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff-empty-lines.jsonl
new file mode 100644
index 000000000..5854eb75c
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff-empty-lines.jsonl
@@ -0,0 +1,6 @@
+{"benchmark": "RegressionSumKernel/32768/10", "change": 0.0046756468886368545, "regression": false, "baseline": 13265442258.099466, "contender": 13327466781.91994, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"}
+{"benchmark": "RegressionSumKernel/32768/1", "change": 0.0025108399115900733, "regression": false, "baseline": 15181891659.539782, "contender": 15220010959.05199, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"}
+
+{"benchmark": "RegressionSumKernel/32768/50", "change": 0.00346735806287155, "regression": false, "baseline": 11471825667.817123, "contender": 11511602595.042286, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"}
+
+{"benchmark": "RegressionSumKernel/32768/0", "change": 0.010140954727954987, "regression": false, "baseline": 18316987019.994465, "contender": 18502738756.116768, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"}
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl
new file mode 100644
index 000000000..1e25810d7
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl
@@ -0,0 +1,4 @@
+{"benchmark":"RegressionSumKernel/32768/50","change":-0.001550846227215492,"regression":false,"baseline":19241207435.428757,"contender":19211367281.47045,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"}
+{"benchmark":"RegressionSumKernel/32768/1","change":0.0020681767923465765,"regression":true,"baseline":24823170673.777943,"contender":24771831968.277977,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"}
+{"benchmark":"RegressionSumKernel/32768/10","change":0.0033323376378746905,"regression":false,"baseline":21902707565.968014,"contender":21975694782.76145,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"}
+{"benchmark":"RegressionSumKernel/32768/0","change":-0.004918126090954414,"regression":true,"baseline":27685006611.446762,"contender":27821164964.790764,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"}
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json
new file mode 100644
index 000000000..d591105f0
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json
@@ -0,0 +1,212 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "MEMBER",
+ "body": "@ursabot build",
+ "created_at": "2019-04-05T11:55:43Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726",
+ "id": 480248726,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==",
+ "updated_at": "2019-04-05T11:55:43Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "MEMBER",
+ "body": "",
+ "closed_at": null,
+ "comments": 3,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "created_at": "2019-04-05T11:22:15Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "pull_request": {
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Unittests for GithubHook",
+ "updated_at": "2019-04-05T11:55:43Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json
new file mode 100644
index 000000000..5a8f3461c
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json
@@ -0,0 +1,212 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "NONE",
+ "body": "Unknown command \"\"",
+ "created_at": "2019-04-05T11:35:47Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815",
+ "id": 480243815,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==",
+ "updated_at": "2019-04-05T11:35:47Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815",
+ "user": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4",
+ "events_url": "https://api.github.com/users/ursabot/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursabot/followers",
+ "following_url": "https://api.github.com/users/ursabot/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursabot",
+ "id": 49275095,
+ "login": "someone",
+ "node_id": "MDQ6VXNlcjQ5Mjc1MDk1",
+ "organizations_url": "https://api.github.com/users/ursabot/orgs",
+ "received_events_url": "https://api.github.com/users/ursabot/received_events",
+ "repos_url": "https://api.github.com/users/ursabot/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/ursabot"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "NONE",
+ "body": "",
+ "closed_at": null,
+ "comments": 2,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "created_at": "2019-04-05T11:22:15Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "pull_request": {
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Unittests for GithubHook",
+ "updated_at": "2019-04-05T11:35:47Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "someone",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4",
+ "events_url": "https://api.github.com/users/ursabot/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursabot/followers",
+ "following_url": "https://api.github.com/users/ursabot/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursabot",
+ "id": 49275095,
+ "login": "someone",
+ "node_id": "MDQ6VXNlcjQ5Mjc1MDk1",
+ "organizations_url": "https://api.github.com/users/ursabot/orgs",
+ "received_events_url": "https://api.github.com/users/ursabot/received_events",
+ "repos_url": "https://api.github.com/users/ursabot/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/ursabot"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json
new file mode 100644
index 000000000..bfb7210df
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json
@@ -0,0 +1,212 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "NONE",
+ "body": "Unknown command \"\"",
+ "created_at": "2019-04-05T11:35:47Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815",
+ "id": 480243815,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==",
+ "updated_at": "2019-04-05T11:35:47Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815",
+ "user": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4",
+ "events_url": "https://api.github.com/users/ursabot/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursabot/followers",
+ "following_url": "https://api.github.com/users/ursabot/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursabot",
+ "id": 49275095,
+ "login": "ursabot",
+ "node_id": "MDQ6VXNlcjQ5Mjc1MDk1",
+ "organizations_url": "https://api.github.com/users/ursabot/orgs",
+ "received_events_url": "https://api.github.com/users/ursabot/received_events",
+ "repos_url": "https://api.github.com/users/ursabot/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/ursabot"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "MEMBER",
+ "body": "",
+ "closed_at": null,
+ "comments": 2,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "created_at": "2019-04-05T11:22:15Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "pull_request": {
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Unittests for GithubHook",
+ "updated_at": "2019-04-05T11:35:47Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4",
+ "events_url": "https://api.github.com/users/ursabot/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursabot/followers",
+ "following_url": "https://api.github.com/users/ursabot/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursabot",
+ "id": 49275095,
+ "login": "ursabot",
+ "node_id": "MDQ6VXNlcjQ5Mjc1MDk1",
+ "organizations_url": "https://api.github.com/users/ursabot/orgs",
+ "received_events_url": "https://api.github.com/users/ursabot/received_events",
+ "repos_url": "https://api.github.com/users/ursabot/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/ursabot"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json
new file mode 100644
index 000000000..a3d450078
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json
@@ -0,0 +1,212 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "MEMBER",
+ "body": "bear is no game",
+ "created_at": "2019-04-05T11:26:56Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727",
+ "id": 480241727,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==",
+ "updated_at": "2019-04-05T11:26:56Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "MEMBER",
+ "body": "",
+ "closed_at": null,
+ "comments": 0,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "created_at": "2019-04-05T11:22:15Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "pull_request": {
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Unittests for GithubHook",
+ "updated_at": "2019-04-05T11:26:56Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json
new file mode 100644
index 000000000..c88197c8e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json
@@ -0,0 +1,217 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "MEMBER",
+ "body": "@ursabot ",
+ "body_html": "",
+ "body_text": "",
+ "created_at": "2019-04-05T11:35:46Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811",
+ "id": 480243811,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==",
+ "updated_at": "2019-04-05T11:35:46Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "MEMBER",
+ "body": "",
+ "body_html": "",
+ "body_text": "",
+ "closed_at": null,
+ "closed_by": null,
+ "comments": 1,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "created_at": "2019-04-05T11:22:15Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "pull_request": {
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Unittests for GithubHook",
+ "updated_at": "2019-04-05T11:35:46Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json
new file mode 100644
index 000000000..9e362fc0e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json
@@ -0,0 +1,206 @@
+{
+ "action": "created",
+ "comment": {
+ "author_association": "MEMBER",
+ "body": "@ursabot build",
+ "created_at": "2019-04-05T13:07:57Z",
+ "html_url": "https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480268708",
+ "id": 480268708,
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19",
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI2ODcwOA==",
+ "updated_at": "2019-04-05T13:07:57Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480268708",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "issue": {
+ "assignee": null,
+ "assignees": [],
+ "author_association": "MEMBER",
+ "body": "",
+ "closed_at": null,
+ "comments": 5,
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments",
+ "created_at": "2019-04-02T09:56:41Z",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/issues/19",
+ "id": 428131685,
+ "labels": [],
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}",
+ "locked": false,
+ "milestone": null,
+ "node_id": "MDU6SXNzdWU0MjgxMzE2ODU=",
+ "number": 19,
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "state": "open",
+ "title": "Build ursabot itself via ursabot",
+ "updated_at": "2019-04-05T13:07:57Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19",
+ "user": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+ },
+ "organization": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "description": "Innovation lab for open source data science tools, powered by Apache Arrow",
+ "events_url": "https://api.github.com/orgs/ursa-labs/events",
+ "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks",
+ "id": 46514972,
+ "issues_url": "https://api.github.com/orgs/ursa-labs/issues",
+ "login": "ursa-labs",
+ "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}",
+ "repos_url": "https://api.github.com/orgs/ursa-labs/repos",
+ "url": "https://api.github.com/orgs/ursa-labs"
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T12:01:40Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 898,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json b/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json
new file mode 100644
index 000000000..9cf5c0dda
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json
@@ -0,0 +1,445 @@
+{
+ "action": "opened",
+ "number": 26,
+ "pull_request": {
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26",
+ "id": 267785552,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "number": 26,
+ "state": "open",
+ "locked": false,
+ "title": "Unittests for GithubHook",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "body": "",
+ "created_at": "2019-04-05T11:22:15Z",
+ "updated_at": "2019-04-05T12:01:40Z",
+ "closed_at": null,
+ "merged_at": null,
+ "merge_commit_sha": "cc5dc3606988b3824be54df779ed2028776113cb",
+ "assignee": null,
+ "assignees": [],
+ "requested_reviewers": [],
+ "requested_teams": [],
+ "labels": [],
+ "milestone": null,
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits",
+ "review_comments_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments",
+ "review_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "head": {
+ "label": "ursa-labs:test-hook",
+ "ref": "test-hook",
+ "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "user": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "repo": {
+ "id": 169101701,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "name": "ursabot",
+ "full_name": "ursa-labs/ursabot",
+ "private": false,
+ "owner": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "description": null,
+ "fork": false,
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "created_at": "2019-02-04T15:40:31Z",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "pushed_at": "2019-04-05T12:01:40Z",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "homepage": null,
+ "size": 898,
+ "stargazers_count": 1,
+ "watchers_count": 1,
+ "language": "Jupyter Notebook",
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 0,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 19,
+ "license": null,
+ "forks": 0,
+ "open_issues": 19,
+ "watchers": 1,
+ "default_branch": "master"
+ }
+ },
+ "base": {
+ "label": "ursa-labs:master",
+ "ref": "master",
+ "sha": "a162ad254b589b924db47e057791191b39613fd5",
+ "user": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "repo": {
+ "id": 169101701,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "name": "ursabot",
+ "full_name": "ursa-labs/ursabot",
+ "private": false,
+ "owner": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "description": null,
+ "fork": false,
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "created_at": "2019-02-04T15:40:31Z",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "pushed_at": "2019-04-05T12:01:40Z",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "homepage": null,
+ "size": 898,
+ "stargazers_count": 1,
+ "watchers_count": 1,
+ "language": "Jupyter Notebook",
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 0,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 19,
+ "license": null,
+ "forks": 0,
+ "open_issues": 19,
+ "watchers": 1,
+ "default_branch": "master"
+ }
+ },
+ "_links": {
+ "self": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "html": {
+ "href": "https://github.com/ursa-labs/ursabot/pull/26"
+ },
+ "issue": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26"
+ },
+ "comments": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments"
+ },
+ "review_comments": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments"
+ },
+ "review_comment": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}"
+ },
+ "commits": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits"
+ },
+ "statuses": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d"
+ }
+ },
+ "author_association": "MEMBER",
+ "merged": false,
+ "mergeable": true,
+ "rebaseable": true,
+ "mergeable_state": "unstable",
+ "merged_by": null,
+ "comments": 5,
+ "review_comments": 0,
+ "maintainer_can_modify": false,
+ "commits": 2,
+ "additions": 1124,
+ "deletions": 0,
+ "changed_files": 7
+ },
+ "repository": {
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "archived": false,
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "created_at": "2019-02-04T15:40:31Z",
+ "default_branch": "master",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "description": null,
+ "disabled": false,
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "fork": false,
+ "forks": 0,
+ "forks_count": 0,
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "full_name": "ursa-labs/ursabot",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "has_downloads": true,
+ "has_issues": true,
+ "has_pages": false,
+ "has_projects": true,
+ "has_wiki": true,
+ "homepage": null,
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "id": 169101701,
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "language": "Jupyter Notebook",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "license": null,
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "mirror_url": null,
+ "name": "ursabot",
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "open_issues": 19,
+ "open_issues_count": 19,
+ "owner": {
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/ursa-labs",
+ "id": 46514972,
+ "login": "ursa-labs",
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "type": "Organization",
+ "url": "https://api.github.com/users/ursa-labs"
+ },
+ "private": false,
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "pushed_at": "2019-04-05T11:22:16Z",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "size": 892,
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "stargazers_count": 1,
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "watchers": 1,
+ "watchers_count": 1
+ },
+ "sender": {
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "gravatar_id": "",
+ "html_url": "https://github.com/kszucs",
+ "id": 961747,
+ "login": "kszucs",
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "site_admin": false,
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "type": "User",
+ "url": "https://api.github.com/users/kszucs"
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json
new file mode 100644
index 000000000..1e4939776
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json
@@ -0,0 +1,64 @@
+{
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19",
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/issues/19",
+ "id": 428131685,
+ "node_id": "MDU6SXNzdWU0MjgxMzE2ODU=",
+ "number": 19,
+ "title": "Build ursabot itself via ursabot",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "labels": [],
+ "state": "closed",
+ "locked": false,
+ "assignee": null,
+ "assignees": [],
+ "milestone": null,
+ "comments": 8,
+ "created_at": "2019-04-02T09:56:41Z",
+ "updated_at": "2019-04-05T13:30:49Z",
+ "closed_at": "2019-04-05T13:30:49Z",
+ "author_association": "MEMBER",
+ "body": "",
+ "closed_by": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json
new file mode 100644
index 000000000..44c4d3bed
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json
@@ -0,0 +1,70 @@
+{
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "repository_url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "id": 429706959,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "number": 26,
+ "title": "Unittests for GithubHook + native asyncio syntax",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "labels": [],
+ "state": "closed",
+ "locked": false,
+ "assignee": null,
+ "assignees": [],
+ "milestone": null,
+ "comments": 9,
+ "created_at": "2019-04-05T11:22:15Z",
+ "updated_at": "2019-08-28T00:34:19Z",
+ "closed_at": "2019-04-05T13:54:34Z",
+ "author_association": "MEMBER",
+ "pull_request": {
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch"
+ },
+ "body": "Resolves:\r\n- #26 Unittests for GithubHook + native asyncio syntax\r\n- #27 Use native async/await keywords instead of @inlineCallbacks and yield\r\n",
+ "closed_by": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ }
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json
new file mode 100644
index 000000000..93ee4b13c
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json
@@ -0,0 +1,31 @@
+{
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/479081273",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/21#issuecomment-479081273",
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/21",
+ "id": 480243811,
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ3OTA4MTI3Mw==",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "created_at": "2019-04-02T16:29:46Z",
+ "updated_at": "2019-04-02T16:29:46Z",
+ "author_association": "MEMBER",
+ "body": "@ursabot"
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json
new file mode 100644
index 000000000..f3cd34083
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json
@@ -0,0 +1,31 @@
+{
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726",
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "id": 480248726,
+ "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "created_at": "2019-04-05T11:55:43Z",
+ "updated_at": "2019-04-05T11:55:43Z",
+ "author_association": "MEMBER",
+ "body": "@ursabot build"
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json
new file mode 100644
index 000000000..ffc48943a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json
@@ -0,0 +1,158 @@
+{
+ "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "node_id": "MDY6Q29tbWl0MTY5MTAxNzAxOjI3MDVkYTJiNjE2Yjk4ZmE2MDEwYTI1ODEzYzVhN2EyNzQ1NmY3MWQ=",
+ "commit": {
+ "author": {
+ "name": "Krisztián Szűcs",
+ "email": "szucs.krisztian@gmail.com",
+ "date": "2019-04-05T12:01:31Z"
+ },
+ "committer": {
+ "name": "Krisztián Szűcs",
+ "email": "szucs.krisztian@gmail.com",
+ "date": "2019-04-05T12:01:31Z"
+ },
+ "message": "add recorded event requests",
+ "tree": {
+ "sha": "16a7bb186833a67e9c2d84a58393503b85500ceb",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees/16a7bb186833a67e9c2d84a58393503b85500ceb"
+ },
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits/2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "comment_count": 0,
+ "verification": {
+ "verified": true,
+ "reason": "valid",
+ "signature": "-----BEGIN PGP SIGNATURE-----\n\niQFOBAABCAA4FiEEOOW2r8dr6sA77zHlgjqBKYe1QKUFAlynQ58aHHN6dWNzLmty\naXN6dGlhbkBnbWFpbC5jb20ACgkQgjqBKYe1QKUYKwf6AiXDMaLqNLNSjRY7lIXX\nudioewz0hSb4bgIXBv30nswu9CoOA0+mHCokEVtZhYbXzXDsZ1KJrilSC4j+Ws4q\nkRGA6iEmrne2HcSKNZXzcVnwV9zpwKxlVh2QCTNb1PuOYFBLH0kwE704uWIWMGDN\nbo8cjQPwegePCRguCvPh/5wa5J3uiq5gmJLG6bC/d1XYE+FJVtlnyzqzLMIryGKe\ntIciw+wwkF413Q/YVbZ49vLUeCX9H8PHC4mZYGDWuvjFW1WTfkjK5bAH+oaTVM6h\n350I5ZFloHmMA/QeRge5qFxXoEBMDGiXHHktzYZDXnliFOQNxzqwirA5lQQ6LRSS\naQ==\n=7rqi\n-----END PGP SIGNATURE-----",
+ "payload": "tree 16a7bb186833a67e9c2d84a58393503b85500ceb\nparent 446ae69b9385e8d0f40aa9595f723d34383af2f7\nauthor Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\ncommitter Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\n\nadd recorded event requests\n"
+ }
+ },
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "html_url": "https://github.com/ursa-labs/ursabot/commit/2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d/comments",
+ "author": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "committer": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "parents": [
+ {
+ "sha": "446ae69b9385e8d0f40aa9595f723d34383af2f7",
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/commits/446ae69b9385e8d0f40aa9595f723d34383af2f7",
+ "html_url": "https://github.com/ursa-labs/ursabot/commit/446ae69b9385e8d0f40aa9595f723d34383af2f7"
+ }
+ ],
+ "stats": {
+ "total": 1062,
+ "additions": 1058,
+ "deletions": 4
+ },
+ "files": [
+ {
+ "sha": "dfae6eeaef384ae6180c6302a58b49e39982dc33",
+ "filename": "ursabot/tests/fixtures/issue-comment-build-command.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"I've successfully started builds for this PR\",\n+ \"created_at\": \"2019-04-05T11:55:44Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248730\",\n+ \"id\": 480248730,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODczMA==\",\n+ \"updated_at\": \"2019-04-05T11:55:44Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248730\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 4,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:55:44Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}"
+ },
+ {
+ "sha": "7ef554e333327f0e62aa1fd76b4b17844a39adeb",
+ "filename": "ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Unknown command \\\"\\\"\",\n+ \"created_at\": \"2019-04-05T11:35:47Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\",\n+ \"id\": 480243815,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 2,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}"
+ },
+ {
+ "sha": "a8082dbc91fdfe815b795e49ec10e49000771ef5",
+ "filename": "ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"bear is no game\",\n+ \"created_at\": \"2019-04-05T11:26:56Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\",\n+ \"id\": 480241727,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 0,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "2770e29ba9086394455315e590c0b433d08e437e",
+ "filename": "ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot \",\n+ \"created_at\": \"2019-04-05T11:35:46Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\",\n+ \"id\": 480243811,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 1,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "80ff46510a2f39ae60f7c3a98e5fdaef8e688784",
+ "filename": "ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "status": "added",
+ "additions": 206,
+ "deletions": 0,
+ "changes": 206,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -0,0 +1,206 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Ursabot only listens to pull request comments!\",\n+ \"created_at\": \"2019-04-05T11:53:43Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480248217\",\n+ \"id\": 480248217,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODIxNw==\",\n+ \"updated_at\": \"2019-04-05T11:53:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248217\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 4,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\",\n+ \"created_at\": \"2019-04-02T09:56:41Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19\",\n+ \"id\": 428131685,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDU6SXNzdWU0MjgxMzE2ODU=\",\n+ \"number\": 19,\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Build ursabot itself via ursabot\",\n+ \"updated_at\": \"2019-04-05T11:53:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}"
+ },
+ {
+ "sha": "c738bb0eb54c87ba0f23e97e827d77c2be74d0b6",
+ "filename": "ursabot/tests/test_hooks.py",
+ "status": "modified",
+ "additions": 4,
+ "deletions": 4,
+ "changes": 8,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "patch": "@@ -54,7 +54,7 @@ class TestGithubHook(ChangeHookTestCase):\n await self.request('ping', {})\n assert len(self.hook.master.data.updates.changesAdded) == 0\n \n- @ensure_deferred\n- async def test_issue_comment(self):\n- payload = {}\n- await self.request('issue_comment', payload)\n+ # @ensure_deferred\n+ # async def test_issue_comment(self):\n+ # payload = {}\n+ # await self.request('issue_comment', payload)"
+ }
+ ]
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json
new file mode 100644
index 000000000..b039b3d10
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json
@@ -0,0 +1,170 @@
+[
+ {
+ "sha": "ebfe3f6c5e98723f9751c99ce8ce798f1ba529c5",
+ "filename": ".travis.yml",
+ "status": "modified",
+ "additions": 4,
+ "deletions": 1,
+ "changes": 5,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/.travis.yml",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/.travis.yml",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/.travis.yml?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -4,7 +4,10 @@ services:\n python:\n - 3.6\n script:\n- - pip install \"pytest>=3.9\" flake8 -e .\n+ # --no-binary buildbot is required because buildbot doesn't bundle its tests\n+ # to binary wheels, but ursabot's test suite depends on buildbot's so install\n+ # it from source\n+ - pip install --no-binary buildbot \"pytest>=3.9\" mock flake8 -e .\n \n # run linter\n - flake8 ursabot"
+ },
+ {
+ "sha": "86ad809d3f74c175b92ac58c6c645b0fbf5fa2c5",
+ "filename": "setup.py",
+ "status": "modified",
+ "additions": 6,
+ "deletions": 1,
+ "changes": 7,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/setup.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/setup.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/setup.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -1,8 +1,13 @@\n #!/usr/bin/env python\n \n+import sys\n from setuptools import setup\n \n \n+if sys.version_info < (3, 6):\n+ sys.exit('Python < 3.6 is not supported due to missing asyncio support')\n+\n+\n # TODO(kszucs): add package data, change maintainer\n setup(\n name='ursabot',\n@@ -15,7 +20,7 @@\n setup_requires=['setuptools_scm'],\n install_requires=['click', 'dask', 'docker', 'docker-map', 'toolz',\n 'buildbot', 'treq'],\n- tests_require=['pytest>=3.9'],\n+ tests_require=['pytest>=3.9', 'mock'],\n entry_points='''\n [console_scripts]\n ursabot=ursabot.cli:ursabot"
+ },
+ {
+ "sha": "c884f3f85bba499d77d9ad28bcd0ff5edf80f957",
+ "filename": "ursabot/factories.py",
+ "status": "modified",
+ "additions": 6,
+ "deletions": 2,
+ "changes": 8,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/factories.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/factories.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/factories.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -79,8 +79,12 @@ def prepend_step(self, step):\n repourl='https://github.com/ursa-labs/ursabot',\n mode='full'),\n ShellCommand(command=['ls', '-lah']),\n- ShellCommand(command=['pip', 'install', 'pytest', 'flake8']),\n- ShellCommand(command=['pip', 'install', '-e', '.']),\n+ ShellCommand(command=['pip', 'install', 'pytest', 'flake8', 'mock']),\n+ # --no-binary buildbot is required because buildbot doesn't bundle its\n+ # tests to binary wheels, but ursabot's test suite depends on buildbot's\n+ # so install it from source\n+ ShellCommand(command=['pip', 'install', '--no-binary', 'buildbot',\n+ '-e', '.']),\n ShellCommand(command=['flake8']),\n ShellCommand(command=['pytest', '-v', '-m', 'not docker', 'ursabot']),\n ShellCommand(command=['buildbot', 'checkconfig', '.'])"
+ },
+ {
+ "sha": "0265cfbd9c2882f492469882a7bf513a1c1b5af4",
+ "filename": "ursabot/hooks.py",
+ "status": "modified",
+ "additions": 17,
+ "deletions": 19,
+ "changes": 36,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/hooks.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/hooks.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/hooks.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -1,11 +1,11 @@\n from urllib.parse import urlparse\n \n from twisted.python import log\n-from twisted.internet import defer\n \n from buildbot.www.hooks.github import GitHubEventHandler\n from buildbot.util.httpclientservice import HTTPClientService\n \n+from .utils import ensure_deferred\n \n BOTNAME = 'ursabot'\n \n@@ -22,20 +22,18 @@ def _client(self):\n self.master, self.github_api_endpoint, headers=headers,\n debug=self.debug, verify=self.verify)\n \n- @defer.inlineCallbacks\n- def _get(self, url):\n+ async def _get(self, url):\n url = urlparse(url)\n- client = yield self._client()\n- response = yield client.get(url.path)\n- result = yield response.json()\n+ client = await self._client()\n+ response = await client.get(url.path)\n+ result = await response.json()\n return result\n \n- @defer.inlineCallbacks\n- def _post(self, url, data):\n+ async def _post(self, url, data):\n url = urlparse(url)\n- client = yield self._client()\n- response = yield client.post(url.path, json=data)\n- result = yield response.json()\n+ client = await self._client()\n+ response = await client.post(url.path, json=data)\n+ result = await response.json()\n log.msg(f'POST to {url} with the following result: {result}')\n return result\n \n@@ -46,8 +44,8 @@ def _parse_command(self, message):\n return message.split(mention)[-1].lower().strip()\n return None\n \n- @defer.inlineCallbacks\n- def handle_issue_comment(self, payload, event):\n+ @ensure_deferred\n+ async def handle_issue_comment(self, payload, event):\n issue = payload['issue']\n comments_url = issue['comments_url']\n command = self._parse_command(payload['comment']['body'])\n@@ -64,16 +62,16 @@ def handle_issue_comment(self, payload, event):\n elif command == 'build':\n if 'pull_request' not in issue:\n message = 'Ursabot only listens to pull request comments!'\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return [], 'git'\n else:\n message = f'Unknown command \"{command}\"'\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return [], 'git'\n \n try:\n- pull_request = yield self._get(issue['pull_request']['url'])\n- changes, _ = yield self.handle_pull_request({\n+ pull_request = await self._get(issue['pull_request']['url'])\n+ changes, _ = await self.handle_pull_request({\n 'action': 'synchronize',\n 'sender': payload['sender'],\n 'repository': payload['repository'],\n@@ -82,11 +80,11 @@ def handle_issue_comment(self, payload, event):\n }, event)\n except Exception as e:\n message = \"I've failed to start builds for this PR\"\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n raise e\n else:\n message = \"I've successfully started builds for this PR\"\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return changes, 'git'\n \n # TODO(kszucs):"
+ },
+ {
+ "sha": "1e1ecf2ce47da929dbf1b93632640e7e6ae1cfe0",
+ "filename": "ursabot/steps.py",
+ "status": "modified",
+ "additions": 13,
+ "deletions": 13,
+ "changes": 26,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/steps.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/steps.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/steps.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -1,9 +1,9 @@\n-from twisted.internet import defer\n-\n from buildbot.plugins import steps, util\n from buildbot.process import buildstep\n from buildbot.process.results import SUCCESS\n \n+from .utils import ensure_deferred\n+\n \n class ShellMixin(buildstep.ShellMixin):\n \"\"\"Run command in a login bash shell\n@@ -49,10 +49,10 @@ def __init__(self, **kwargs):\n kwargs = self.setupShellMixin(kwargs)\n super().__init__(**kwargs)\n \n- @defer.inlineCallbacks\n- def run(self):\n- cmd = yield self.makeRemoteShellCommand(command=self.command)\n- yield self.runCommand(cmd)\n+ @ensure_deferred\n+ async def run(self):\n+ cmd = await self.makeRemoteShellCommand(command=self.command)\n+ await self.runCommand(cmd)\n return cmd.results()\n \n \n@@ -71,8 +71,8 @@ class CMake(ShellMixin, steps.CMake):\n \n name = 'CMake'\n \n- @defer.inlineCallbacks\n- def run(self):\n+ @ensure_deferred\n+ async def run(self):\n \"\"\"Create and run CMake command\n \n Copied from the original CMake implementation to handle None values as\n@@ -94,8 +94,8 @@ def run(self):\n if self.options is not None:\n command.extend(self.options)\n \n- cmd = yield self.makeRemoteShellCommand(command=command)\n- yield self.runCommand(cmd)\n+ cmd = await self.makeRemoteShellCommand(command=command)\n+ await self.runCommand(cmd)\n \n return cmd.results()\n \n@@ -117,8 +117,8 @@ def __init__(self, variables, source='WorkerEnvironment', **kwargs):\n self.source = source\n super().__init__(**kwargs)\n \n- @defer.inlineCallbacks\n- def run(self):\n+ @ensure_deferred\n+ async def run(self):\n # on Windows, environment variables are case-insensitive, but we have\n # a case-sensitive dictionary in worker_environ. Fortunately, that\n # dictionary is also folded to uppercase, so we can simply fold the\n@@ -139,7 +139,7 @@ def run(self):\n # TODO(kszucs) try with self.setProperty similarly like in\n # SetProperties\n properties.setProperty(prop, value, self.source, runtime=True)\n- yield self.addCompleteLog('set-prop', f'{prop}: {value}')\n+ await self.addCompleteLog('set-prop', f'{prop}: {value}')\n \n return SUCCESS\n "
+ },
+ {
+ "sha": "6a7d5308be6608f542a810d410f9240157a1340f",
+ "filename": "ursabot/tests/fixtures/issue-comment-build-command.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-build-command.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-build-command.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot build\",\n+ \"created_at\": \"2019-04-05T11:55:43Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726\",\n+ \"id\": 480248726,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==\",\n+ \"updated_at\": \"2019-04-05T11:55:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 3,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:55:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "7ef554e333327f0e62aa1fd76b4b17844a39adeb",
+ "filename": "ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-by-ursabot.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Unknown command \\\"\\\"\",\n+ \"created_at\": \"2019-04-05T11:35:47Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\",\n+ \"id\": 480243815,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 2,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}"
+ },
+ {
+ "sha": "a8082dbc91fdfe815b795e49ec10e49000771ef5",
+ "filename": "ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"bear is no game\",\n+ \"created_at\": \"2019-04-05T11:26:56Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\",\n+ \"id\": 480241727,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 0,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "2770e29ba9086394455315e590c0b433d08e437e",
+ "filename": "ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "status": "added",
+ "additions": 212,
+ "deletions": 0,
+ "changes": 212,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-with-empty-command.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot \",\n+ \"created_at\": \"2019-04-05T11:35:46Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\",\n+ \"id\": 480243811,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 1,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "b7de8d838332944101812ee2a46c08dd0144efe3",
+ "filename": "ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "status": "added",
+ "additions": 206,
+ "deletions": 0,
+ "changes": 206,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-without-pull-request.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,206 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot build\",\n+ \"created_at\": \"2019-04-05T13:07:57Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480268708\",\n+ \"id\": 480268708,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI2ODcwOA==\",\n+ \"updated_at\": \"2019-04-05T13:07:57Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480268708\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 5,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\",\n+ \"created_at\": \"2019-04-02T09:56:41Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19\",\n+ \"id\": 428131685,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDU6SXNzdWU0MjgxMzE2ODU=\",\n+ \"number\": 19,\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Build ursabot itself via ursabot\",\n+ \"updated_at\": \"2019-04-05T13:07:57Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 898,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}"
+ },
+ {
+ "sha": "33e051455e866fb4774a16ae02ad40dcf9e6a7fd",
+ "filename": "ursabot/tests/fixtures/pull-request-26-commit.json",
+ "status": "added",
+ "additions": 158,
+ "deletions": 0,
+ "changes": 158,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26-commit.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26-commit.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/pull-request-26-commit.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,158 @@\n+{\n+ \"sha\": \"2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"node_id\": \"MDY6Q29tbWl0MTY5MTAxNzAxOjI3MDVkYTJiNjE2Yjk4ZmE2MDEwYTI1ODEzYzVhN2EyNzQ1NmY3MWQ=\",\n+ \"commit\": {\n+ \"author\": {\n+ \"name\": \"Krisztián Szűcs\",\n+ \"email\": \"szucs.krisztian@gmail.com\",\n+ \"date\": \"2019-04-05T12:01:31Z\"\n+ },\n+ \"committer\": {\n+ \"name\": \"Krisztián Szűcs\",\n+ \"email\": \"szucs.krisztian@gmail.com\",\n+ \"date\": \"2019-04-05T12:01:31Z\"\n+ },\n+ \"message\": \"add recorded event requests\",\n+ \"tree\": {\n+ \"sha\": \"16a7bb186833a67e9c2d84a58393503b85500ceb\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees/16a7bb186833a67e9c2d84a58393503b85500ceb\"\n+ },\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"comment_count\": 0,\n+ \"verification\": {\n+ \"verified\": true,\n+ \"reason\": \"valid\",\n+ \"signature\": \"-----BEGIN PGP SIGNATURE-----\\n\\niQFOBAABCAA4FiEEOOW2r8dr6sA77zHlgjqBKYe1QKUFAlynQ58aHHN6dWNzLmty\\naXN6dGlhbkBnbWFpbC5jb20ACgkQgjqBKYe1QKUYKwf6AiXDMaLqNLNSjRY7lIXX\\nudioewz0hSb4bgIXBv30nswu9CoOA0+mHCokEVtZhYbXzXDsZ1KJrilSC4j+Ws4q\\nkRGA6iEmrne2HcSKNZXzcVnwV9zpwKxlVh2QCTNb1PuOYFBLH0kwE704uWIWMGDN\\nbo8cjQPwegePCRguCvPh/5wa5J3uiq5gmJLG6bC/d1XYE+FJVtlnyzqzLMIryGKe\\ntIciw+wwkF413Q/YVbZ49vLUeCX9H8PHC4mZYGDWuvjFW1WTfkjK5bAH+oaTVM6h\\n350I5ZFloHmMA/QeRge5qFxXoEBMDGiXHHktzYZDXnliFOQNxzqwirA5lQQ6LRSS\\naQ==\\n=7rqi\\n-----END PGP SIGNATURE-----\",\n+ \"payload\": \"tree 16a7bb186833a67e9c2d84a58393503b85500ceb\\nparent 446ae69b9385e8d0f40aa9595f723d34383af2f7\\nauthor Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\\ncommitter Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\\n\\nadd recorded event requests\\n\"\n+ }\n+ },\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/commit/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d/comments\",\n+ \"author\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"committer\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"parents\": [\n+ {\n+ \"sha\": \"446ae69b9385e8d0f40aa9595f723d34383af2f7\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits/446ae69b9385e8d0f40aa9595f723d34383af2f7\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/commit/446ae69b9385e8d0f40aa9595f723d34383af2f7\"\n+ }\n+ ],\n+ \"stats\": {\n+ \"total\": 1062,\n+ \"additions\": 1058,\n+ \"deletions\": 4\n+ },\n+ \"files\": [\n+ {\n+ \"sha\": \"dfae6eeaef384ae6180c6302a58b49e39982dc33\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"I've successfully started builds for this PR\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248730\\\",\\n+ \\\"id\\\": 480248730,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODczMA==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248730\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 4,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"7ef554e333327f0e62aa1fd76b4b17844a39adeb\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"Unknown command \\\\\\\"\\\\\\\"\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\\\",\\n+ \\\"id\\\": 480243815,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 2,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"a8082dbc91fdfe815b795e49ec10e49000771ef5\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"bear is no game\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\\\",\\n+ \\\"id\\\": 480241727,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 0,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"2770e29ba9086394455315e590c0b433d08e437e\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"@ursabot \\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\\\",\\n+ \\\"id\\\": 480243811,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 1,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"80ff46510a2f39ae60f7c3a98e5fdaef8e688784\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"status\": \"added\",\n+ \"additions\": 206,\n+ \"deletions\": 0,\n+ \"changes\": 206,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,206 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"Ursabot only listens to pull request comments!\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480248217\\\",\\n+ \\\"id\\\": 480248217,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODIxNw==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248217\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 4,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\\\",\\n+ \\\"created_at\\\": \\\"2019-04-02T09:56:41Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"id\\\": 428131685,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDU6SXNzdWU0MjgxMzE2ODU=\\\",\\n+ \\\"number\\\": 19,\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Build ursabot itself via ursabot\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"c738bb0eb54c87ba0f23e97e827d77c2be74d0b6\",\n+ \"filename\": \"ursabot/tests/test_hooks.py\",\n+ \"status\": \"modified\",\n+ \"additions\": 4,\n+ \"deletions\": 4,\n+ \"changes\": 8,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -54,7 +54,7 @@ class TestGithubHook(ChangeHookTestCase):\\n await self.request('ping', {})\\n assert len(self.hook.master.data.updates.changesAdded) == 0\\n \\n- @ensure_deferred\\n- async def test_issue_comment(self):\\n- payload = {}\\n- await self.request('issue_comment', payload)\\n+ # @ensure_deferred\\n+ # async def test_issue_comment(self):\\n+ # payload = {}\\n+ # await self.request('issue_comment', payload)\"\n+ }\n+ ]\n+}"
+ },
+ {
+ "sha": "ad061d7244b917e6ea3853698dc3bc2a8c9c6857",
+ "filename": "ursabot/tests/fixtures/pull-request-26.json",
+ "status": "added",
+ "additions": 335,
+ "deletions": 0,
+ "changes": 335,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26.json",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26.json",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/pull-request-26.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,335 @@\n+{\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\",\n+ \"id\": 267785552,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"number\": 26,\n+ \"state\": \"open\",\n+ \"locked\": false,\n+ \"title\": \"Unittests for GithubHook\",\n+ \"user\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"body\": \"\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"updated_at\": \"2019-04-05T12:01:40Z\",\n+ \"closed_at\": null,\n+ \"merged_at\": null,\n+ \"merge_commit_sha\": \"cc5dc3606988b3824be54df779ed2028776113cb\",\n+ \"assignee\": null,\n+ \"assignees\": [\n+\n+ ],\n+ \"requested_reviewers\": [\n+\n+ ],\n+ \"requested_teams\": [\n+\n+ ],\n+ \"labels\": [\n+\n+ ],\n+ \"milestone\": null,\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits\",\n+ \"review_comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments\",\n+ \"review_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"head\": {\n+ \"label\": \"ursa-labs:test-hook\",\n+ \"ref\": \"test-hook\",\n+ \"sha\": \"2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"user\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"repo\": {\n+ \"id\": 169101701,\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"name\": \"ursabot\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"private\": false,\n+ \"owner\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"description\": null,\n+ \"fork\": false,\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"homepage\": null,\n+ \"size\": 898,\n+ \"stargazers_count\": 1,\n+ \"watchers_count\": 1,\n+ \"language\": \"Jupyter Notebook\",\n+ \"has_issues\": true,\n+ \"has_projects\": true,\n+ \"has_downloads\": true,\n+ \"has_wiki\": true,\n+ \"has_pages\": false,\n+ \"forks_count\": 0,\n+ \"mirror_url\": null,\n+ \"archived\": false,\n+ \"disabled\": false,\n+ \"open_issues_count\": 19,\n+ \"license\": null,\n+ \"forks\": 0,\n+ \"open_issues\": 19,\n+ \"watchers\": 1,\n+ \"default_branch\": \"master\"\n+ }\n+ },\n+ \"base\": {\n+ \"label\": \"ursa-labs:master\",\n+ \"ref\": \"master\",\n+ \"sha\": \"a162ad254b589b924db47e057791191b39613fd5\",\n+ \"user\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"repo\": {\n+ \"id\": 169101701,\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"name\": \"ursabot\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"private\": false,\n+ \"owner\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"description\": null,\n+ \"fork\": false,\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"homepage\": null,\n+ \"size\": 898,\n+ \"stargazers_count\": 1,\n+ \"watchers_count\": 1,\n+ \"language\": \"Jupyter Notebook\",\n+ \"has_issues\": true,\n+ \"has_projects\": true,\n+ \"has_downloads\": true,\n+ \"has_wiki\": true,\n+ \"has_pages\": false,\n+ \"forks_count\": 0,\n+ \"mirror_url\": null,\n+ \"archived\": false,\n+ \"disabled\": false,\n+ \"open_issues_count\": 19,\n+ \"license\": null,\n+ \"forks\": 0,\n+ \"open_issues\": 19,\n+ \"watchers\": 1,\n+ \"default_branch\": \"master\"\n+ }\n+ },\n+ \"_links\": {\n+ \"self\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"html\": {\n+ \"href\": \"https://github.com/ursa-labs/ursabot/pull/26\"\n+ },\n+ \"issue\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\"\n+ },\n+ \"comments\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\"\n+ },\n+ \"review_comments\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments\"\n+ },\n+ \"review_comment\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}\"\n+ },\n+ \"commits\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits\"\n+ },\n+ \"statuses\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d\"\n+ }\n+ },\n+ \"author_association\": \"MEMBER\",\n+ \"merged\": false,\n+ \"mergeable\": true,\n+ \"rebaseable\": true,\n+ \"mergeable_state\": \"unstable\",\n+ \"merged_by\": null,\n+ \"comments\": 5,\n+ \"review_comments\": 0,\n+ \"maintainer_can_modify\": false,\n+ \"commits\": 2,\n+ \"additions\": 1124,\n+ \"deletions\": 0,\n+ \"changed_files\": 7\n+}"
+ },
+ {
+ "sha": "e87b27d2d7b4956d15f7468488b96cf6a06686f4",
+ "filename": "ursabot/tests/test_hooks.py",
+ "status": "added",
+ "additions": 116,
+ "deletions": 0,
+ "changes": 116,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/test_hooks.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/test_hooks.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,116 @@\n+import json\n+from pathlib import Path\n+from twisted.trial import unittest\n+\n+from buildbot.test.util.misc import TestReactorMixin\n+from buildbot.test.fake.httpclientservice import \\\n+ HTTPClientService as FakeHTTPClientService\n+from buildbot.test.unit.test_www_hooks_github import (\n+ _prepare_request, _prepare_github_change_hook)\n+\n+from ursabot.utils import ensure_deferred\n+from ursabot.hooks import GithubHook\n+\n+\n+class ChangeHookTestCase(unittest.TestCase, TestReactorMixin):\n+\n+ klass = None\n+\n+ @ensure_deferred\n+ async def setUp(self):\n+ self.setUpTestReactor()\n+\n+ assert self.klass is not None\n+ self.hook = _prepare_github_change_hook(self, **{'class': self.klass})\n+ self.master = self.hook.master\n+ self.http = await FakeHTTPClientService.getFakeService(\n+ self.master, self, 'https://api.github.com',\n+ headers={'User-Agent': 'Buildbot'}, debug=False, verify=False)\n+\n+ await self.master.startService()\n+\n+ @ensure_deferred\n+ async def tearDown(self):\n+ await self.master.stopService()\n+\n+ async def trigger(self, event, payload, headers=None, _secret=None):\n+ payload = json.dumps(payload).encode()\n+ request = _prepare_request(event, payload, _secret=_secret,\n+ headers=headers)\n+ await request.test_render(self.hook)\n+ return request\n+\n+ def load_fixture(self, name):\n+ path = Path(__file__).parent / 'fixtures' / f'{name}.json'\n+ with path.open('r') as fp:\n+ return json.load(fp)\n+\n+\n+class TestGithubHook(ChangeHookTestCase):\n+\n+ klass = GithubHook\n+\n+ @ensure_deferred\n+ async def test_ping(self):\n+ await self.trigger('ping', {})\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_not_mentioning_ursabot(self):\n+ payload = self.load_fixture('issue-comment-not-mentioning-ursabot')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_by_ursabot(self):\n+ payload = self.load_fixture('issue-comment-by-ursabot')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_with_empty_command(self):\n+ # responds to the comment\n+ request_json = {'body': 'Unknown command \"\"'}\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/26/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-with-empty-command')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_without_pull_request(self):\n+ # responds to the comment\n+ request_json = {\n+ 'body': 'Ursabot only listens to pull request comments!'\n+ }\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/19/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-without-pull-request')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_build_command(self):\n+ # handle_issue_comment queries the pull request\n+ request_json = self.load_fixture('pull-request-26')\n+ self.http.expect('get', '/repos/ursa-labs/ursabot/pulls/26',\n+ content_json=request_json)\n+ # tigger handle_pull_request which fetches the commit\n+ request_json = self.load_fixture('pull-request-26-commit')\n+ commit = '2705da2b616b98fa6010a25813c5a7a27456f71d'\n+ self.http.expect('get', f'/repos/ursa-labs/ursabot/commits/{commit}',\n+ content_json=request_json)\n+\n+ # then responds to the comment\n+ request_json = {'body': \"I've successfully started builds for this PR\"}\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/26/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-build-command')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 1"
+ },
+ {
+ "sha": "3ff0e88660cf186420e8bc672735e4d446963192",
+ "filename": "ursabot/utils.py",
+ "status": "added",
+ "additions": 10,
+ "deletions": 0,
+ "changes": 10,
+ "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/utils.py",
+ "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/utils.py",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/utils.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0",
+ "patch": "@@ -0,0 +1,10 @@\n+import functools\n+from twisted.internet import defer\n+\n+\n+def ensure_deferred(f):\n+ @functools.wraps(f)\n+ def wrapper(*args, **kwargs):\n+ result = f(*args, **kwargs)\n+ return defer.ensureDeferred(result)\n+ return wrapper"
+ }
+] \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json
new file mode 100644
index 000000000..d295afb39
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json
@@ -0,0 +1,329 @@
+{
+ "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26",
+ "id": 267785552,
+ "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy",
+ "html_url": "https://github.com/ursa-labs/ursabot/pull/26",
+ "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff",
+ "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch",
+ "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26",
+ "number": 26,
+ "state": "open",
+ "locked": false,
+ "title": "Unittests for GithubHook",
+ "user": {
+ "login": "kszucs",
+ "id": 961747,
+ "node_id": "MDQ6VXNlcjk2MTc0Nw==",
+ "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/kszucs",
+ "html_url": "https://github.com/kszucs",
+ "followers_url": "https://api.github.com/users/kszucs/followers",
+ "following_url": "https://api.github.com/users/kszucs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions",
+ "organizations_url": "https://api.github.com/users/kszucs/orgs",
+ "repos_url": "https://api.github.com/users/kszucs/repos",
+ "events_url": "https://api.github.com/users/kszucs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/kszucs/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "body": "",
+ "body_html": "",
+ "body_text": "",
+ "created_at": "2019-04-05T11:22:15Z",
+ "updated_at": "2019-04-05T12:01:40Z",
+ "closed_at": null,
+ "merged_at": null,
+ "merge_commit_sha": "cc5dc3606988b3824be54df779ed2028776113cb",
+ "assignee": null,
+ "assignees": [],
+ "requested_reviewers": [],
+ "requested_teams": [],
+ "labels": [],
+ "milestone": null,
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits",
+ "review_comments_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments",
+ "review_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "head": {
+ "label": "ursa-labs:test-hook",
+ "ref": "test-hook",
+ "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d",
+ "user": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "repo": {
+ "id": 169101701,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "name": "ursabot",
+ "full_name": "ursa-labs/ursabot",
+ "private": false,
+ "owner": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "description": null,
+ "fork": false,
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "created_at": "2019-02-04T15:40:31Z",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "pushed_at": "2019-04-05T12:01:40Z",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "homepage": null,
+ "size": 898,
+ "stargazers_count": 1,
+ "watchers_count": 1,
+ "language": "Jupyter Notebook",
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 0,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 19,
+ "license": null,
+ "forks": 0,
+ "open_issues": 19,
+ "watchers": 1,
+ "default_branch": "master"
+ }
+ },
+ "base": {
+ "label": "ursa-labs:master",
+ "ref": "master",
+ "sha": "a162ad254b589b924db47e057791191b39613fd5",
+ "user": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "repo": {
+ "id": 169101701,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=",
+ "name": "ursabot",
+ "full_name": "ursa-labs/ursabot",
+ "private": false,
+ "owner": {
+ "login": "ursa-labs",
+ "id": 46514972,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy",
+ "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/ursa-labs",
+ "html_url": "https://github.com/ursa-labs",
+ "followers_url": "https://api.github.com/users/ursa-labs/followers",
+ "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}",
+ "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions",
+ "organizations_url": "https://api.github.com/users/ursa-labs/orgs",
+ "repos_url": "https://api.github.com/users/ursa-labs/repos",
+ "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/ursa-labs/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/ursa-labs/ursabot",
+ "description": null,
+ "fork": false,
+ "url": "https://api.github.com/repos/ursa-labs/ursabot",
+ "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks",
+ "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams",
+ "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks",
+ "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events",
+ "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags",
+ "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages",
+ "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers",
+ "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors",
+ "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers",
+ "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription",
+ "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges",
+ "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads",
+ "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}",
+ "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments",
+ "created_at": "2019-02-04T15:40:31Z",
+ "updated_at": "2019-04-04T17:49:10Z",
+ "pushed_at": "2019-04-05T12:01:40Z",
+ "git_url": "git://github.com/ursa-labs/ursabot.git",
+ "ssh_url": "git@github.com:ursa-labs/ursabot.git",
+ "clone_url": "https://github.com/ursa-labs/ursabot.git",
+ "svn_url": "https://github.com/ursa-labs/ursabot",
+ "homepage": null,
+ "size": 898,
+ "stargazers_count": 1,
+ "watchers_count": 1,
+ "language": "Jupyter Notebook",
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 0,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 19,
+ "license": null,
+ "forks": 0,
+ "open_issues": 19,
+ "watchers": 1,
+ "default_branch": "master"
+ }
+ },
+ "_links": {
+ "self": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26"
+ },
+ "html": {
+ "href": "https://github.com/ursa-labs/ursabot/pull/26"
+ },
+ "issue": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26"
+ },
+ "comments": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments"
+ },
+ "review_comments": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments"
+ },
+ "review_comment": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}"
+ },
+ "commits": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits"
+ },
+ "statuses": {
+ "href": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d"
+ }
+ },
+ "author_association": "MEMBER",
+ "merged": false,
+ "mergeable": true,
+ "rebaseable": true,
+ "mergeable_state": "unstable",
+ "merged_by": null,
+ "comments": 5,
+ "review_comments": 0,
+ "maintainer_can_modify": false,
+ "commits": 2,
+ "additions": 1124,
+ "deletions": 0,
+ "changed_files": 7
+} \ No newline at end of file
diff --git a/src/arrow/dev/archery/archery/tests/test_benchmarks.py b/src/arrow/dev/archery/archery/tests/test_benchmarks.py
new file mode 100644
index 000000000..fab1e8d44
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/test_benchmarks.py
@@ -0,0 +1,383 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+
+from archery.benchmark.codec import JsonEncoder
+from archery.benchmark.core import Benchmark, median
+from archery.benchmark.compare import (
+ BenchmarkComparator, RunnerComparator
+)
+from archery.benchmark.google import (
+ GoogleBenchmark, GoogleBenchmarkObservation
+)
+from archery.benchmark.runner import StaticBenchmarkRunner
+
+
+def test_benchmark_comparator():
+ unit = "micros"
+
+ assert not BenchmarkComparator(
+ Benchmark("contender", unit, True, [10], unit, [1]),
+ Benchmark("baseline", unit, True, [20], unit, [1]),
+ ).regression
+
+ assert BenchmarkComparator(
+ Benchmark("contender", unit, False, [10], unit, [1]),
+ Benchmark("baseline", unit, False, [20], unit, [1]),
+ ).regression
+
+ assert BenchmarkComparator(
+ Benchmark("contender", unit, True, [20], unit, [1]),
+ Benchmark("baseline", unit, True, [10], unit, [1]),
+ ).regression
+
+ assert not BenchmarkComparator(
+ Benchmark("contender", unit, False, [20], unit, [1]),
+ Benchmark("baseline", unit, False, [10], unit, [1]),
+ ).regression
+
+
+def test_static_runner_from_json_not_a_regression():
+ archery_result = {
+ "suites": [
+ {
+ "name": "arrow-value-parsing-benchmark",
+ "benchmarks": [
+ {
+ "name": "FloatParsing<DoubleType>",
+ "unit": "items_per_second",
+ "less_is_better": False,
+ "values": [
+ 109941112.87296811
+ ],
+ "time_unit": "ns",
+ "times": [
+ 9095.800104330105
+ ]
+ },
+ ]
+ }
+ ]
+ }
+
+ contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+ baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+ [comparison] = RunnerComparator(contender, baseline).comparisons
+ assert not comparison.regression
+
+
+def test_static_runner_from_json_regression():
+ archery_result = {
+ "suites": [
+ {
+ "name": "arrow-value-parsing-benchmark",
+ "benchmarks": [
+ {
+ "name": "FloatParsing<DoubleType>",
+ "unit": "items_per_second",
+ "less_is_better": False,
+ "values": [
+ 109941112.87296811
+ ],
+ "time_unit": "ns",
+ "times": [
+ 9095.800104330105
+ ]
+ },
+ ]
+ }
+ ]
+ }
+
+ contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+ # introduce artificial regression
+ archery_result['suites'][0]['benchmarks'][0]['values'][0] *= 2
+ baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+ [comparison] = RunnerComparator(contender, baseline).comparisons
+ assert comparison.regression
+
+
+def test_benchmark_median():
+ assert median([10]) == 10
+ assert median([1, 2, 3]) == 2
+ assert median([1, 2]) == 1.5
+ assert median([1, 2, 3, 4]) == 2.5
+ assert median([1, 1, 1, 1]) == 1
+ try:
+ median([])
+ assert False
+ except ValueError:
+ pass
+
+
+def assert_benchmark(name, google_result, archery_result):
+ observation = GoogleBenchmarkObservation(**google_result)
+ benchmark = GoogleBenchmark(name, [observation])
+ result = json.dumps(benchmark, cls=JsonEncoder)
+ assert json.loads(result) == archery_result
+
+
+def test_items_per_second():
+ name = "ArrayArrayKernel<AddChecked, UInt8Type>/32768/0"
+ google_result = {
+ "cpu_time": 116292.58886653671,
+ "items_per_second": 281772039.9844759,
+ "iterations": 5964,
+ "name": name,
+ "null_percent": 0.0,
+ "real_time": 119811.77313729875,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "size": 32768.0,
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 5964,
+ "null_percent": 0.0,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "items_per_second",
+ "less_is_better": False,
+ "values": [281772039.9844759],
+ "time_unit": "ns",
+ "times": [119811.77313729875],
+ }
+ assert "items_per_second" in google_result
+ assert "bytes_per_second" not in google_result
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_bytes_per_second():
+ name = "BufferOutputStreamLargeWrites/real_time"
+ google_result = {
+ "bytes_per_second": 1890209037.3405428,
+ "cpu_time": 17018127.659574457,
+ "iterations": 47,
+ "name": name,
+ "real_time": 17458386.53190963,
+ "repetition_index": 1,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 47,
+ "repetition_index": 1,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "bytes_per_second",
+ "less_is_better": False,
+ "values": [1890209037.3405428],
+ "time_unit": "ns",
+ "times": [17458386.53190963],
+ }
+ assert "items_per_second" not in google_result
+ assert "bytes_per_second" in google_result
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_both_items_and_bytes_per_second():
+ name = "ArrayArrayKernel<AddChecked, UInt8Type>/32768/0"
+ google_result = {
+ "bytes_per_second": 281772039.9844759,
+ "cpu_time": 116292.58886653671,
+ "items_per_second": 281772039.9844759,
+ "iterations": 5964,
+ "name": name,
+ "null_percent": 0.0,
+ "real_time": 119811.77313729875,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "size": 32768.0,
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ # Note that bytes_per_second trumps items_per_second
+ archery_result = {
+ "counters": {"iterations": 5964,
+ "null_percent": 0.0,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "bytes_per_second",
+ "less_is_better": False,
+ "values": [281772039.9844759],
+ "time_unit": "ns",
+ "times": [119811.77313729875],
+ }
+ assert "items_per_second" in google_result
+ assert "bytes_per_second" in google_result
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_neither_items_nor_bytes_per_second():
+ name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time"
+ google_result = {
+ "cpu_time": 1778.6004847419827,
+ "iterations": 352765,
+ "name": name,
+ "real_time": 1835.3137357788837,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 352765,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "ns",
+ "less_is_better": True,
+ "values": [1835.3137357788837],
+ "time_unit": "ns",
+ "times": [1835.3137357788837],
+ }
+ assert "items_per_second" not in google_result
+ assert "bytes_per_second" not in google_result
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_prefer_real_time():
+ name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time"
+ google_result = {
+ "cpu_time": 1778.6004847419827,
+ "iterations": 352765,
+ "name": name,
+ "real_time": 1835.3137357788837,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 352765,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "ns",
+ "less_is_better": True,
+ "values": [1835.3137357788837],
+ "time_unit": "ns",
+ "times": [1835.3137357788837],
+ }
+ assert name.endswith("/real_time")
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_prefer_cpu_time():
+ name = "AllocateDeallocate<Jemalloc>/size:1048576"
+ google_result = {
+ "cpu_time": 1778.6004847419827,
+ "iterations": 352765,
+ "name": name,
+ "real_time": 1835.3137357788837,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 352765,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "ns",
+ "less_is_better": True,
+ "values": [1778.6004847419827],
+ "time_unit": "ns",
+ "times": [1835.3137357788837],
+ }
+ assert not name.endswith("/real_time")
+ assert_benchmark(name, google_result, archery_result)
+
+
+def test_omits_aggregates():
+ name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time"
+ google_aggregate = {
+ "aggregate_name": "mean",
+ "cpu_time": 1757.428694267678,
+ "iterations": 3,
+ "name": "AllocateDeallocate<Jemalloc>/size:1048576/real_time_mean",
+ "real_time": 1849.3869337041162,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "aggregate",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ google_result = {
+ "cpu_time": 1778.6004847419827,
+ "iterations": 352765,
+ "name": name,
+ "real_time": 1835.3137357788837,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "run_type": "iteration",
+ "threads": 1,
+ "time_unit": "ns",
+ }
+ archery_result = {
+ "counters": {"iterations": 352765,
+ "repetition_index": 0,
+ "repetitions": 0,
+ "run_name": name,
+ "threads": 1},
+ "name": name,
+ "unit": "ns",
+ "less_is_better": True,
+ "values": [1835.3137357788837],
+ "time_unit": "ns",
+ "times": [1835.3137357788837],
+ }
+ assert google_aggregate["run_type"] == "aggregate"
+ assert google_result["run_type"] == "iteration"
+ observation1 = GoogleBenchmarkObservation(**google_aggregate)
+ observation2 = GoogleBenchmarkObservation(**google_result)
+ benchmark = GoogleBenchmark(name, [observation1, observation2])
+ result = json.dumps(benchmark, cls=JsonEncoder)
+ assert json.loads(result) == archery_result
diff --git a/src/arrow/dev/archery/archery/tests/test_bot.py b/src/arrow/dev/archery/archery/tests/test_bot.py
new file mode 100644
index 000000000..e84fb7e27
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/test_bot.py
@@ -0,0 +1,215 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+from unittest.mock import Mock
+
+import click
+import pytest
+import responses as rsps
+
+from archery.bot import CommentBot, CommandError, group
+
+
+@pytest.fixture
+def responses():
+ with rsps.RequestsMock() as mock:
+ yield mock
+
+
+def github_url(path):
+ return 'https://api.github.com:443/{}'.format(path.strip('/'))
+
+
+@group()
+def custom_handler():
+ pass
+
+
+@custom_handler.command()
+@click.pass_obj
+def extra(obj):
+ return obj
+
+
+@custom_handler.command()
+@click.option('--force', '-f', is_flag=True)
+def build(force):
+ return force
+
+
+@custom_handler.command()
+@click.option('--name', required=True)
+def benchmark(name):
+ return name
+
+
+def test_click_based_commands():
+ assert custom_handler('build') is False
+ assert custom_handler('build -f') is True
+
+ assert custom_handler('benchmark --name strings') == 'strings'
+ with pytest.raises(CommandError):
+ assert custom_handler('benchmark')
+
+ assert custom_handler('extra', extra='data') == {'extra': 'data'}
+
+
+@pytest.mark.parametrize('fixture_name', [
+ # the bot is not mentioned, nothing to do
+ 'event-issue-comment-not-mentioning-ursabot.json',
+ # don't respond to itself, it prevents recursive comment storms!
+ 'event-issue-comment-by-ursabot.json',
+ # non-authorized user sent the comment, do not respond
+ 'event-issue-comment-by-non-authorized-user.json',
+])
+def test_noop_events(load_fixture, fixture_name):
+ payload = load_fixture(fixture_name)
+
+ handler = Mock()
+ bot = CommentBot(name='ursabot', token='', handler=handler)
+ bot.handle('issue_comment', payload)
+
+ handler.assert_not_called()
+
+
+def test_issue_comment_without_pull_request(load_fixture, responses):
+ responses.add(
+ responses.GET,
+ github_url('/repositories/169101701/issues/19'),
+ json=load_fixture('issue-19.json'),
+ status=200
+ )
+ responses.add(
+ responses.GET,
+ github_url('repos/ursa-labs/ursabot/pulls/19'),
+ json={},
+ status=404
+ )
+ responses.add(
+ responses.POST,
+ github_url('/repos/ursa-labs/ursabot/issues/19/comments'),
+ json={}
+ )
+
+ def handler(command, **kwargs):
+ pass
+
+ payload = load_fixture('event-issue-comment-without-pull-request.json')
+ bot = CommentBot(name='ursabot', token='', handler=handler)
+ bot.handle('issue_comment', payload)
+
+ post = responses.calls[2]
+ assert json.loads(post.request.body) == {
+ 'body': "The comment bot only listens to pull request comments!"
+ }
+
+
+def test_respond_with_usage(load_fixture, responses):
+ responses.add(
+ responses.GET,
+ github_url('/repositories/169101701/issues/26'),
+ json=load_fixture('issue-26.json'),
+ status=200
+ )
+ responses.add(
+ responses.GET,
+ github_url('/repos/ursa-labs/ursabot/pulls/26'),
+ json=load_fixture('pull-request-26.json'),
+ status=200
+ )
+ responses.add(
+ responses.GET,
+ github_url('/repos/ursa-labs/ursabot/issues/comments/480243811'),
+ json=load_fixture('issue-comment-480243811.json')
+ )
+ responses.add(
+ responses.POST,
+ github_url('/repos/ursa-labs/ursabot/issues/26/comments'),
+ json={}
+ )
+
+ def handler(command, **kwargs):
+ raise CommandError('test-usage')
+
+ payload = load_fixture('event-issue-comment-with-empty-command.json')
+ bot = CommentBot(name='ursabot', token='', handler=handler)
+ bot.handle('issue_comment', payload)
+
+ post = responses.calls[3]
+ assert json.loads(post.request.body) == {'body': '```\ntest-usage\n```'}
+
+
+@pytest.mark.parametrize(('command', 'reaction'), [
+ ('@ursabot build', '+1'),
+ ('@ursabot build\nwith a comment', '+1'),
+ ('@ursabot listen', '-1'),
+])
+def test_issue_comment_with_commands(load_fixture, responses, command,
+ reaction):
+ responses.add(
+ responses.GET,
+ github_url('/repositories/169101701/issues/26'),
+ json=load_fixture('issue-26.json'),
+ status=200
+ )
+ responses.add(
+ responses.GET,
+ github_url('/repos/ursa-labs/ursabot/pulls/26'),
+ json=load_fixture('pull-request-26.json'),
+ status=200
+ )
+ responses.add(
+ responses.GET,
+ github_url('/repos/ursa-labs/ursabot/issues/comments/480248726'),
+ json=load_fixture('issue-comment-480248726.json')
+ )
+ responses.add(
+ responses.POST,
+ github_url(
+ '/repos/ursa-labs/ursabot/issues/comments/480248726/reactions'
+ ),
+ json={}
+ )
+
+ def handler(command, **kwargs):
+ if command == 'build':
+ return True
+ else:
+ raise ValueError('Only `build` command is supported.')
+
+ payload = load_fixture('event-issue-comment-build-command.json')
+ payload["comment"]["body"] = command
+
+ bot = CommentBot(name='ursabot', token='', handler=handler)
+ bot.handle('issue_comment', payload)
+
+ post = responses.calls[3]
+ assert json.loads(post.request.body) == {'content': reaction}
+
+
+def test_issue_comment_with_commands_bot_not_first(load_fixture, responses):
+ # when the @-mention is not first, this is a no-op
+ handler = Mock()
+
+ payload = load_fixture('event-issue-comment-build-command.json')
+ payload["comment"]["body"] = 'with a comment\n@ursabot build'
+
+ bot = CommentBot(name='ursabot', token='', handler=handler)
+ bot.handle('issue_comment', payload)
+
+ handler.assert_not_called()
diff --git a/src/arrow/dev/archery/archery/tests/test_cli.py b/src/arrow/dev/archery/archery/tests/test_cli.py
new file mode 100644
index 000000000..3891a2c28
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/test_cli.py
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+from unittest.mock import patch
+
+from click.testing import CliRunner
+
+from archery.cli import archery
+
+
+@patch("archery.linking.check_dynamic_library_dependencies")
+def test_linking_check_dependencies(fn):
+ args = [
+ "linking",
+ "check-dependencies",
+ "-a", "libarrow",
+ "-d", "libcurl",
+ "somelib.so"
+ ]
+ result = CliRunner().invoke(archery, args)
+ assert result.exit_code == 0
+ fn.assert_called_once_with(
+ Path('somelib.so'), allowed={'libarrow'}, disallowed={'libcurl'}
+ )
diff --git a/src/arrow/dev/archery/archery/tests/test_release.py b/src/arrow/dev/archery/archery/tests/test_release.py
new file mode 100644
index 000000000..75aac8921
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/test_release.py
@@ -0,0 +1,333 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+from archery.release import (
+ Release, MajorRelease, MinorRelease, PatchRelease,
+ Jira, Version, Issue, CommitTitle, Commit
+)
+from archery.testing import DotDict
+
+
+# subset of issues per revision
+_issues = {
+ "1.0.1": [
+ Issue("ARROW-9684", type="Bug", summary="[C++] Title"),
+ Issue("ARROW-9667", type="New Feature", summary="[Crossbow] Title"),
+ Issue("ARROW-9659", type="Bug", summary="[C++] Title"),
+ Issue("ARROW-9644", type="Bug", summary="[C++][Dataset] Title"),
+ Issue("ARROW-9643", type="Bug", summary="[C++] Title"),
+ Issue("ARROW-9609", type="Bug", summary="[C++] Title"),
+ Issue("ARROW-9606", type="Bug", summary="[C++][Dataset] Title")
+ ],
+ "1.0.0": [
+ Issue("ARROW-300", type="New Feature", summary="[Format] Title"),
+ Issue("ARROW-4427", type="Task", summary="[Doc] Title"),
+ Issue("ARROW-5035", type="Improvement", summary="[C#] Title"),
+ Issue("ARROW-8473", type="Bug", summary="[Rust] Title"),
+ Issue("ARROW-8472", type="Bug", summary="[Go][Integration] Title"),
+ Issue("ARROW-8471", type="Bug", summary="[C++][Integration] Title"),
+ Issue("ARROW-8974", type="Improvement", summary="[C++] Title"),
+ Issue("ARROW-8973", type="New Feature", summary="[Java] Title")
+ ],
+ "0.17.1": [
+ Issue("ARROW-8684", type="Bug", summary="[Python] Title"),
+ Issue("ARROW-8657", type="Bug", summary="[C++][Parquet] Title"),
+ Issue("ARROW-8641", type="Bug", summary="[Python] Title"),
+ Issue("ARROW-8609", type="Bug", summary="[C++] Title"),
+ ],
+ "0.17.0": [
+ Issue("ARROW-2882", type="New Feature", summary="[C++][Python] Title"),
+ Issue("ARROW-2587", type="Bug", summary="[Python] Title"),
+ Issue("ARROW-2447", type="Improvement", summary="[C++] Title"),
+ Issue("ARROW-2255", type="Bug", summary="[Integration] Title"),
+ Issue("ARROW-1907", type="Bug", summary="[C++/Python] Title"),
+ Issue("ARROW-1636", type="New Feature", summary="[Format] Title")
+ ]
+}
+
+
+class FakeJira(Jira):
+
+ def __init__(self):
+ pass
+
+ def project_versions(self, project='ARROW'):
+ return [
+ Version.parse("3.0.0", released=False),
+ Version.parse("2.0.0", released=False),
+ Version.parse("1.1.0", released=False),
+ Version.parse("1.0.1", released=False),
+ Version.parse("1.0.0", released=True),
+ Version.parse("0.17.1", released=True),
+ Version.parse("0.17.0", released=True),
+ Version.parse("0.16.0", released=True),
+ Version.parse("0.15.2", released=True),
+ Version.parse("0.15.1", released=True),
+ Version.parse("0.15.0", released=True),
+ ]
+
+ def project_issues(self, version, project='ARROW'):
+ return _issues[str(version)]
+
+
+@pytest.fixture
+def fake_jira():
+ return FakeJira()
+
+
+def test_version(fake_jira):
+ v = Version.parse("1.2.5")
+ assert str(v) == "1.2.5"
+ assert v.major == 1
+ assert v.minor == 2
+ assert v.patch == 5
+ assert v.released is False
+ assert v.release_date is None
+
+ v = Version.parse("1.0.0", released=True, release_date="2020-01-01")
+ assert str(v) == "1.0.0"
+ assert v.major == 1
+ assert v.minor == 0
+ assert v.patch == 0
+ assert v.released is True
+ assert v.release_date == "2020-01-01"
+
+
+def test_issue(fake_jira):
+ i = Issue("ARROW-1234", type='Bug', summary="title")
+ assert i.key == "ARROW-1234"
+ assert i.type == "Bug"
+ assert i.summary == "title"
+ assert i.project == "ARROW"
+ assert i.number == 1234
+
+ i = Issue("PARQUET-1111", type='Improvement', summary="another title")
+ assert i.key == "PARQUET-1111"
+ assert i.type == "Improvement"
+ assert i.summary == "another title"
+ assert i.project == "PARQUET"
+ assert i.number == 1111
+
+ fake_jira_issue = DotDict({
+ 'key': 'ARROW-2222',
+ 'fields': {
+ 'issuetype': {
+ 'name': 'Feature'
+ },
+ 'summary': 'Issue title'
+ }
+ })
+ i = Issue.from_jira(fake_jira_issue)
+ assert i.key == "ARROW-2222"
+ assert i.type == "Feature"
+ assert i.summary == "Issue title"
+ assert i.project == "ARROW"
+ assert i.number == 2222
+
+
+def test_commit_title():
+ t = CommitTitle.parse(
+ "ARROW-9598: [C++][Parquet] Fix writing nullable structs"
+ )
+ assert t.project == "ARROW"
+ assert t.issue == "ARROW-9598"
+ assert t.components == ["C++", "Parquet"]
+ assert t.summary == "Fix writing nullable structs"
+
+ t = CommitTitle.parse(
+ "ARROW-8002: [C++][Dataset][R] Support partitioned dataset writing"
+ )
+ assert t.project == "ARROW"
+ assert t.issue == "ARROW-8002"
+ assert t.components == ["C++", "Dataset", "R"]
+ assert t.summary == "Support partitioned dataset writing"
+
+ t = CommitTitle.parse(
+ "ARROW-9600: [Rust][Arrow] pin older version of proc-macro2 during "
+ "build"
+ )
+ assert t.project == "ARROW"
+ assert t.issue == "ARROW-9600"
+ assert t.components == ["Rust", "Arrow"]
+ assert t.summary == "pin older version of proc-macro2 during build"
+
+ t = CommitTitle.parse("[Release] Update versions for 1.0.0")
+ assert t.project is None
+ assert t.issue is None
+ assert t.components == ["Release"]
+ assert t.summary == "Update versions for 1.0.0"
+
+ t = CommitTitle.parse("[Python][Doc] Fix rst role dataset.rst (#7725)")
+ assert t.project is None
+ assert t.issue is None
+ assert t.components == ["Python", "Doc"]
+ assert t.summary == "Fix rst role dataset.rst (#7725)"
+
+ t = CommitTitle.parse(
+ "PARQUET-1882: [C++] Buffered Reads should allow for 0 length"
+ )
+ assert t.project == 'PARQUET'
+ assert t.issue == 'PARQUET-1882'
+ assert t.components == ["C++"]
+ assert t.summary == "Buffered Reads should allow for 0 length"
+
+ t = CommitTitle.parse(
+ "ARROW-9340 [R] Use CRAN version of decor package "
+ "\nsomething else\n"
+ "\nwhich should be truncated"
+ )
+ assert t.project == 'ARROW'
+ assert t.issue == 'ARROW-9340'
+ assert t.components == ["R"]
+ assert t.summary == "Use CRAN version of decor package "
+
+
+def test_release_basics(fake_jira):
+ r = Release.from_jira("1.0.0", jira=fake_jira)
+ assert isinstance(r, MajorRelease)
+ assert r.is_released is True
+ assert r.branch == 'master'
+ assert r.tag == 'apache-arrow-1.0.0'
+
+ r = Release.from_jira("1.1.0", jira=fake_jira)
+ assert isinstance(r, MinorRelease)
+ assert r.is_released is False
+ assert r.branch == 'maint-1.x.x'
+ assert r.tag == 'apache-arrow-1.1.0'
+
+ # minor releases before 1.0 are treated as major releases
+ r = Release.from_jira("0.17.0", jira=fake_jira)
+ assert isinstance(r, MajorRelease)
+ assert r.is_released is True
+ assert r.branch == 'master'
+ assert r.tag == 'apache-arrow-0.17.0'
+
+ r = Release.from_jira("0.17.1", jira=fake_jira)
+ assert isinstance(r, PatchRelease)
+ assert r.is_released is True
+ assert r.branch == 'maint-0.17.x'
+ assert r.tag == 'apache-arrow-0.17.1'
+
+
+def test_previous_and_next_release(fake_jira):
+ r = Release.from_jira("3.0.0", jira=fake_jira)
+ assert isinstance(r.previous, MajorRelease)
+ assert r.previous.version == Version.parse("2.0.0")
+ with pytest.raises(ValueError, match="There is no upcoming release set"):
+ assert r.next
+
+ r = Release.from_jira("2.0.0", jira=fake_jira)
+ assert isinstance(r.previous, MajorRelease)
+ assert isinstance(r.next, MajorRelease)
+ assert r.previous.version == Version.parse("1.0.0")
+ assert r.next.version == Version.parse("3.0.0")
+
+ r = Release.from_jira("1.1.0", jira=fake_jira)
+ assert isinstance(r.previous, MajorRelease)
+ assert isinstance(r.next, MajorRelease)
+ assert r.previous.version == Version.parse("1.0.0")
+ assert r.next.version == Version.parse("2.0.0")
+
+ r = Release.from_jira("1.0.0", jira=fake_jira)
+ assert isinstance(r.next, MajorRelease)
+ assert isinstance(r.previous, MajorRelease)
+ assert r.previous.version == Version.parse("0.17.0")
+ assert r.next.version == Version.parse("2.0.0")
+
+ r = Release.from_jira("0.17.0", jira=fake_jira)
+ assert isinstance(r.previous, MajorRelease)
+ assert r.previous.version == Version.parse("0.16.0")
+
+ r = Release.from_jira("0.15.2", jira=fake_jira)
+ assert isinstance(r.previous, PatchRelease)
+ assert isinstance(r.next, MajorRelease)
+ assert r.previous.version == Version.parse("0.15.1")
+ assert r.next.version == Version.parse("0.16.0")
+
+ r = Release.from_jira("0.15.1", jira=fake_jira)
+ assert isinstance(r.previous, MajorRelease)
+ assert isinstance(r.next, PatchRelease)
+ assert r.previous.version == Version.parse("0.15.0")
+ assert r.next.version == Version.parse("0.15.2")
+
+
+def test_release_issues(fake_jira):
+ # major release issues
+ r = Release.from_jira("1.0.0", jira=fake_jira)
+ assert r.issues.keys() == set([
+ "ARROW-300",
+ "ARROW-4427",
+ "ARROW-5035",
+ "ARROW-8473",
+ "ARROW-8472",
+ "ARROW-8471",
+ "ARROW-8974",
+ "ARROW-8973"
+ ])
+ # minor release issues
+ r = Release.from_jira("0.17.0", jira=fake_jira)
+ assert r.issues.keys() == set([
+ "ARROW-2882",
+ "ARROW-2587",
+ "ARROW-2447",
+ "ARROW-2255",
+ "ARROW-1907",
+ "ARROW-1636",
+ ])
+ # patch release issues
+ r = Release.from_jira("1.0.1", jira=fake_jira)
+ assert r.issues.keys() == set([
+ "ARROW-9684",
+ "ARROW-9667",
+ "ARROW-9659",
+ "ARROW-9644",
+ "ARROW-9643",
+ "ARROW-9609",
+ "ARROW-9606"
+ ])
+
+
+@pytest.mark.parametrize(('version', 'ncommits'), [
+ ("1.0.0", 771),
+ ("0.17.1", 27),
+ ("0.17.0", 569),
+ ("0.15.1", 41)
+])
+def test_release_commits(fake_jira, version, ncommits):
+ r = Release.from_jira(version, jira=fake_jira)
+ assert len(r.commits) == ncommits
+ for c in r.commits:
+ assert isinstance(c, Commit)
+ assert isinstance(c.title, CommitTitle)
+ assert c.url.endswith(c.hexsha)
+
+
+def test_maintenance_patch_selection(fake_jira):
+ r = Release.from_jira("0.17.1", jira=fake_jira)
+
+ shas_to_pick = [
+ c.hexsha for c in r.commits_to_pick(exclude_already_applied=False)
+ ]
+ expected = [
+ '8939b4bd446ee406d5225c79d563a27d30fd7d6d',
+ 'bcef6c95a324417e85e0140f9745d342cd8784b3',
+ '6002ec388840de5622e39af85abdc57a2cccc9b2',
+ '9123dadfd123bca7af4eaa9455f5b0d1ca8b929d',
+ ]
+ assert shas_to_pick == expected
diff --git a/src/arrow/dev/archery/archery/tests/test_testing.py b/src/arrow/dev/archery/archery/tests/test_testing.py
new file mode 100644
index 000000000..117b9288d
--- /dev/null
+++ b/src/arrow/dev/archery/archery/tests/test_testing.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import subprocess
+
+import pytest
+
+from archery.testing import PartialEnv, assert_subprocess_calls
+
+
+def test_partial_env():
+ assert PartialEnv(a=1, b=2) == {'a': 1, 'b': 2, 'c': 3}
+ assert PartialEnv(a=1) == {'a': 1, 'b': 2, 'c': 3}
+ assert PartialEnv(a=1, b=2) == {'a': 1, 'b': 2}
+ assert PartialEnv(a=1, b=2) != {'b': 2, 'c': 3}
+ assert PartialEnv(a=1, b=2) != {'a': 1, 'c': 3}
+
+
+def test_assert_subprocess_calls():
+ expected_calls = [
+ "echo Hello",
+ ["echo", "World"]
+ ]
+ with assert_subprocess_calls(expected_calls):
+ subprocess.run(['echo', 'Hello'])
+ subprocess.run(['echo', 'World'])
+
+ expected_env = PartialEnv(
+ CUSTOM_ENV_A='a',
+ CUSTOM_ENV_C='c'
+ )
+ with assert_subprocess_calls(expected_calls, env=expected_env):
+ env = {
+ 'CUSTOM_ENV_A': 'a',
+ 'CUSTOM_ENV_B': 'b',
+ 'CUSTOM_ENV_C': 'c'
+ }
+ subprocess.run(['echo', 'Hello'], env=env)
+ subprocess.run(['echo', 'World'], env=env)
+
+ with pytest.raises(AssertionError):
+ with assert_subprocess_calls(expected_calls, env=expected_env):
+ env = {
+ 'CUSTOM_ENV_B': 'b',
+ 'CUSTOM_ENV_C': 'c'
+ }
+ subprocess.run(['echo', 'Hello'], env=env)
+ subprocess.run(['echo', 'World'], env=env)
diff --git a/src/arrow/dev/archery/archery/utils/__init__.py b/src/arrow/dev/archery/archery/utils/__init__.py
new file mode 100644
index 000000000..13a83393a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/src/arrow/dev/archery/archery/utils/cache.py b/src/arrow/dev/archery/archery/utils/cache.py
new file mode 100644
index 000000000..d92c5f32e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/cache.py
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+import os
+from urllib.request import urlopen
+
+from .logger import logger
+
+ARCHERY_CACHE_DIR = Path.home() / ".cache" / "archery"
+
+
+class Cache:
+ """ Cache stores downloaded objects, notably apache-rat.jar. """
+
+ def __init__(self, path=ARCHERY_CACHE_DIR):
+ self.root = path
+
+ if not path.exists():
+ os.makedirs(path)
+
+ def key_path(self, key):
+ """ Return the full path of a key. """
+ return self.root/key
+
+ def get(self, key):
+ """ Return the full path of a key if cached, None otherwise. """
+ path = self.key_path(key)
+ return path if path.exists() else None
+
+ def delete(self, key):
+ """ Remove a key (and the file) from the cache. """
+ path = self.get(key)
+ if path:
+ path.unlink()
+
+ def get_or_insert(self, key, create):
+ """
+ Get or Insert a key from the cache. If the key is not found, the
+ `create` closure will be evaluated.
+
+ The `create` closure takes a single parameter, the path where the
+ object should be store. The file should only be created upon success.
+ """
+ path = self.key_path(key)
+
+ if not path.exists():
+ create(path)
+
+ return path
+
+ def get_or_insert_from_url(self, key, url):
+ """
+ Get or Insert a key from the cache. If the key is not found, the file
+ is downloaded from `url`.
+ """
+ def download(path):
+ """ Tiny wrapper that download a file and save as key. """
+ logger.debug("Downloading {} as {}".format(url, path))
+ conn = urlopen(url)
+ # Ensure the download is completed before writing to disks.
+ content = conn.read()
+ with open(path, "wb") as path_fd:
+ path_fd.write(content)
+
+ return self.get_or_insert(key, download)
diff --git a/src/arrow/dev/archery/archery/utils/cli.py b/src/arrow/dev/archery/archery/utils/cli.py
new file mode 100644
index 000000000..701abe925
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/cli.py
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import importlib
+
+import click
+
+from .source import ArrowSources, InvalidArrowSource
+
+
+class ArrowBool(click.types.BoolParamType):
+ """
+ ArrowBool supports the 'ON' and 'OFF' values on top of the values
+ supported by BoolParamType. This is convenient to port script which exports
+ CMake options variables.
+ """
+ name = "boolean"
+
+ def convert(self, value, param, ctx):
+ if isinstance(value, str):
+ lowered = value.lower()
+ if lowered == "on":
+ return True
+ elif lowered == "off":
+ return False
+
+ return super().convert(value, param, ctx)
+
+
+def validate_arrow_sources(ctx, param, src):
+ """
+ Ensure a directory contains Arrow cpp sources.
+ """
+ try:
+ return ArrowSources.find(src)
+ except InvalidArrowSource as e:
+ raise click.BadParameter(str(e))
+
+
+def add_optional_command(name, module, function, parent):
+ try:
+ module = importlib.import_module(module, package="archery")
+ command = getattr(module, function)
+ except ImportError as exc:
+ error_message = exc.name
+
+ @parent.command(
+ name,
+ context_settings={
+ "allow_extra_args": True,
+ "ignore_unknown_options": True,
+ }
+ )
+ def command():
+ raise click.ClickException(
+ f"Couldn't import command `{name}` due to {error_message}"
+ )
+ else:
+ parent.add_command(command)
diff --git a/src/arrow/dev/archery/archery/utils/cmake.py b/src/arrow/dev/archery/archery/utils/cmake.py
new file mode 100644
index 000000000..f93895b1a
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/cmake.py
@@ -0,0 +1,215 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+from shutil import rmtree, which
+
+from .command import Command, default_bin
+
+
+class CMake(Command):
+ def __init__(self, cmake_bin=None):
+ self.bin = default_bin(cmake_bin, "cmake")
+
+ @staticmethod
+ def default_generator():
+ """ Infer default generator.
+
+ Gives precedence to ninja if there exists an executable named `ninja`
+ in the search path.
+ """
+ found_ninja = which("ninja")
+ return "Ninja" if found_ninja else "Unix Makefiles"
+
+
+cmake = CMake()
+
+
+class CMakeDefinition:
+ """ CMakeDefinition captures the cmake invocation arguments.
+
+ It allows creating build directories with the same definition, e.g.
+ ```
+ build_1 = cmake_def.build("/tmp/build-1")
+ build_2 = cmake_def.build("/tmp/build-2")
+
+ ...
+
+ build1.all()
+ build2.all()
+ """
+
+ def __init__(self, source, build_type="release", generator=None,
+ definitions=None, env=None):
+ """ Initialize a CMakeDefinition
+
+ Parameters
+ ----------
+ source : str
+ Source directory where the top-level CMakeLists.txt is
+ located. This is usually the root of the project.
+ generator : str, optional
+ definitions: list(str), optional
+ env : dict(str,str), optional
+ Environment to use when invoking cmake. This can be required to
+ work around cmake deficiencies, e.g. CC and CXX.
+ """
+ self.source = os.path.abspath(source)
+ self.build_type = build_type
+ self.generator = generator if generator else cmake.default_generator()
+ self.definitions = definitions if definitions else []
+ self.env = env
+
+ @property
+ def arguments(self):
+ """" Return the arguments to cmake invocation. """
+ arguments = [
+ "-G{}".format(self.generator),
+ ] + self.definitions + [
+ self.source
+ ]
+ return arguments
+
+ def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs):
+ """ Invoke cmake into a build directory.
+
+ Parameters
+ ----------
+ build_dir : str
+ Directory in which the CMake build will be instantiated.
+ force : bool
+ If the build folder exists, delete it before. Otherwise if it's
+ present, an error will be returned.
+ """
+ if os.path.exists(build_dir):
+ # Extra safety to ensure we're deleting a build folder.
+ if not CMakeBuild.is_build_dir(build_dir):
+ raise FileExistsError(
+ "{} is not a cmake build".format(build_dir)
+ )
+ if not force:
+ raise FileExistsError(
+ "{} exists use force=True".format(build_dir)
+ )
+ rmtree(build_dir)
+
+ os.mkdir(build_dir)
+
+ cmd_kwargs = cmd_kwargs if cmd_kwargs else {}
+ cmake(*self.arguments, cwd=build_dir, env=self.env, **cmd_kwargs)
+ return CMakeBuild(build_dir, self.build_type, definition=self,
+ **kwargs)
+
+ def __repr__(self):
+ return "CMakeDefinition[source={}]".format(self.source)
+
+
+CMAKE_BUILD_TYPE_RE = re.compile("CMAKE_BUILD_TYPE:STRING=([a-zA-Z]+)")
+
+
+class CMakeBuild(CMake):
+ """ CMakeBuild represents a build directory initialized by cmake.
+
+ The build instance can be used to build/test/install. It alleviates the
+ user to know which generator is used.
+ """
+
+ def __init__(self, build_dir, build_type, definition=None):
+ """ Initialize a CMakeBuild.
+
+ The caller must ensure that cmake was invoked in the build directory.
+
+ Parameters
+ ----------
+ definition : CMakeDefinition
+ The definition to build from.
+ build_dir : str
+ The build directory to setup into.
+ """
+ assert CMakeBuild.is_build_dir(build_dir)
+ super().__init__()
+ self.build_dir = os.path.abspath(build_dir)
+ self.build_type = build_type
+ self.definition = definition
+
+ @property
+ def binaries_dir(self):
+ return os.path.join(self.build_dir, self.build_type)
+
+ def run(self, *argv, verbose=False, **kwargs):
+ cmake_args = ["--build", self.build_dir, "--"]
+ extra = []
+ if verbose:
+ extra.append("-v" if self.bin.endswith("ninja") else "VERBOSE=1")
+ # Commands must be ran under the build directory
+ return super().run(*cmake_args, *extra,
+ *argv, **kwargs, cwd=self.build_dir)
+
+ def all(self):
+ return self.run("all")
+
+ def clean(self):
+ return self.run("clean")
+
+ def install(self):
+ return self.run("install")
+
+ def test(self):
+ return self.run("test")
+
+ @staticmethod
+ def is_build_dir(path):
+ """ Indicate if a path is CMake build directory.
+
+ This method only checks for the existence of paths and does not do any
+ validation whatsoever.
+ """
+ cmake_cache = os.path.join(path, "CMakeCache.txt")
+ cmake_files = os.path.join(path, "CMakeFiles")
+ return os.path.exists(cmake_cache) and os.path.exists(cmake_files)
+
+ @staticmethod
+ def from_path(path):
+ """ Instantiate a CMakeBuild from a path.
+
+ This is used to recover from an existing physical directory (created
+ with or without CMakeBuild).
+
+ Note that this method is not idempotent as the original definition will
+ be lost. Only build_type is recovered.
+ """
+ if not CMakeBuild.is_build_dir(path):
+ raise ValueError("Not a valid CMakeBuild path: {}".format(path))
+
+ build_type = None
+ # Infer build_type by looking at CMakeCache.txt and looking for a magic
+ # definition
+ cmake_cache_path = os.path.join(path, "CMakeCache.txt")
+ with open(cmake_cache_path, "r") as cmake_cache:
+ candidates = CMAKE_BUILD_TYPE_RE.findall(cmake_cache.read())
+ build_type = candidates[0].lower() if candidates else "release"
+
+ return CMakeBuild(path, build_type)
+
+ def __repr__(self):
+ return ("CMakeBuild["
+ "build = {},"
+ "build_type = {},"
+ "definition = {}]".format(self.build_dir,
+ self.build_type,
+ self.definition))
diff --git a/src/arrow/dev/archery/archery/utils/command.py b/src/arrow/dev/archery/archery/utils/command.py
new file mode 100644
index 000000000..f655e2ef2
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/command.py
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shlex
+import shutil
+import subprocess
+
+from .logger import logger, ctx
+
+
+def default_bin(name, default):
+ assert(default)
+ env_name = "ARCHERY_{0}_BIN".format(default.upper())
+ return name if name else os.environ.get(env_name, default)
+
+
+# Decorator running a command and returning stdout
+class capture_stdout:
+ def __init__(self, strip=False, listify=False):
+ self.strip = strip
+ self.listify = listify
+
+ def __call__(self, f):
+ def strip_it(x):
+ return x.strip() if self.strip else x
+
+ def list_it(x):
+ return x.decode('utf-8').splitlines() if self.listify else x
+
+ def wrapper(*argv, **kwargs):
+ # Ensure stdout is captured
+ kwargs["stdout"] = subprocess.PIPE
+ return list_it(strip_it(f(*argv, **kwargs).stdout))
+ return wrapper
+
+
+class Command:
+ """
+ A runnable command.
+
+ Class inheriting from the Command class must provide the bin
+ property/attribute.
+ """
+
+ def __init__(self, bin):
+ self.bin = bin
+
+ def run(self, *argv, **kwargs):
+ assert hasattr(self, "bin")
+ invocation = shlex.split(self.bin)
+ invocation.extend(argv)
+
+ for key in ["stdout", "stderr"]:
+ # Preserve caller intention, otherwise silence
+ if key not in kwargs and ctx.quiet:
+ kwargs[key] = subprocess.PIPE
+
+ # Prefer safe by default
+ if "check" not in kwargs:
+ kwargs["check"] = True
+
+ logger.debug("Executing `{}`".format(invocation))
+ return subprocess.run(invocation, **kwargs)
+
+ @property
+ def available(self):
+ """
+ Indicate if the command binary is found in PATH.
+ """
+ binary = shlex.split(self.bin)[0]
+ return shutil.which(binary) is not None
+
+ def __call__(self, *argv, **kwargs):
+ return self.run(*argv, **kwargs)
+
+
+class CommandStackMixin:
+ def run(self, *argv, **kwargs):
+ stacked_args = self.argv + argv
+ return super(CommandStackMixin, self).run(*stacked_args, **kwargs)
+
+
+class Bash(Command):
+ def __init__(self, bash_bin=None):
+ self.bin = default_bin(bash_bin, "bash")
diff --git a/src/arrow/dev/archery/archery/utils/git.py b/src/arrow/dev/archery/archery/utils/git.py
new file mode 100644
index 000000000..798bc5d70
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/git.py
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .command import Command, capture_stdout, default_bin
+from ..compat import _stringify_path
+
+
+# Decorator prepending argv with the git sub-command found with the method
+# name.
+def git_cmd(fn):
+ # function name is the subcommand
+ sub_cmd = fn.__name__.replace("_", "-")
+
+ def wrapper(self, *argv, **kwargs):
+ return fn(self, sub_cmd, *argv, **kwargs)
+ return wrapper
+
+
+class Git(Command):
+ def __init__(self, git_bin=None):
+ self.bin = default_bin(git_bin, "git")
+
+ def run_cmd(self, cmd, *argv, git_dir=None, **kwargs):
+ """ Inject flags before sub-command in argv. """
+ opts = []
+ if git_dir is not None:
+ opts.extend(["-C", _stringify_path(git_dir)])
+
+ return self.run(*opts, cmd, *argv, **kwargs)
+
+ @capture_stdout(strip=False)
+ @git_cmd
+ def archive(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @git_cmd
+ def clone(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @git_cmd
+ def fetch(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @git_cmd
+ def checkout(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ def dirty(self, **kwargs):
+ return len(self.status("--short", **kwargs)) > 0
+
+ @git_cmd
+ def log(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @capture_stdout(strip=True, listify=True)
+ @git_cmd
+ def ls_files(self, *argv, listify=False, **kwargs):
+ stdout = self.run_cmd(*argv, **kwargs)
+ return stdout
+
+ @capture_stdout(strip=True)
+ @git_cmd
+ def rev_parse(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @capture_stdout(strip=True)
+ @git_cmd
+ def status(self, *argv, **kwargs):
+ return self.run_cmd(*argv, **kwargs)
+
+ @capture_stdout(strip=True)
+ def head(self, **kwargs):
+ """ Return commit pointed by HEAD. """
+ return self.rev_parse("HEAD", **kwargs)
+
+ @capture_stdout(strip=True)
+ def current_branch(self, **kwargs):
+ return self.rev_parse("--abbrev-ref", "HEAD", **kwargs)
+
+ def repository_root(self, git_dir=None, **kwargs):
+ """ Locates the repository's root path from a subdirectory. """
+ stdout = self.rev_parse("--show-toplevel", git_dir=git_dir, **kwargs)
+ return stdout.decode('utf-8')
+
+
+git = Git()
diff --git a/src/arrow/dev/archery/archery/utils/lint.py b/src/arrow/dev/archery/archery/utils/lint.py
new file mode 100644
index 000000000..d95bfeea3
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/lint.py
@@ -0,0 +1,429 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import fnmatch
+import gzip
+import os
+from pathlib import Path
+
+import click
+
+from .command import Bash, Command, default_bin
+from .cmake import CMake
+from .git import git
+from .logger import logger
+from ..lang.cpp import CppCMakeDefinition, CppConfiguration
+from ..lang.python import Autopep8, Flake8, NumpyDoc
+from .rat import Rat, exclusion_from_globs
+from .tmpdir import tmpdir
+
+
+_archery_install_msg = (
+ "Please install archery using: `pip install -e dev/archery[lint]`. "
+)
+
+
+class LintValidationException(Exception):
+ pass
+
+
+class LintResult:
+ def __init__(self, success, reason=None):
+ self.success = success
+
+ def ok(self):
+ if not self.success:
+ raise LintValidationException
+
+ @staticmethod
+ def from_cmd(command_result):
+ return LintResult(command_result.returncode == 0)
+
+
+def cpp_linter(src, build_dir, clang_format=True, cpplint=True,
+ clang_tidy=False, iwyu=False, iwyu_all=False,
+ fix=False):
+ """ Run clang-format, cpplint and clang-tidy on cpp/ codebase. """
+ logger.info("Running C++ linters")
+
+ cmake = CMake()
+ if not cmake.available:
+ logger.error("cpp linter requested but cmake binary not found.")
+ return
+
+ # A cmake build directory is required to populate `compile_commands.json`
+ # which in turn is required by clang-tidy. It also provides a convenient
+ # way to hide clang-format/clang-tidy invocation via the Generate
+ # (ninja/make) targets.
+
+ # ARROW_LINT_ONLY exits early but ignore building compile_command.json
+ lint_only = not (iwyu or clang_tidy)
+ cmake_args = {"with_python": False, "with_lint_only": lint_only}
+ cmake_def = CppCMakeDefinition(src.cpp, CppConfiguration(**cmake_args))
+
+ build = cmake_def.build(build_dir)
+ if clang_format:
+ target = "format" if fix else "check-format"
+ yield LintResult.from_cmd(build.run(target, check=False))
+
+ if cpplint:
+ yield LintResult.from_cmd(build.run("lint", check=False))
+ yield LintResult.from_cmd(build.run("lint_cpp_cli", check=False))
+
+ if clang_tidy:
+ yield LintResult.from_cmd(build.run("check-clang-tidy", check=False))
+
+ if iwyu:
+ if iwyu_all:
+ iwyu_cmd = "iwyu-all"
+ else:
+ iwyu_cmd = "iwyu"
+ yield LintResult.from_cmd(build.run(iwyu_cmd, check=False))
+
+
+class CMakeFormat(Command):
+
+ def __init__(self, paths, cmake_format_bin=None):
+ self.check_version()
+ self.bin = default_bin(cmake_format_bin, "cmake-format")
+ self.paths = paths
+
+ @classmethod
+ def from_patterns(cls, base_path, include_patterns, exclude_patterns):
+ paths = {
+ str(path.as_posix())
+ for pattern in include_patterns
+ for path in base_path.glob(pattern)
+ }
+ for pattern in exclude_patterns:
+ pattern = (base_path / pattern).as_posix()
+ paths -= set(fnmatch.filter(paths, str(pattern)))
+ return cls(paths)
+
+ @staticmethod
+ def check_version():
+ try:
+ # cmake_format is part of the cmakelang package
+ import cmakelang
+ except ImportError:
+ raise ImportError(
+
+ )
+ # pin a specific version of cmake_format, must be updated in setup.py
+ if cmakelang.__version__ != "0.6.13":
+ raise LintValidationException(
+ f"Wrong version of cmake_format is detected. "
+ f"{_archery_install_msg}"
+ )
+
+ def check(self):
+ return self.run("-l", "error", "--check", *self.paths, check=False)
+
+ def fix(self):
+ return self.run("--in-place", *self.paths, check=False)
+
+
+def cmake_linter(src, fix=False):
+ """
+ Run cmake-format on all CMakeFiles.txt
+ """
+ logger.info("Running cmake-format linters")
+
+ cmake_format = CMakeFormat.from_patterns(
+ src.path,
+ include_patterns=[
+ 'ci/**/*.cmake',
+ 'cpp/CMakeLists.txt',
+ 'cpp/src/**/CMakeLists.txt',
+ 'cpp/cmake_modules/*.cmake',
+ 'go/**/CMakeLists.txt',
+ 'java/**/CMakeLists.txt',
+ 'matlab/**/CMakeLists.txt',
+ 'python/CMakeLists.txt',
+ ],
+ exclude_patterns=[
+ 'cpp/cmake_modules/FindNumPy.cmake',
+ 'cpp/cmake_modules/FindPythonLibsNew.cmake',
+ 'cpp/cmake_modules/UseCython.cmake',
+ 'cpp/src/arrow/util/config.h.cmake',
+ ]
+ )
+ method = cmake_format.fix if fix else cmake_format.check
+
+ yield LintResult.from_cmd(method())
+
+
+def python_linter(src, fix=False):
+ """Run Python linters on python/pyarrow, python/examples, setup.py
+ and dev/. """
+ setup_py = os.path.join(src.python, "setup.py")
+ setup_cfg = os.path.join(src.python, "setup.cfg")
+
+ logger.info("Running Python formatter (autopep8)")
+
+ autopep8 = Autopep8()
+ if not autopep8.available:
+ logger.error(
+ "Python formatter requested but autopep8 binary not found. "
+ f"{_archery_install_msg}")
+ return
+
+ # Gather files for autopep8
+ patterns = ["python/pyarrow/**/*.py",
+ "python/pyarrow/**/*.pyx",
+ "python/pyarrow/**/*.pxd",
+ "python/pyarrow/**/*.pxi",
+ "python/examples/**/*.py",
+ "dev/archery/**/*.py"]
+ files = [setup_py]
+ for pattern in patterns:
+ files += list(map(str, Path(src.path).glob(pattern)))
+
+ args = ['--global-config', setup_cfg, '--ignore-local-config']
+ if fix:
+ args += ['-j0', '--in-place']
+ args += sorted(files)
+ yield LintResult.from_cmd(autopep8(*args))
+ else:
+ # XXX `-j0` doesn't work well with `--exit-code`, so instead
+ # we capture the diff and check whether it's empty
+ # (https://github.com/hhatto/autopep8/issues/543)
+ args += ['-j0', '--diff']
+ args += sorted(files)
+ diff = autopep8.run_captured(*args)
+ if diff:
+ print(diff.decode('utf8'))
+ yield LintResult(success=False)
+ else:
+ yield LintResult(success=True)
+
+ # Run flake8 after autopep8 (the latter may have modified some files)
+ logger.info("Running Python linter (flake8)")
+
+ flake8 = Flake8()
+ if not flake8.available:
+ logger.error(
+ "Python linter requested but flake8 binary not found. "
+ f"{_archery_install_msg}")
+ return
+
+ flake8_exclude = ['.venv*']
+
+ yield LintResult.from_cmd(
+ flake8("--extend-exclude=" + ','.join(flake8_exclude),
+ setup_py, src.pyarrow, os.path.join(src.python, "examples"),
+ src.dev, check=False))
+ config = os.path.join(src.python, ".flake8.cython")
+ yield LintResult.from_cmd(
+ flake8("--config=" + config, src.pyarrow, check=False))
+
+
+def python_numpydoc(symbols=None, allow_rules=None, disallow_rules=None):
+ """Run numpydoc linter on python.
+
+ Pyarrow must be available for import.
+ """
+ logger.info("Running Python docstring linters")
+ # by default try to run on all pyarrow package
+ symbols = symbols or {
+ 'pyarrow',
+ 'pyarrow.compute',
+ 'pyarrow.csv',
+ 'pyarrow.dataset',
+ 'pyarrow.feather',
+ 'pyarrow.flight',
+ 'pyarrow.fs',
+ 'pyarrow.gandiva',
+ 'pyarrow.ipc',
+ 'pyarrow.json',
+ 'pyarrow.orc',
+ 'pyarrow.parquet',
+ 'pyarrow.plasma',
+ 'pyarrow.types',
+ }
+ try:
+ numpydoc = NumpyDoc(symbols)
+ except RuntimeError as e:
+ logger.error(str(e))
+ yield LintResult(success=False)
+ return
+
+ results = numpydoc.validate(
+ # limit the validation scope to the pyarrow package
+ from_package='pyarrow',
+ allow_rules=allow_rules,
+ disallow_rules=disallow_rules
+ )
+
+ if len(results) == 0:
+ yield LintResult(success=True)
+ return
+
+ number_of_violations = 0
+ for obj, result in results:
+ errors = result['errors']
+
+ # inspect doesn't play nice with cython generated source code,
+ # to use a hacky way to represent a proper __qualname__
+ doc = getattr(obj, '__doc__', '')
+ name = getattr(obj, '__name__', '')
+ qualname = getattr(obj, '__qualname__', '')
+ module = getattr(obj, '__module__', '')
+ instance = getattr(obj, '__self__', '')
+ if instance:
+ klass = instance.__class__.__name__
+ else:
+ klass = ''
+
+ try:
+ cython_signature = doc.splitlines()[0]
+ except Exception:
+ cython_signature = ''
+
+ desc = '.'.join(filter(None, [module, klass, qualname or name]))
+
+ click.echo()
+ click.echo(click.style(desc, bold=True, fg='yellow'))
+ if cython_signature:
+ qualname_with_signature = '.'.join([module, cython_signature])
+ click.echo(
+ click.style(
+ '-> {}'.format(qualname_with_signature),
+ fg='yellow'
+ )
+ )
+
+ for error in errors:
+ number_of_violations += 1
+ click.echo('{}: {}'.format(*error))
+
+ msg = 'Total number of docstring violations: {}'.format(
+ number_of_violations
+ )
+ click.echo()
+ click.echo(click.style(msg, fg='red'))
+
+ yield LintResult(success=False)
+
+
+def rat_linter(src, root):
+ """Run apache-rat license linter."""
+ logger.info("Running apache-rat linter")
+
+ if src.git_dirty:
+ logger.warn("Due to the usage of git-archive, uncommitted files will"
+ " not be checked for rat violations. ")
+
+ exclusion = exclusion_from_globs(
+ os.path.join(src.dev, "release", "rat_exclude_files.txt"))
+
+ # Creates a git-archive of ArrowSources, apache-rat expects a gzip
+ # compressed tar archive.
+ archive_path = os.path.join(root, "apache-arrow.tar.gz")
+ src.archive(archive_path, compressor=gzip.compress)
+ report = Rat().report(archive_path)
+
+ violations = list(report.validate(exclusion=exclusion))
+ for violation in violations:
+ print("apache-rat license violation: {}".format(violation))
+
+ yield LintResult(len(violations) == 0)
+
+
+def r_linter(src):
+ """Run R linter."""
+ logger.info("Running R linter")
+ r_lint_sh = os.path.join(src.r, "lint.sh")
+ yield LintResult.from_cmd(Bash().run(r_lint_sh, check=False))
+
+
+class Hadolint(Command):
+ def __init__(self, hadolint_bin=None):
+ self.bin = default_bin(hadolint_bin, "hadolint")
+
+
+def is_docker_image(path):
+ dirname = os.path.dirname(path)
+ filename = os.path.basename(path)
+
+ excluded = dirname.startswith(
+ "dev") or dirname.startswith("python/manylinux")
+
+ return filename.startswith("Dockerfile") and not excluded
+
+
+def docker_linter(src):
+ """Run Hadolint docker linter."""
+ logger.info("Running Docker linter")
+
+ hadolint = Hadolint()
+
+ if not hadolint.available:
+ logger.error(
+ "hadolint linter requested but hadolint binary not found.")
+ return
+
+ for path in git.ls_files(git_dir=src.path):
+ if is_docker_image(path):
+ yield LintResult.from_cmd(hadolint.run(path, check=False,
+ cwd=src.path))
+
+
+def linter(src, fix=False, *, clang_format=False, cpplint=False,
+ clang_tidy=False, iwyu=False, iwyu_all=False,
+ python=False, numpydoc=False, cmake_format=False, rat=False,
+ r=False, docker=False):
+ """Run all linters."""
+ with tmpdir(prefix="arrow-lint-") as root:
+ build_dir = os.path.join(root, "cpp-build")
+
+ # Linters yield LintResult without raising exceptions on failure.
+ # This allows running all linters in one pass and exposing all
+ # errors to the user.
+ results = []
+
+ if clang_format or cpplint or clang_tidy or iwyu:
+ results.extend(cpp_linter(src, build_dir,
+ clang_format=clang_format,
+ cpplint=cpplint,
+ clang_tidy=clang_tidy,
+ iwyu=iwyu,
+ iwyu_all=iwyu_all,
+ fix=fix))
+
+ if python:
+ results.extend(python_linter(src, fix=fix))
+
+ if numpydoc:
+ results.extend(python_numpydoc())
+
+ if cmake_format:
+ results.extend(cmake_linter(src, fix=fix))
+
+ if rat:
+ results.extend(rat_linter(src, root))
+
+ if r:
+ results.extend(r_linter(src))
+
+ if docker:
+ results.extend(docker_linter(src))
+
+ # Raise error if one linter failed, ensuring calling code can exit with
+ # non-zero.
+ for result in results:
+ result.ok()
diff --git a/src/arrow/dev/archery/archery/utils/logger.py b/src/arrow/dev/archery/archery/utils/logger.py
new file mode 100644
index 000000000..9d0feda88
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/logger.py
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+
+""" Global logger. """
+logger = logging.getLogger("archery")
+
+
+class LoggingContext:
+ def __init__(self, quiet=False):
+ self.quiet = quiet
+
+
+ctx = LoggingContext()
diff --git a/src/arrow/dev/archery/archery/utils/maven.py b/src/arrow/dev/archery/archery/utils/maven.py
new file mode 100644
index 000000000..96a3bf5bd
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/maven.py
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from .command import Command, default_bin
+
+
+class Maven(Command):
+ def __init__(self, maven_bin=None):
+ self.bin = default_bin(maven_bin, "mvn")
+
+
+maven = Maven()
+
+
+class MavenDefinition:
+ """ MavenDefinition captures the maven invocation arguments.
+
+ It allows creating build directories with the same definition, e.g.
+ ```
+ build_1 = maven_def.build("/tmp/build-1")
+ build_2 = maven_def.build("/tmp/build-2")
+
+ ...
+
+ build1.install()
+ build2.install()
+ """
+
+ def __init__(self, source, build_definitions=None,
+ benchmark_definitions=None, env=None):
+ """ Initialize a MavenDefinition
+
+ Parameters
+ ----------
+ source : str
+ Source directory where the top-level pom.xml is
+ located. This is usually the root of the project.
+ build_definitions: list(str), optional
+ benchmark_definitions: list(str), optional
+ """
+ self.source = os.path.abspath(source)
+ self.build_definitions = build_definitions if build_definitions else []
+ self.benchmark_definitions =\
+ benchmark_definitions if benchmark_definitions else []
+ self.env = env
+
+ @property
+ def build_arguments(self):
+ """" Return the arguments to maven invocation for build. """
+ arguments = self.build_definitions + [
+ "-B", "-DskipTests", "-Drat.skip=true",
+ "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."
+ "Slf4jMavenTransferListener=warn",
+ "-T", "2C", "install"
+ ]
+ return arguments
+
+ def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs):
+ """ Invoke maven into a build directory.
+
+ Parameters
+ ----------
+ build_dir : str
+ Directory in which the Maven build will be instantiated.
+ force : bool
+ not used now
+ """
+ if os.path.exists(build_dir):
+ # Extra safety to ensure we're deleting a build folder.
+ if not MavenBuild.is_build_dir(build_dir):
+ raise FileExistsError(
+ "{} is not a maven build".format(build_dir)
+ )
+
+ cmd_kwargs = cmd_kwargs if cmd_kwargs else {}
+ assert MavenBuild.is_build_dir(build_dir)
+ maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs)
+ return MavenBuild(build_dir, definition=self, **kwargs)
+
+ @property
+ def list_arguments(self):
+ """" Return the arguments to maven invocation for list """
+ arguments = [
+ "-Dskip.perf.benchmarks=false", "-Dbenchmark.list=-lp", "install"
+ ]
+ return arguments
+
+ @property
+ def benchmark_arguments(self):
+ """" Return the arguments to maven invocation for benchmark """
+ arguments = self.benchmark_definitions + [
+ "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
+ "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false "
+ "-Darrow.enable_unsafe_memory_access=true\"",
+ "install"
+ ]
+ return arguments
+
+ def __repr__(self):
+ return "MavenDefinition[source={}]".format(self.source)
+
+
+class MavenBuild(Maven):
+ """ MavenBuild represents a build directory initialized by maven.
+
+ The build instance can be used to build/test/install. It alleviates the
+ user to know which generator is used.
+ """
+
+ def __init__(self, build_dir, definition=None):
+ """ Initialize a MavenBuild.
+
+ The caller must ensure that maven was invoked in the build directory.
+
+ Parameters
+ ----------
+ definition : MavenDefinition
+ The definition to build from.
+ build_dir : str
+ The build directory to setup into.
+ """
+ assert MavenBuild.is_build_dir(build_dir)
+ super().__init__()
+ self.build_dir = os.path.abspath(build_dir)
+ self.definition = definition
+
+ @property
+ def binaries_dir(self):
+ return self.build_dir
+
+ def run(self, *argv, verbose=False, cwd=None, **kwargs):
+ extra = []
+ if verbose:
+ extra.append("-X")
+ if cwd is None:
+ cwd = self.build_dir
+ # Commands must be ran under the directory where pom.xml exists
+ return super().run(*extra, *argv, **kwargs, cwd=cwd)
+
+ def build(self, *argv, verbose=False, **kwargs):
+ definition_args = self.definition.build_arguments
+ cwd = self.binaries_dir
+ return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+ env=self.definition.env, **kwargs)
+
+ def list(self, *argv, verbose=False, **kwargs):
+ definition_args = self.definition.list_arguments
+ cwd = self.binaries_dir + "/performance"
+ return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+ env=self.definition.env, **kwargs)
+
+ def benchmark(self, *argv, verbose=False, **kwargs):
+ definition_args = self.definition.benchmark_arguments
+ cwd = self.binaries_dir + "/performance"
+ return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+ env=self.definition.env, **kwargs)
+
+ @staticmethod
+ def is_build_dir(path):
+ """ Indicate if a path is Maven top directory.
+
+ This method only checks for the existence of paths and does not do any
+ validation whatsoever.
+ """
+ pom_xml = os.path.join(path, "pom.xml")
+ performance_dir = os.path.join(path, "performance")
+ return os.path.exists(pom_xml) and os.path.isdir(performance_dir)
+
+ @staticmethod
+ def from_path(path):
+ """ Instantiate a Maven from a path.
+
+ This is used to recover from an existing physical directory (created
+ with or without Maven).
+
+ Note that this method is not idempotent as the original definition will
+ be lost.
+ """
+ if not MavenBuild.is_build_dir(path):
+ raise ValueError("Not a valid MavenBuild path: {}".format(path))
+
+ return MavenBuild(path, definition=None)
+
+ def __repr__(self):
+ return ("MavenBuild["
+ "build = {},"
+ "definition = {}]".format(self.build_dir,
+ self.definition))
diff --git a/src/arrow/dev/archery/archery/utils/rat.py b/src/arrow/dev/archery/archery/utils/rat.py
new file mode 100644
index 000000000..e7fe19a7e
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/rat.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import fnmatch
+import re
+from xml.etree import ElementTree
+
+from ..lang.java import Jar
+from .cache import Cache
+from .command import capture_stdout
+
+RAT_VERSION = 0.13
+RAT_JAR_FILENAME = "apache-rat-{}.jar".format(RAT_VERSION)
+RAT_URL_ = "https://repo1.maven.org/maven2/org/apache/rat/apache-rat"
+RAT_URL = "/".join([RAT_URL_, str(RAT_VERSION), RAT_JAR_FILENAME])
+
+
+class Rat(Jar):
+ def __init__(self):
+ jar = Cache().get_or_insert_from_url(RAT_JAR_FILENAME, RAT_URL)
+ Jar.__init__(self, jar)
+
+ @capture_stdout(strip=False)
+ def run_report(self, archive_path, **kwargs):
+ return self.run("--xml", archive_path, **kwargs)
+
+ def report(self, archive_path, **kwargs):
+ return RatReport(self.run_report(archive_path, **kwargs))
+
+
+def exclusion_from_globs(exclusions_path):
+ with open(exclusions_path, 'r') as exclusions_fd:
+ exclusions = [e.strip() for e in exclusions_fd]
+ return lambda path: any([fnmatch.fnmatch(path, e) for e in exclusions])
+
+
+class RatReport:
+ def __init__(self, xml):
+ self.xml = xml
+ self.tree = ElementTree.fromstring(xml)
+
+ def __repr__(self):
+ return "RatReport({})".format(self.xml)
+
+ def validate(self, exclusion=None):
+ for r in self.tree.findall('resource'):
+ approvals = r.findall('license-approval')
+ if not approvals or approvals[0].attrib['name'] == 'true':
+ continue
+
+ clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+
+ if exclusion and exclusion(clean_name):
+ continue
+
+ yield clean_name
diff --git a/src/arrow/dev/archery/archery/utils/report.py b/src/arrow/dev/archery/archery/utils/report.py
new file mode 100644
index 000000000..6c7587ddd
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/report.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABCMeta, abstractmethod
+import datetime
+
+import jinja2
+
+
+def markdown_escape(s):
+ for char in ('*', '#', '_', '~', '`', '>'):
+ s = s.replace(char, '\\' + char)
+ return s
+
+
+class Report(metaclass=ABCMeta):
+
+ def __init__(self, **kwargs):
+ for field in self.fields:
+ if field not in kwargs:
+ raise ValueError('Missing keyword argument {}'.format(field))
+ self._data = kwargs
+
+ def __getattr__(self, key):
+ return self._data[key]
+
+ @abstractmethod
+ def fields(self):
+ pass
+
+ @property
+ @abstractmethod
+ def templates(self):
+ pass
+
+
+class JinjaReport(Report):
+
+ def __init__(self, **kwargs):
+ self.env = jinja2.Environment(
+ loader=jinja2.PackageLoader('archery', 'templates')
+ )
+ self.env.filters['md'] = markdown_escape
+ self.env.globals['today'] = datetime.date.today
+ super().__init__(**kwargs)
+
+ def render(self, template_name):
+ template_path = self.templates[template_name]
+ template = self.env.get_template(template_path)
+ return template.render(**self._data)
diff --git a/src/arrow/dev/archery/archery/utils/source.py b/src/arrow/dev/archery/archery/utils/source.py
new file mode 100644
index 000000000..1080cb75d
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/source.py
@@ -0,0 +1,211 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+import subprocess
+
+from .git import git
+
+
+class InvalidArrowSource(Exception):
+ pass
+
+
+class ArrowSources:
+ """ ArrowSources is a companion class representing a directory containing
+ Apache Arrow's sources.
+ """
+ # Note that WORKSPACE is a reserved git revision name by this module to
+ # reference the current git workspace. In other words, this indicates to
+ # ArrowSources.at_revision that no cloning/checkout is required.
+ WORKSPACE = "WORKSPACE"
+
+ def __init__(self, path):
+ """ Initialize an ArrowSources
+
+ The caller must ensure that path is valid arrow source directory (can
+ be checked with ArrowSources.valid)
+
+ Parameters
+ ----------
+ path : src
+ """
+ path = Path(path)
+ # validate by checking a specific path in the arrow source tree
+ if not (path / 'cpp' / 'CMakeLists.txt').exists():
+ raise InvalidArrowSource(
+ "No Arrow C++ sources found in {}.".format(path)
+ )
+ self.path = path
+
+ @property
+ def archery(self):
+ """ Returns the archery directory of an Arrow sources. """
+ return self.dev / "archery"
+
+ @property
+ def cpp(self):
+ """ Returns the cpp directory of an Arrow sources. """
+ return self.path / "cpp"
+
+ @property
+ def dev(self):
+ """ Returns the dev directory of an Arrow sources. """
+ return self.path / "dev"
+
+ @property
+ def java(self):
+ """ Returns the java directory of an Arrow sources. """
+ return self.path / "java"
+
+ @property
+ def python(self):
+ """ Returns the python directory of an Arrow sources. """
+ return self.path / "python"
+
+ @property
+ def pyarrow(self):
+ """ Returns the python/pyarrow directory of an Arrow sources. """
+ return self.python / "pyarrow"
+
+ @property
+ def r(self):
+ """ Returns the r directory of an Arrow sources. """
+ return self.path / "r"
+
+ @property
+ def git_backed(self):
+ """ Indicate if the sources are backed by git. """
+ return (self.path / ".git").exists()
+
+ @property
+ def git_dirty(self):
+ """ Indicate if the sources is a dirty git directory. """
+ return self.git_backed and git.dirty(git_dir=self.path)
+
+ def archive(self, path, dereference=False, compressor=None, revision=None):
+ """ Saves a git archive at path. """
+ if not self.git_backed:
+ raise ValueError("{} is not backed by git".format(self))
+
+ rev = revision if revision else "HEAD"
+ archive = git.archive("--prefix=apache-arrow/", rev,
+ git_dir=self.path)
+
+ # TODO(fsaintjacques): fix dereference for
+
+ if compressor:
+ archive = compressor(archive)
+
+ with open(path, "wb") as archive_fd:
+ archive_fd.write(archive)
+
+ def at_revision(self, revision, clone_dir):
+ """ Return a copy of the current sources for a specified git revision.
+
+ This method may return the current object if no checkout is required.
+ The caller is responsible to remove the cloned repository directory.
+
+ The user can use the special WORKSPACE token to mean the current git
+ workspace (no checkout performed).
+
+ The second value of the returned tuple indicates if a clone was
+ performed.
+
+ Parameters
+ ----------
+ revision : str
+ Revision to checkout sources at.
+ clone_dir : str
+ Path to checkout the local clone.
+ """
+ if not self.git_backed:
+ raise ValueError("{} is not backed by git".format(self))
+
+ if revision == ArrowSources.WORKSPACE:
+ return self, False
+
+ # A local clone is required to leave the current sources intact such
+ # that builds depending on said sources are not invalidated (or worse
+ # slightly affected when re-invoking the generator).
+ # "--local" only works when dest dir is on same volume of source dir.
+ # "--shared" works even if dest dir is on different volume.
+ git.clone("--shared", self.path, clone_dir)
+
+ # Revision can reference "origin/" (or any remotes) that are not found
+ # in the local clone. Thus, revisions are dereferenced in the source
+ # repository.
+ original_revision = git.rev_parse(revision)
+
+ git.checkout(original_revision, git_dir=clone_dir)
+
+ return ArrowSources(clone_dir), True
+
+ @staticmethod
+ def find(path=None):
+ """ Infer Arrow sources directory from various method.
+
+ The following guesses are done in order until a valid match is found:
+
+ 1. Checks the given optional parameter.
+
+ 2. Checks if the environment variable `ARROW_SRC` is defined and use
+ this.
+
+ 3. Checks if the current working directory (cwd) is an Arrow source
+ directory.
+
+ 4. Checks if this file (cli.py) is still in the original source
+ repository. If so, returns the relative path to the source
+ directory.
+ """
+
+ # Explicit via environment
+ env = os.environ.get("ARROW_SRC")
+
+ # Implicit via cwd
+ cwd = Path.cwd()
+
+ # Implicit via current file
+ try:
+ this = Path(__file__).parents[4]
+ except IndexError:
+ this = None
+
+ # Implicit via git repository (if archery is installed system wide)
+ try:
+ repo = git.repository_root(git_dir=cwd)
+ except subprocess.CalledProcessError:
+ # We're not inside a git repository.
+ repo = None
+
+ paths = list(filter(None, [path, env, cwd, this, repo]))
+ for p in paths:
+ try:
+ return ArrowSources(p)
+ except InvalidArrowSource:
+ pass
+
+ searched_paths = "\n".join([" - {}".format(p) for p in paths])
+ raise InvalidArrowSource(
+ "Unable to locate Arrow's source directory. "
+ "Searched paths are:\n{}".format(searched_paths)
+ )
+
+ def __repr__(self):
+ return self.path
diff --git a/src/arrow/dev/archery/archery/utils/tmpdir.py b/src/arrow/dev/archery/archery/utils/tmpdir.py
new file mode 100644
index 000000000..07d7355c8
--- /dev/null
+++ b/src/arrow/dev/archery/archery/utils/tmpdir.py
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from contextlib import contextmanager
+from tempfile import mkdtemp, TemporaryDirectory
+
+
+@contextmanager
+def tmpdir(preserve=False, prefix="arrow-archery-"):
+ if preserve:
+ yield mkdtemp(prefix=prefix)
+ else:
+ with TemporaryDirectory(prefix=prefix) as tmp:
+ yield tmp
diff --git a/src/arrow/dev/archery/conftest.py b/src/arrow/dev/archery/conftest.py
new file mode 100644
index 000000000..06a643bea
--- /dev/null
+++ b/src/arrow/dev/archery/conftest.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import pytest
+
+
+def pytest_addoption(parser):
+ parser.addoption(
+ "--enable-integration",
+ action="store_true",
+ default=False,
+ help="run slow tests"
+ )
+
+
+def pytest_configure(config):
+ config.addinivalue_line(
+ "markers",
+ (
+ "integration: mark test as integration tests involving more "
+ "extensive setup (only used for crossbow at the moment)"
+ )
+ )
+
+
+def pytest_collection_modifyitems(config, items):
+ if config.getoption("--enable-integration"):
+ return
+ marker = pytest.mark.skip(reason="need --enable-integration option to run")
+ for item in items:
+ if "integration" in item.keywords:
+ item.add_marker(marker)
+
+
+@pytest.fixture
+def load_fixture(request):
+ current_test_directory = pathlib.Path(request.node.fspath).parent
+
+ def decoder(path):
+ with path.open('r') as fp:
+ if path.suffix == '.json':
+ import json
+ return json.load(fp)
+ elif path.suffix == '.yaml':
+ import yaml
+ return yaml.load(fp)
+ else:
+ return fp.read()
+
+ def loader(name, decoder=decoder):
+ path = current_test_directory / 'fixtures' / name
+ return decoder(path)
+
+ return loader
diff --git a/src/arrow/dev/archery/generate_files_for_endian_test.sh b/src/arrow/dev/archery/generate_files_for_endian_test.sh
new file mode 100755
index 000000000..ba3ce9f16
--- /dev/null
+++ b/src/arrow/dev/archery/generate_files_for_endian_test.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script generates json and arrow files of each type (e.g. primitive) for integration endian test
+# Usage: generate_files_for_endian_test.sh
+# ARROW_CPP_EXE_PATH : where Arrow C++ binaries can be found
+# TMP_DIR : where files will be generated
+
+set -e
+
+: ${ARROW_CPP_EXE_PATH:=/arrow/cpp/build/debug/}
+: ${TMP_DIR:=/tmp/arrow}
+
+json_dir=$TMP_DIR/arrow.$$
+mkdir -p $json_dir
+
+archery integration --stop-on-error --with-cpp=1 --tempdir=$json_dir
+
+for f in $json_dir/*.json ; do
+ $ARROW_CPP_EXE_PATH/arrow-json-integration-test -mode JSON_TO_ARROW -json $f -arrow ${f%.*}.arrow_file -integration true ;
+done
+for f in $json_dir/*.arrow_file ; do
+ $ARROW_CPP_EXE_PATH/arrow-file-to-stream $f > ${f%.*}.stream;
+done
+for f in $json_dir/*.json ; do
+ gzip $f ;
+done
+echo "The files are under $json_dir"
diff --git a/src/arrow/dev/archery/requirements.txt b/src/arrow/dev/archery/requirements.txt
new file mode 100644
index 000000000..0e1258adb
--- /dev/null
+++ b/src/arrow/dev/archery/requirements.txt
@@ -0,0 +1,4 @@
+click
+pygithub
+python-dotenv
+ruamel.yaml
diff --git a/src/arrow/dev/archery/setup.py b/src/arrow/dev/archery/setup.py
new file mode 100755
index 000000000..664807375
--- /dev/null
+++ b/src/arrow/dev/archery/setup.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import functools
+import operator
+import sys
+from setuptools import setup, find_packages
+
+if sys.version_info < (3, 6):
+ sys.exit('Python < 3.6 is not supported')
+
+# For pathlib.Path compatibility
+jinja_req = 'jinja2>=2.11'
+
+extras = {
+ 'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.6.13'],
+ 'benchmark': ['pandas'],
+ 'docker': ['ruamel.yaml', 'python-dotenv'],
+ 'release': [jinja_req, 'jira', 'semver', 'gitpython'],
+ 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'ruamel.yaml',
+ 'setuptools_scm'],
+ 'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml',
+ 'setuptools_scm'],
+}
+extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']
+extras['all'] = list(set(functools.reduce(operator.add, extras.values())))
+
+setup(
+ name='archery',
+ version="0.1.0",
+ description='Apache Arrow Developers Tools',
+ url='http://github.com/apache/arrow',
+ maintainer='Arrow Developers',
+ maintainer_email='dev@arrow.apache.org',
+ packages=find_packages(),
+ include_package_data=True,
+ install_requires=['click>=7'],
+ tests_require=['pytest', 'responses'],
+ extras_require=extras,
+ entry_points='''
+ [console_scripts]
+ archery=archery.cli:archery
+ '''
+)
diff --git a/src/arrow/dev/benchmarking/.env b/src/arrow/dev/benchmarking/.env
new file mode 100644
index 000000000..7485f5866
--- /dev/null
+++ b/src/arrow/dev/benchmarking/.env
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+PG_USER=benchmark
+PG_PASS=benchmark
diff --git a/src/arrow/dev/benchmarking/.gitignore b/src/arrow/dev/benchmarking/.gitignore
new file mode 100644
index 000000000..cda00d658
--- /dev/null
+++ b/src/arrow/dev/benchmarking/.gitignore
@@ -0,0 +1 @@
+/machine.json
diff --git a/src/arrow/dev/benchmarking/Dockerfile b/src/arrow/dev/benchmarking/Dockerfile
new file mode 100644
index 000000000..f47033397
--- /dev/null
+++ b/src/arrow/dev/benchmarking/Dockerfile
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+FROM postgres:11-alpine
+
+# Any `.sh` and `.sql` files copied to the entrypoint directory
+# will be run during startup. See `docker-entrypoint.sh` in
+# https://github.com/docker-library/postgres/blob/master/11/alpine/
+COPY ddl/* /docker-entrypoint-initdb.d/
diff --git a/src/arrow/dev/benchmarking/README.md b/src/arrow/dev/benchmarking/README.md
new file mode 100644
index 000000000..c5ddd62e0
--- /dev/null
+++ b/src/arrow/dev/benchmarking/README.md
@@ -0,0 +1,255 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+
+> NOTE: For those deploying this database, Postgres does not by default use
+> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in
+> some columns to always work. This [stackoverflow post][so-utf8] describes
+> how to do it for Amazon RDS. This [section of the docs][pg-charset]
+> states how to do it in general, i.e.: `initdb -E UTF8`.
+
+# Benchmark database
+
+This directory contains files related to the benchmark database.
+
+- 'ddl/\*.sql' contains the database definition.
+- 'examples/' contain code to test the database and demonstrate its use.
+- 'Dockerfile' and 'docker-compose.yml' are for developing benchmarks
+ against a testing database.
+- An auto-generated summary of views in the [Data model][./data_model.rst].
+
+## Setup
+
+To create a 'machine.json' file that will uniquely identify a computer for
+benchmark submission, run the provided shell script and fill in the prompts
+to identify the GPU.
+
+> NOTE: this does not work on VMs or Windows.
+
+```shell
+./make_machine_json.sh
+```
+
+Submit the machine details via http using the command
+
+> NOTE: This will only work if we have selected graphql as a client
+> and have it running in production or if during development
+> you have run `docker-compose up` to create and run both a
+> database Docker container and graphql client Docker container.
+
+```shell
+./graphql_submit.sh machine machine.json localhost:5000/graphql
+```
+
+or submit after starting up the psql client from this directory, using
+
+```
+\set content `cat machine.json`
+SELECT ingest_machine_view(:'content'::jsonb);
+```
+
+> NOTE: If you don't have a "machine.json" file generated,
+> use the example file "examples/machine.json" instead.
+
+## Local testing
+
+There is a file named "[.env][.env]" in this directory that is used by
+`docker-compose` to set up the postgres user and password for the
+local containers. Currently the name and password are both
+`benchmark`. This will be the password for the psql client as well.
+
+The Postgres Alpine image runs any added '\*.sql' and '\*.sh' scripts placed
+in '/docker-entrypoint-initdb.d/' during its startup script, so the local
+database will be set up automatically once the container is running.
+
+To start the containers, be sure to have [Docker installed][docker],
+and then run the following from this directory (arrow/dev/benchmarking).
+
+
+```
+docker-compose up
+```
+
+This will start a process that will show logs from both the running
+Postgres container and the running GraphQL container.
+To stop the running containers gracefully, background the process
+and run
+
+```
+docker-compose down
+fg # To re-foreground the backgrounded process while it exits
+```
+
+You will still have the container images "benchmarking_pg",
+"graphile/postgraphile", and "postgres:11-alpine" on your
+computer. You should keep them if you want to run this again.
+If you don't, then remove them with the command:
+
+```
+docker rmi benchmarking_pg postgres:11-alpine graphile/postgraphile
+```
+
+### Postgres client
+
+The `psql` shell client is bundled with the PostgreSQL core distribution
+available from the [Postgres download page][postgres-downloads].
+Using the `PG_USER` defined in the `.env` file (currently "benchmark"),
+the command to connect to the container is:
+```shell
+psql -h localhost -p 5432 -U benchmark
+```
+There is an example script in [examples/example.sql](examples/example.sql) that
+runs some queries against the database. To run it in the psql client, type
+the following in the psql command-line interface:
+
+```
+\i examples/example.sql
+```
+
+#### Bulk ingestion using CSV
+
+An example CSV file for bulk ingestion is in
+[examples/benchmark_run_example.csv](examples/benchmark_run_example.csv).
+The columns are listed in the same order as they are defined, to avoid having
+to explicitly name every column in ingestion. The "id" column is left empty
+and will be automatically assigned on insert.
+
+To ingest the example CSV file from the command line,
+use the command below:
+
+```shell
+CSV='examples/benchmark_run_example.csv' && \
+psql -U benchmark -h localhost -p 5432 \
+ -c "\copy benchmark_run_view FROM '${CSV}' WITH (FORMAT csv, HEADER);"
+```
+
+#### Bulk ingestion using JSON
+
+To ingest the example JSON file using the psql client, use the command below.
+
+```
+\set content `cat examples/benchmark_example.json`
+SELECT ingest_benchmark_view(:'content'::jsonb);
+```
+
+### HTTP client
+
+This section requires an actual HTTP client to be up, either
+for the production database or via the testing setup.
+(See the [local testing section](#local-testing) for how to set it up).
+
+The 'graphile/postgraphile' container provides an HTTP interface
+to the database via two url routes:
+
+- A GraphiQL page ([localhost:5000/graphiql][graphiql])
+ to aid visual exploration of the data model.
+ (The `--watch` flag on the command line. Not recommended for production.)
+- An endpoint that receives POST requests only (localhost:5000/graphql).
+
+#### Ingestion
+
+The script [graphql_submit.sh](./graphql_submit.sh) simplifies submission
+to the database via curl. Examples:
+
+```shell
+./graphql_submit.sh benchmarks examples/benchmark_example.json
+./graphql_submit.sh runs examples/benchmark_run_example.json
+```
+
+#### Querying
+
+The output of the query is a JSON object that is hard to read on the command line.
+Here is an example query in the shell:
+```shell
+curl -X POST \
+ -H "Content-Type: application/json" \
+ --data '{"query": "{projectDetails{ projectName }}"}' \
+ localhost:5000/graphql
+```
+
+which (if you have previously run the "examples.sql" command) yields
+
+```
+{"data":{"projectDetails":{"projectName":"Apache Arrow"}}}
+```
+
+Here is an example query using Python:
+```python
+import json
+import requests
+
+uri = "http://localhost:5000/graphql"
+query = json.load(open("examples/graphql_query_environment_view.json"))
+response = requests.post(uri, json=query)
+message = "{benchmarkLanguage}: {languageImplementationVersion}, {dependencies}"
+
+for row in response.json()['data']['allEnvironmentViews']['edges']:
+ print(message.format(**row['node']))
+
+# result:
+#
+# Python: CPython 2.7, {"six":"","numpy":"1.14","other_lib":"1.0"}
+# Python: CPython 2.7, {"six":"","numpy":"1.15","other_lib":"1.0"}
+# Python: CPython 3.6, {"boost":"1.42","numpy":"1.15"}
+```
+
+## Deployment
+
+(work in progress).
+
+> NOTE: For those deploying this database, Postgres does not by default use
+> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in
+> some columns to always work. This [stackoverflow post][so-utf8] describes
+> how to do it for Amazon RDS. This [section of the docs][pg-charset]
+> states how to do it in general, i.e.: `initdb -E UTF8`.
+
+
+## Quick reference
+
+- String variables `'have single quotes'`
+- Arrays `'{"have", "curly", "braces"}'::text[]` or `'{1, 2, 3}'::integer[]`
+- JSONb `'{"has":"this", "format":42}'::jsonb`
+- Elements inserted using JSON-formatted strings can use standard
+ JSON-formatted arrays (`[1, 2, 3]`) and do not have to use the above
+ string formats.
+- When comparing nullable values use `x IS NOT DISTINCT FROM y` rather than `x = y`
+- An auto-generated summary of the [Data model][./data_model.rst].
+
+## Data model documentation
+
+To recreate the data model documentation,
+(1) install the [psql client][postgres-downloads]
+(sorry you need to download the whole thing),
+(2) start the docker container using `docker-compose up`,
+(3) and then run these scripts:
+
+```
+./make_dotfile.sh
+./make_data_model_rst.sh
+```
+
+[pg-jsonb]: https://www.postgresql.org/docs/11/datatype-json.html#id-1.5.7.22.3
+[so-utf8]: https://stackoverflow.com/a/33557023
+[pg-charset]: https://www.postgresql.org/docs/9.3/multibyte.html#AEN34424
+[docker]: https://www.docker.com/get-started
+[citext-limitations]: https://www.postgresql.org/docs/11/citext.html#id-1.11.7.17.7
+[postgres-downloads]: https://www.postgresql.org/download/
+[graphiql]: http://localhost:5000/graphiql
+[postgraphile-lambda]: https://github.com/graphile/postgraphile-lambda-example
+[postgraphile-cli]: https://www.graphile.org/postgraphile/usage-cli/
diff --git a/src/arrow/dev/benchmarking/data_model.dot b/src/arrow/dev/benchmarking/data_model.dot
new file mode 100644
index 000000000..d311acd4e
--- /dev/null
+++ b/src/arrow/dev/benchmarking/data_model.dot
@@ -0,0 +1,219 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+/*
+ WARNING
+ This is an auto-generated file. Please do not edit.
+
+ To reproduce, please run :code:`./make_data_model_rst.sh`.
+ (This requires you have the
+ `psql client <https://www.postgresql.org/download/>`_
+ and have started the docker containers using
+ :code:`docker-compose up`).
+*/
+digraph database {
+ concentrate = true;
+ rankdir = LR;
+ ratio = ".75";
+ node [shape = none, fontsize="11", fontname="Helvetica"];
+ edge [fontsize="8", fontname="Helvetica"];
+legend
+[fontsize = "14"
+label =
+<<table border="0" cellpadding="0">
+ <tr><td align="left"><font point-size="16">Legend</font></td></tr>
+ <tr><td align="left">pk = primary key</td></tr>
+ <tr><td align="left">fk = foreign key</td></tr>
+ <tr><td align="left">u = unique*</td></tr>
+ <tr><td align="left">o = optional</td></tr>
+ <tr><td align="left">* multiple uniques in the same table are a unique group</td></tr>
+</table>>
+];
+benchmark
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark</font></td></tr>
+ <tr><td port="benchmark_id"><b>benchmark_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>benchmark_name (u)</td></tr>
+ <tr><td>parameter_names (o)</td></tr>
+ <tr><td>benchmark_description</td></tr>
+ <tr><td>benchmark_version (u)</td></tr>
+ <tr><td port="unit_id">unit_id (fk) </td></tr>
+ </table>>
+];
+benchmark_language
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_language</font></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>benchmark_language (u)</td></tr>
+ </table>>
+];
+benchmark_run
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_run</font></td></tr>
+ <tr><td port="benchmark_run_id"><b>benchmark_run_id (pk)</b></td></tr>
+ <tr><td>parameter_values (u)</td></tr>
+ <tr><td>value</td></tr>
+ <tr><td>git_commit_timestamp (u)</td></tr>
+ <tr><td>git_hash</td></tr>
+ <tr><td>val_min (o)</td></tr>
+ <tr><td>val_q1 (o)</td></tr>
+ <tr><td>val_q3 (o)</td></tr>
+ <tr><td>val_max (o)</td></tr>
+ <tr><td>std_dev</td></tr>
+ <tr><td>n_obs</td></tr>
+ <tr><td>run_timestamp (u)</td></tr>
+ <tr><td>run_metadata (o)</td></tr>
+ <tr><td>run_notes (o)</td></tr>
+ <tr><td port="machine_id">machine_id (u) (fk) </td></tr>
+ <tr><td port="environment_id">environment_id (u) (fk) </td></tr>
+ <tr><td port="language_implementation_version_id">language_implementation_version_id (fk) </td></tr>
+ <tr><td port="benchmark_language_id">benchmark_language_id (fk) </td></tr>
+ <tr><td port="benchmark_id">benchmark_id (u) (fk) </td></tr>
+ </table>>
+];
+benchmark_type
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_type</font></td></tr>
+ <tr><td port="benchmark_type_id"><b>benchmark_type_id (pk)</b></td></tr>
+ <tr><td>benchmark_type (u)</td></tr>
+ <tr><td>lessisbetter</td></tr>
+ </table>>
+];
+cpu
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">cpu</font></td></tr>
+ <tr><td port="cpu_id"><b>cpu_id (pk)</b></td></tr>
+ <tr><td>cpu_model_name (u)</td></tr>
+ <tr><td>cpu_core_count</td></tr>
+ <tr><td>cpu_thread_count</td></tr>
+ <tr><td>cpu_frequency_max_hz</td></tr>
+ <tr><td>cpu_frequency_min_hz</td></tr>
+ <tr><td>cpu_l1d_cache_bytes</td></tr>
+ <tr><td>cpu_l1i_cache_bytes</td></tr>
+ <tr><td>cpu_l2_cache_bytes</td></tr>
+ <tr><td>cpu_l3_cache_bytes</td></tr>
+ </table>>
+];
+dependencies
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">dependencies</font></td></tr>
+ <tr><td port="dependencies_id"><b>dependencies_id (pk)</b></td></tr>
+ <tr><td>dependencies (u)</td></tr>
+ </table>>
+];
+gpu
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">gpu</font></td></tr>
+ <tr><td port="gpu_id"><b>gpu_id (pk)</b></td></tr>
+ <tr><td>gpu_information (u)</td></tr>
+ <tr><td>gpu_part_number</td></tr>
+ <tr><td>gpu_product_name</td></tr>
+ </table>>
+];
+language_implementation_version
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">language_implementation_version</font></td></tr>
+ <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>language_implementation_version (u)</td></tr>
+ </table>>
+];
+machine
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">machine</font></td></tr>
+ <tr><td port="machine_id"><b>machine_id (pk)</b></td></tr>
+ <tr><td>machine_name</td></tr>
+ <tr><td>mac_address (u)</td></tr>
+ <tr><td>memory_bytes</td></tr>
+ <tr><td>cpu_actual_frequency_hz</td></tr>
+ <tr><td>machine_other_attributes (o)</td></tr>
+ <tr><td port="cpu_id">cpu_id (fk) </td></tr>
+ <tr><td port="gpu_id">gpu_id (fk) </td></tr>
+ <tr><td port="os_id">os_id (fk) </td></tr>
+ </table>>
+];
+os
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">os</font></td></tr>
+ <tr><td port="os_id"><b>os_id (pk)</b></td></tr>
+ <tr><td>os_name (u)</td></tr>
+ <tr><td>architecture_name (u)</td></tr>
+ <tr><td>kernel_name (u)</td></tr>
+ </table>>
+];
+project
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">project</font></td></tr>
+ <tr><td port="project_id"><b>project_id (pk)</b></td></tr>
+ <tr><td>project_name (u)</td></tr>
+ <tr><td>project_url (u)</td></tr>
+ <tr><td>repo_url (u)</td></tr>
+ <tr><td>last_changed</td></tr>
+ </table>>
+];
+unit
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">unit</font></td></tr>
+ <tr><td port="unit_id"><b>unit_id (pk)</b></td></tr>
+ <tr><td>units (u)</td></tr>
+ <tr><td port="benchmark_type_id">benchmark_type_id (fk) </td></tr>
+ </table>>
+];
+environment
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">environment</font></td></tr>
+ <tr><td port="environment_id"><b>environment_id (pk)</b></td></tr>
+ <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td port="dependencies_id">dependencies_id (u) (fk) </td></tr>
+ </table>>
+];
+machine:cpu_id -> cpu:cpu_id;
+machine:gpu_id -> gpu:gpu_id;
+machine:os_id -> os:os_id;
+benchmark:benchmark_language_id -> benchmark_language:benchmark_language_id;
+environment:benchmark_language_id -> benchmark_language:benchmark_language_id;
+language_implementation_version:benchmark_language_id -> benchmark_language:benchmark_language_id;
+environment:dependencies_id -> dependencies:dependencies_id;
+environment:benchmark_language_id -> language_implementation_version:benchmark_language_id;
+environment:language_implementation_version_id -> language_implementation_version:language_implementation_version_id;
+unit:benchmark_type_id -> benchmark_type:benchmark_type_id;
+benchmark_run:machine_id -> machine:machine_id;
+benchmark:unit_id -> unit:unit_id;
+benchmark_run:language_implementation_version_id -> environment:language_implementation_version_id;
+benchmark_run:benchmark_language_id -> environment:benchmark_language_id;
+benchmark_run:environment_id -> environment:environment_id;
+benchmark_run:benchmark_language_id -> benchmark:benchmark_language_id;
+benchmark_run:benchmark_id -> benchmark:benchmark_id;
+}
+
diff --git a/src/arrow/dev/benchmarking/data_model.rst b/src/arrow/dev/benchmarking/data_model.rst
new file mode 100644
index 000000000..d0f3dc7fc
--- /dev/null
+++ b/src/arrow/dev/benchmarking/data_model.rst
@@ -0,0 +1,373 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+
+.. WARNING
+.. This is an auto-generated file. Please do not edit.
+
+.. To reproduce, please run :code:`./make_data_model_rst.sh`.
+.. (This requires you have the
+.. `psql client <https://www.postgresql.org/download/>`_
+.. and have started the docker containers using
+.. :code:`docker-compose up`).
+
+
+.. _benchmark-data-model:
+
+Benchmark data model
+====================
+
+
+.. graphviz:: data_model.dot
+
+
+.. _benchmark-ingestion:
+
+Benchmark ingestion helper functions
+====================================
+
+ingest_benchmark_run_view
+-------------------------
+
+:code:`ingest_benchmark_run_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ [
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:05 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "std_dev": 3.14,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:10 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ }
+ ]
+To identify which columns in "benchmark_run_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_benchmark_view
+---------------------
+
+:code:`ingest_benchmark_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ [
+ {
+ "benchmark_name": "Benchmark 1",
+ "parameter_names": ["arg0", "arg1", "arg2"],
+ "benchmark_description": "First benchmark",
+ "benchmark_type": "Time",
+ "units": "miliseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_names": ["arg0", "arg1"],
+ "benchmark_description": "Description 2.",
+ "benchmark_type": "Time",
+ "units": "nanoseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ }
+ ]
+
+To identify which columns in "benchmark_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_benchmark_runs_with_context
+----------------------------------
+
+:code:`ingest_benchmark_runs_with_context(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+The object contains three key-value pairs::
+
+ {"context": {
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 3.6",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},
+ "git_commit_timestamp": "2019-02-14 22:42:22 +0100",
+ "git_hash": "123456789abcde",
+ "run_timestamp": "2019-02-14 03:00:40 -0600",
+ "extra stuff": "does not hurt anything and will not be added."
+ },
+ "benchmark_version": {
+ "Benchmark Name 1": "Any string can be a version.",
+ "Benchmark Name 2": "A git hash can be a version.",
+ "An Unused Benchmark Name": "Will be ignored."
+ },
+ "benchmarks": [
+ {
+ "benchmark_name": "Benchmark Name 1",
+ "parameter_values": {"argument1": 1, "argument2": "value2"},
+ "value": 42,
+ "val_min": 41.2,
+ "val_q1": 41.5,
+ "val_q3": 42.5,
+ "val_max": 42.8,
+ "std_dev": 0.5,
+ "n_obs": 100,
+ "run_metadata": {"any": "key-value pairs"},
+ "run_notes": "Any relevant notes."
+ },
+ {
+ "benchmark_name": "Benchmark Name 2",
+ "parameter_values": {"not nullable": "Use {} if no params."},
+ "value": 8,
+ "std_dev": 1,
+ "n_obs": 2,
+ }
+ ]
+ }
+
+- The entry for "context" contains the machine, environment, and timestamp
+ information common to all of the runs
+- The entry for "benchmark_version" maps benchmark
+ names to their version strings. (Which can be a git hash,
+ the entire code string, a number, or any other string of your choice.)
+- The entry for "benchmarks" is a list of benchmark run data
+ for the given context and benchmark versions. The first example
+ benchmark run entry contains all possible values, even
+ nullable ones, and the second entry omits all nullable values.
+
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_machine_view
+-------------------
+
+:code:`ingest_machine_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ {
+ "mac_address": "0a:00:2d:01:02:03",
+ "machine_name": "Yet-Another-Machine-Name",
+ "memory_bytes": 8589934592,
+ "cpu_actual_frequency_hz": 2300000000,
+ "os_name": "OSX",
+ "architecture_name": "x86_64",
+ "kernel_name": "18.2.0",
+ "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",
+ "cpu_core_count": 2,
+ "cpu_thread_count": 4,
+ "cpu_frequency_max_hz": 2300000000,
+ "cpu_frequency_min_hz": 2300000000,
+ "cpu_l1d_cache_bytes": 32768,
+ "cpu_l1i_cache_bytes": 32768,
+ "cpu_l2_cache_bytes": 262144,
+ "cpu_l3_cache_bytes": 4194304,
+ "machine_other_attributes": {"just": "an example"},
+ "gpu_information": "",
+ "gpu_part_number": "",
+ "gpu_product_name": ""
+ }
+
+To identify which columns in "machine_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+
+.. _benchmark-views:
+
+Benchmark views
+===============
+
+
+benchmark_run_view
+------------------
+
+Each benchmark run.
+
+- Each entry is unique on the machine, environment, benchmark,
+ and git commit timestamp.
+
+=============================== =========== ======== =========== ===========
+Column Type Nullable Default Description
+=============================== =========== ======== =========== ===========
+benchmark_run_id int8 not null serial primary key
+benchmark_name citext not null unique
+benchmark_version citext not null unique
+parameter_values jsonb not null '{}'::jsonb unique
+value numeric not null
+git_commit_timestamp timestamptz not null unique
+git_hash text not null
+val_min numeric
+val_q1 numeric
+val_q3 numeric
+val_max numeric
+std_dev numeric not null
+n_obs int4 not null
+run_timestamp timestamptz not null unique
+run_metadata jsonb
+run_notes text
+mac_address macaddr not null unique
+benchmark_language citext not null unique
+language_implementation_version citext not null ''::citext unique
+dependencies jsonb not null '{}'::jsonb unique
+=============================== =========== ======== =========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+benchmark_view
+--------------
+
+The details about a particular benchmark.
+
+- "benchmark_name" is unique for a given "benchmark_language"
+- Each entry is unique on
+ ("benchmark_language", "benchmark_name", "benchmark_version")
+
+===================== ====== ======== ======= ===========
+Column Type Nullable Default Description
+===================== ====== ======== ======= ===========
+benchmark_id int4 not null serial primary key
+benchmark_name citext not null unique
+parameter_names _text
+benchmark_description text not null
+benchmark_type citext not null unique
+units citext not null unique
+lessisbetter bool not null
+benchmark_version citext not null unique
+benchmark_language citext not null unique
+===================== ====== ======== ======= ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+environment_view
+----------------
+
+The build environment used for a reported benchmark run.
+(Will be inferred from each "benchmark_run" if not explicitly added).
+
+- Each entry is unique on
+ ("benchmark_language", "language_implementation_version", "dependencies")
+- "benchmark_language" is unique in the "benchmark_language" table
+- "benchmark_language" plus "language_implementation_version" is unique in
+ the "language_implementation_version" table
+- "dependencies" is unique in the "dependencies" table
+
+=============================== ====== ======== =========== ===========
+Column Type Nullable Default Description
+=============================== ====== ======== =========== ===========
+environment_id int4 not null serial primary key
+benchmark_language citext not null unique
+language_implementation_version citext not null ''::citext unique
+dependencies jsonb not null '{}'::jsonb unique
+=============================== ====== ======== =========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+machine_view
+------------
+
+The machine environment (CPU, GPU, OS) used for each benchmark run.
+
+- "mac_address" is unique in the "machine" table
+- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table
+ Empty string (''), not null, is used for machines that won't use the GPU
+- "cpu_model_name" is unique in the "cpu" (central processing unit) table
+- "os_name", "os_architecture_name", and "os_kernel_name"
+ are unique in the "os" (operating system) table
+- "machine_other_attributes" is a key-value store for any other relevant
+ data, e.g. '{"hard_disk_type": "solid state"}'
+
+======================== ======= ======== ========== ===========
+Column Type Nullable Default Description
+======================== ======= ======== ========== ===========
+machine_id int4 not null serial primary key
+mac_address macaddr not null unique
+machine_name citext not null
+memory_bytes int8 not null
+cpu_actual_frequency_hz int8 not null
+os_name citext not null unique
+architecture_name citext not null unique
+kernel_name citext not null ''::citext unique
+cpu_model_name citext not null unique
+cpu_core_count int4 not null
+cpu_thread_count int4 not null
+cpu_frequency_max_hz int8 not null
+cpu_frequency_min_hz int8 not null
+cpu_l1d_cache_bytes int4 not null
+cpu_l1i_cache_bytes int4 not null
+cpu_l2_cache_bytes int4 not null
+cpu_l3_cache_bytes int4 not null
+gpu_information citext not null ''::citext unique
+gpu_part_number citext not null ''::citext
+gpu_product_name citext not null ''::citext
+machine_other_attributes jsonb
+======================== ======= ======== ========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
diff --git a/src/arrow/dev/benchmarking/ddl/0_setup.sql b/src/arrow/dev/benchmarking/ddl/0_setup.sql
new file mode 100644
index 000000000..ec1044641
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/0_setup.sql
@@ -0,0 +1,23 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text
+
+-- For future fine-grained control over function execution by user group.
+ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public;
diff --git a/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql
new file mode 100644
index 000000000..c52d66cfd
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql
@@ -0,0 +1,45 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- PROJECT
+CREATE TABLE IF NOT EXISTS public.project
+(
+ project_id SERIAL PRIMARY KEY
+ , project_name citext NOT NULL
+ , project_url text NOT NULL
+ , repo_url text NOT NULL
+ , last_changed timestamp (0) without time zone NOT NULL DEFAULT now()
+);
+COMMENT ON TABLE public.project
+ IS 'Project name and relevant URLs.';
+COMMENT ON COLUMN public.project.project_url
+ IS 'Homepage URL.';
+COMMENT ON COLUMN public.project.repo_url
+ IS 'Git repo URL to link stored commit hashes to code in a webpage.';
+COMMENT ON COLUMN public.project.last_changed
+ IS 'New project details are added with a new timestamp. '
+ 'The project details with the newest timestamp will be used.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX project_unique_index_on_project_name_urls
+ ON public.project(project_name, project_url, repo_url);
+COMMENT ON INDEX
+ public.project_unique_index_on_project_name_urls
+ IS 'Enforce uniqueness of project name and urls.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql
new file mode 100644
index 000000000..df1a9e757
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql
@@ -0,0 +1,63 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- CPU
+CREATE TABLE IF NOT EXISTS public.cpu
+(
+ cpu_id SERIAL PRIMARY KEY
+ , cpu_model_name citext NOT NULL UNIQUE
+ , cpu_core_count integer NOT NULL
+ , cpu_thread_count integer NOT NULL
+ , cpu_frequency_max_Hz bigint NOT NULL
+ , cpu_frequency_min_Hz bigint NOT NULL
+ , cpu_L1d_cache_bytes integer NOT NULL
+ , cpu_L1i_cache_bytes integer NOT NULL
+ , cpu_L2_cache_bytes integer NOT NULL
+ , cpu_L3_cache_bytes integer NOT NULL
+);
+COMMENT ON TABLE public.cpu
+ IS 'CPU model and its specifications.';
+COMMENT ON COLUMN public.cpu.cpu_id
+ IS 'The primary key for the CPU table. '
+ 'NOTE: This is a synthetic primary key and not meant to represent a '
+ 'processor instruction to read capabilities.';
+COMMENT ON COLUMN public.cpu.cpu_model_name
+ IS 'The output of `sysctl -n machdep.cpu.brand_stringp`.';
+COMMENT ON COLUMN public.cpu.cpu_core_count
+ IS 'The output of `sysctl -n hw.physicalcpu`.';
+COMMENT ON COLUMN public.cpu.cpu_thread_count
+ IS 'The output of `sysctl -n hw.logicalcpu`.';
+COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency_max`.';
+COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency_min`.';
+COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes
+ IS 'The output of `sysctl -n hw.l1dcachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes
+ IS 'The output of `sysctl -n hw.l1icachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes
+ IS 'The output of `sysctl -n hw.l2cachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes
+ IS 'The output of `sysctl -n hw.l3cachesize`.';
+
+-- CONSTRAINTS
+ALTER TABLE public.cpu
+ ADD CONSTRAINT cpu_check_cpu_model_name_length
+ CHECK (char_length(cpu_model_name) < 255);
diff --git a/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql
new file mode 100644
index 000000000..564af19de
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql
@@ -0,0 +1,43 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- GPU
+CREATE TABLE IF NOT EXISTS public.gpu
+(
+ gpu_id SERIAL PRIMARY KEY
+ , gpu_information citext UNIQUE NOT NULL DEFAULT ''
+ , gpu_part_number citext NOT NULL DEFAULT ''
+ , gpu_product_name citext NOT NULL DEFAULT ''
+);
+COMMENT ON TABLE public.gpu IS 'GPU specifications.';
+COMMENT ON COLUMN public.gpu.gpu_information
+ IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` '
+ 'or `kextstat | grep -i cuda` on OSX, or another command; anything '
+ 'that gets a string to uniquely identify the GPU.';
+
+-- CONSTRAINTS
+CREATE INDEX gpu_index_on_part_number
+ ON public.gpu (gpu_part_number);
+
+CREATE INDEX gpu_index_on_product_name
+ ON public.gpu (gpu_product_name);
+
+CREATE INDEX gpu_index_on_product_name_and_part_number
+ ON public.gpu (gpu_product_name, gpu_part_number);
diff --git a/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql
new file mode 100644
index 000000000..7b03d82f4
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql
@@ -0,0 +1,57 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- OS
+CREATE TABLE IF NOT EXISTS public.os
+(
+ os_id SERIAL PRIMARY KEY
+ , os_name citext NOT NULL
+ , architecture_name citext NOT NULL
+ , kernel_name citext NOT NULL DEFAULT ''
+);
+-- @name os. forces retention of an 's' in the Graphile GraphQL api.
+COMMENT ON TABLE public.os
+ IS E'@name os.\nOperating system name and kernel (version).';
+COMMENT ON COLUMN public.os.os_name
+ IS 'Operating system name. For example, OSX, Ubuntu, Windows`.';
+COMMENT ON COLUMN public.os.architecture_name
+ IS 'Operating system architecture; the output of `uname -m`.';
+COMMENT ON COLUMN public.os.kernel_name
+ IS 'Operating system kernel, or NULL. '
+ 'On Linux/OSX, the output of `uname -r`. '
+ 'On Windows, the output of `ver`.';
+
+-- CONSTRAINTS
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_os_name_length
+ CHECK (char_length(os_name) < 63);
+
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_architecture_name_length
+ CHECK (char_length(architecture_name) < 63);
+
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_kernel_name_length
+ CHECK (char_length(kernel_name) < 63);
+
+CREATE UNIQUE INDEX os_unique_index
+ ON public.os(os_name, architecture_name, kernel_name);
+COMMENT ON INDEX public.os_unique_index
+ IS 'Enforce uniqueness of os, architecture, and kernel names.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql
new file mode 100644
index 000000000..2e3553677
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql
@@ -0,0 +1,35 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_LANGUAGE
+CREATE TABLE IF NOT EXISTS public.benchmark_language
+(
+ benchmark_language_id SERIAL PRIMARY KEY
+ , benchmark_language citext NOT NULL UNIQUE
+);
+COMMENT ON TABLE public.benchmark_language
+ IS 'The language the benchmark was written in (and presumably for).';
+COMMENT ON COLUMN public.benchmark_language.benchmark_language
+ IS 'The benchmark language. For example: Python';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_language
+ ADD CONSTRAINT benchmark_language_check_language_length
+ CHECK (char_length(benchmark_language) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql
new file mode 100644
index 000000000..3744a0c35
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql
@@ -0,0 +1,31 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- DEPENDENCIES
+CREATE TABLE IF NOT EXISTS public.dependencies
+(
+ dependencies_id SERIAL PRIMARY KEY
+ , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb
+);
+COMMENT ON TABLE public.dependencies
+ IS E'@name dependencies.\n'
+ 'A JSON object mapping dependencies to their versions.';
+COMMENT ON COLUMN public.dependencies.dependencies
+ IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql
new file mode 100644
index 000000000..f7d26e4e2
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql
@@ -0,0 +1,46 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION
+CREATE TABLE IF NOT EXISTS public.language_implementation_version
+(
+ language_implementation_version_id SERIAL
+ , language_implementation_version citext NOT NULL DEFAULT ''
+ , benchmark_language_id integer NOT NULL
+ , PRIMARY KEY (language_implementation_version_id, benchmark_language_id)
+ , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language
+);
+COMMENT ON TABLE public.language_implementation_version
+ IS 'The benchmark language implementation or compiler version, e.g. '
+ '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or '
+ '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.';
+COMMENT ON COLUMN public.language_implementation_version.language_implementation_version
+ IS 'The version number used in the benchmark environment (e.g. ''2.7'').';
+
+-- CONSTRAINTS
+ALTER TABLE public.language_implementation_version
+ ADD CONSTRAINT language_implementation_version_check_version_length
+ CHECK (char_length(language_implementation_version) < 255);
+
+CREATE UNIQUE INDEX language_implementation_version_unique_index
+ ON public.language_implementation_version
+ (benchmark_language_id, language_implementation_version);
+COMMENT ON INDEX language_implementation_version_unique_index
+ IS 'Enforce unique implementation versions for the languages.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql
new file mode 100644
index 000000000..1143cdb00
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql
@@ -0,0 +1,39 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_TYPE
+CREATE TABLE IF NOT EXISTS public.benchmark_type
+(
+ benchmark_type_id SERIAL PRIMARY KEY
+ , benchmark_type citext NOT NULL UNIQUE
+ , lessisbetter boolean NOT NULL
+);
+COMMENT ON TABLE public.benchmark_type
+ IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"';
+COMMENT ON COLUMN public.benchmark_type.benchmark_type
+ IS 'The type of units, so ''time'' for seconds, miliseconds, or '
+ '''mem'' for kilobytes, megabytes.';
+COMMENT ON COLUMN public.benchmark_type.lessisbetter
+ IS 'True if a smaller benchmark value is better.';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_type
+ ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length
+ CHECK (char_length(benchmark_type) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql
new file mode 100644
index 000000000..8f219d3e0
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql
@@ -0,0 +1,69 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- MACHINE
+CREATE TABLE IF NOT EXISTS public.machine
+(
+ machine_id SERIAL PRIMARY KEY
+ , machine_name citext NOT NULL
+ , mac_address macaddr NOT NULL
+ , memory_bytes bigint NOT NULL
+ , cpu_actual_frequency_Hz bigint NOT NULL
+ , machine_other_attributes jsonb
+ , cpu_id integer NOT NULL
+ , gpu_id integer NOT NULL
+ , os_id integer NOT NULL
+ , FOREIGN KEY (cpu_id) REFERENCES public.cpu
+ , FOREIGN KEY (gpu_id) REFERENCES public.gpu
+ , FOREIGN KEY (os_id) REFERENCES public.os
+);
+COMMENT ON TABLE public.machine
+ IS 'Unique identifiers for a machine.';
+COMMENT ON COLUMN public.machine.machine_name
+ IS 'A machine name of your choice.';
+COMMENT ON COLUMN public.machine.mac_address
+ IS 'The mac_address of a physical network interface to uniquely '
+ 'identify a computer. Postgres accepts standard formats, including '
+ '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203''';
+COMMENT ON COLUMN public.machine.memory_bytes
+ IS 'The output of `sysctl -n hw.memsize`.';
+COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency`.';
+COMMENT ON COLUMN public.machine.machine_other_attributes
+ IS 'Additional attributes of interest, as a JSON object. '
+ 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX machine_index_on_mac_address
+ ON public.machine(mac_address);
+COMMENT ON INDEX machine_index_on_mac_address
+ IS 'Enforce unique mac address';
+
+CREATE INDEX machine_index_on_cpu_id
+ ON public.machine(cpu_id);
+
+CREATE INDEX machine_index_on_gpu_id
+ ON public.machine(gpu_id);
+
+CREATE INDEX machine_index_on_os_id
+ ON public.machine(os_id);
+
+CREATE INDEX machine_index_on_cpu_gpu_os_id
+ ON public.machine(cpu_id, gpu_id, os_id);
diff --git a/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql
new file mode 100644
index 000000000..a8cf57669
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql
@@ -0,0 +1,37 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- UNIT
+CREATE TABLE IF NOT EXISTS public.unit
+(
+ unit_id SERIAL PRIMARY KEY
+ , units citext NOT NULL UNIQUE
+ , benchmark_type_id integer NOT NULL
+ , FOREIGN KEY (benchmark_type_id)
+ REFERENCES public.benchmark_type(benchmark_type_id)
+);
+COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.';
+COMMENT ON COLUMN public.unit.units
+ IS 'For example: nanoseconds, microseconds, bytes, megabytes.';
+
+-- CONSTRAINTS
+ALTER TABLE public.unit
+ ADD CONSTRAINT unit_check_units_string_length
+ CHECK (char_length(units) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql
new file mode 100644
index 000000000..e3a6d2395
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql
@@ -0,0 +1,51 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- ENVIRONMENT
+CREATE TABLE IF NOT EXISTS public.environment
+(
+ environment_id SERIAL
+ , language_implementation_version_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , dependencies_id integer NOT NULL
+ , PRIMARY KEY
+ (environment_id, benchmark_language_id, language_implementation_version_id)
+ , FOREIGN KEY
+ (benchmark_language_id)
+ REFERENCES public.benchmark_language
+ , FOREIGN KEY
+ (language_implementation_version_id, benchmark_language_id)
+ REFERENCES public.language_implementation_version(
+ language_implementation_version_id
+ , benchmark_language_id
+ )
+ , FOREIGN KEY
+ (dependencies_id)
+ REFERENCES public.dependencies
+);
+COMMENT ON TABLE public.environment
+ IS 'Identifies a build environment for a specific suite of benchmarks.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX environment_unique_index
+ ON public.environment
+ (benchmark_language_id, language_implementation_version_id, dependencies_id);
+COMMENT ON INDEX environment_unique_index
+ IS 'Enforce unique combinations of language version and dependencies.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql
new file mode 100644
index 000000000..18895823d
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql
@@ -0,0 +1,54 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK
+CREATE TABLE IF NOT EXISTS public.benchmark
+(
+ benchmark_id SERIAL
+ , benchmark_name citext NOT NULL
+ , parameter_names text[]
+ , benchmark_description text NOT NULL
+ , benchmark_version citext NOT NULL
+ , unit_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , PRIMARY KEY (benchmark_id, benchmark_language_id)
+ , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language
+ , FOREIGN KEY (unit_id) REFERENCES public.unit
+);
+COMMENT ON TABLE public.benchmark
+ IS 'Identifies an individual benchmark.';
+COMMENT ON COLUMN public.benchmark.parameter_names
+ IS 'A list of strings identifying the parameter names in the benchmark.';
+COMMENT ON COLUMN public.benchmark.benchmark_version
+ IS 'Can be any string. In Airspeed Velocity, the version is '
+ 'by default the hash of the entire code string for the benchmark.';
+
+-- CONSTRAINTS
+CREATE INDEX benchmark_index_on_benchmark_language_id
+ ON public.benchmark(benchmark_language_id);
+
+CREATE INDEX benchmark_index_on_unit_id
+ ON public.benchmark(unit_id);
+
+CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version
+ ON public.benchmark
+ (benchmark_language_id, benchmark_name, benchmark_version);
+COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version
+ IS 'Enforce uniqueness of benchmark name and version for a given language.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql
new file mode 100644
index 000000000..20b9ef0bb
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql
@@ -0,0 +1,112 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_RUN
+CREATE TABLE IF NOT EXISTS public.benchmark_run
+(
+ benchmark_run_id BIGSERIAL PRIMARY KEY
+ , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb
+ , value numeric NOT NULL
+ , git_commit_timestamp timestamp (0) with time zone NOT NULL
+ , git_hash text NOT NULL
+ , val_min numeric
+ , val_q1 numeric
+ , val_q3 numeric
+ , val_max numeric
+ , std_dev numeric NOT NULL
+ , n_obs integer NOT NULL
+ , run_timestamp timestamp (0) with time zone NOT NULL
+ , run_metadata jsonb
+ , run_notes text
+ , machine_id integer NOT NULL
+ , environment_id integer NOT NULL
+ , language_implementation_version_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , benchmark_id integer NOT NULL
+ , FOREIGN KEY (machine_id) REFERENCES public.machine
+ , FOREIGN KEY
+ (environment_id, benchmark_language_id, language_implementation_version_id)
+ REFERENCES public.environment
+ , FOREIGN KEY (benchmark_id, benchmark_language_id)
+ REFERENCES public.benchmark(benchmark_id, benchmark_language_id)
+);
+COMMENT ON TABLE public.benchmark_run
+ IS 'One run per benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.parameter_values
+ IS 'A JSON object mapping the parameter names from '
+ '"benchmark.parameter_names" to values.';
+COMMENT ON COLUMN public.benchmark_run.value
+ IS 'The average value from the benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp
+ IS 'Get this using `git show -s --date=local --format="%ci" <hash>`. '
+ 'ISO 8601 is recommended, e.g. ''2019-01-30 03:12 -0600''.';
+COMMENT ON COLUMN public.benchmark_run.git_hash
+ IS 'The commit has of the codebase currently being benchmarked.';
+COMMENT ON COLUMN public.benchmark_run.val_min
+ IS 'The smallest benchmark run value for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_q1
+ IS 'The first quartile of the benchmark run values for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_q3
+ IS 'The third quartile of the benchmark run values for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_max
+ IS 'The largest benchmark run value for this run.';
+COMMENT ON COLUMN public.benchmark_run.std_dev
+ IS 'The standard deviation of the run values for this benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.n_obs
+ IS 'The number of observations for this benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.run_metadata
+ IS 'Additional metadata of interest, as a JSON object. '
+ 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.';
+COMMENT ON COLUMN public.benchmark_run.run_notes
+ IS 'Additional notes of interest, as a text string. ';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_run
+ ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative
+ CHECK (std_dev >= 0);
+
+ALTER TABLE public.benchmark_run
+ ADD CONSTRAINT benchmark_run_check_n_obs_positive
+ CHECK (n_obs > 0);
+
+CREATE INDEX benchmark_run_index_on_environment_id
+ ON public.benchmark_run(environment_id);
+
+CREATE INDEX benchmark_run_index_on_machine_id
+ ON public.benchmark_run(machine_id);
+
+CREATE INDEX benchmark_run_index_on_benchmark_id
+ ON public.benchmark_run(benchmark_id, benchmark_language_id);
+
+CREATE INDEX benchmark_run_index_on_benchmark_environment_time
+ ON public.benchmark_run
+ (benchmark_id, environment_id, git_commit_timestamp);
+COMMENT ON INDEX
+ public.benchmark_run_index_on_benchmark_environment_time
+ IS 'Index to improve sorting by benchmark, environment, and timestamp.';
+
+CREATE UNIQUE INDEX
+ benchmark_run_unique_index_on_env_benchmark_timestamp_params
+ ON public.benchmark_run
+ (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp);
+COMMENT ON INDEX
+ public.benchmark_run_unique_index_on_env_benchmark_timestamp_params
+ IS 'Enforce uniqueness of benchmark run for a given machine, '
+ 'environment, benchmark, git commit timestamp, and parameter values.';
diff --git a/src/arrow/dev/benchmarking/ddl/2_00_views.sql b/src/arrow/dev/benchmarking/ddl/2_00_views.sql
new file mode 100644
index 000000000..cbd295e50
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/2_00_views.sql
@@ -0,0 +1,324 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+-- NOTE:
+-- The function for documentation depends on view columns
+-- being named exactly the same as in the table view.
+
+-- MACHINE_VIEW
+CREATE OR REPLACE VIEW public.machine_view AS
+ SELECT
+ machine.machine_id
+ , mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ , gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ , machine_other_attributes
+ FROM public.machine AS machine
+ JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id
+ JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id
+ JOIN public.os AS os ON machine.os_id = os.os_id;
+COMMENT ON VIEW public.machine_view IS
+E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n'
+ '- "mac_address" is unique in the "machine" table\n'
+ '- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table\n'
+ ' Empty string (''''), not null, is used for machines that won''t use the GPU\n'
+ '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n'
+ '- "os_name", "os_architecture_name", and "os_kernel_name"\n'
+ ' are unique in the "os" (operating system) table\n'
+ '- "machine_other_attributes" is a key-value store for any other relevant\n'
+ ' data, e.g. ''{"hard_disk_type": "solid state"}''';
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW
+CREATE OR REPLACE VIEW public.language_implementation_version_view AS
+ SELECT
+ lv.language_implementation_version_id
+ , bl.benchmark_language
+ , lv.language_implementation_version
+ FROM public.language_implementation_version AS lv
+ JOIN public.benchmark_language AS bl
+ ON lv.benchmark_language_id = bl.benchmark_language_id;
+
+-- ENVIRONMENT_VIEW
+CREATE OR REPLACE VIEW public.environment_view AS
+ SELECT
+ env.environment_id
+ , benchmark_language
+ , language_implementation_version
+ , dependencies
+ FROM public.environment AS env
+ JOIN public.benchmark_language AS language
+ ON env.benchmark_language_id = language.benchmark_language_id
+ JOIN public.language_implementation_version AS version
+ ON env.language_implementation_version_id = version.language_implementation_version_id
+ JOIN public.dependencies AS deps
+ ON env.dependencies_id = deps.dependencies_id;
+COMMENT ON VIEW public.environment_view IS
+E'The build environment used for a reported benchmark run.\n'
+ '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n'
+ '- Each entry is unique on\n'
+ ' ("benchmark_language", "language_implementation_version", "dependencies")\n'
+ '- "benchmark_language" is unique in the "benchmark_language" table\n'
+ '- "benchmark_language" plus "language_implementation_version" is unique in\n'
+ ' the "language_implementation_version" table\n'
+ '- "dependencies" is unique in the "dependencies" table';
+
+-- UNIT_VIEW
+CREATE OR REPLACE VIEW public.unit_view AS
+ SELECT
+ unit.unit_id
+ , units
+ , benchmark_type
+ , lessisbetter
+ FROM public.unit AS unit
+ JOIN public.benchmark_type AS bt
+ ON unit.benchmark_type_id = bt.benchmark_type_id;
+
+-- BENCHMARK_VIEW
+CREATE OR REPLACE VIEW public.benchmark_view AS
+ SELECT
+ b.benchmark_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ , benchmark_language
+ FROM public.benchmark AS b
+ JOIN public.benchmark_language AS benchmark_language
+ ON b.benchmark_language_id = benchmark_language.benchmark_language_id
+ JOIN public.unit AS unit
+ ON b.unit_id = unit.unit_id
+ JOIN public.benchmark_type AS benchmark_type
+ ON unit.benchmark_type_id = benchmark_type.benchmark_type_id;
+COMMENT ON VIEW public.benchmark_view IS
+E'The details about a particular benchmark.\n\n'
+ '- "benchmark_name" is unique for a given "benchmark_language"\n'
+ '- Each entry is unique on\n'
+ ' ("benchmark_language", "benchmark_name", "benchmark_version")';
+
+-- BENCHMARK_RUN_VIEW
+CREATE OR REPLACE VIEW public.benchmark_run_view AS
+ SELECT
+ run.benchmark_run_id
+ -- benchmark_view (name, version, language only)
+ , benchmark_name
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view (mac address only)
+ , mac_address
+ -- environment_view
+ , env.benchmark_language
+ , language_implementation_version
+ , dependencies
+ FROM public.benchmark_run AS run
+ JOIN public.benchmark_view AS benchmark
+ ON run.benchmark_id = benchmark.benchmark_id
+ JOIN public.machine_view AS machine
+ ON run.machine_id = machine.machine_id
+ JOIN public.environment_view AS env
+ ON run.environment_id = env.environment_id;
+COMMENT ON VIEW public.benchmark_run_view IS
+E'Each benchmark run.\n\n'
+ '- Each entry is unique on the machine, environment, benchmark,\n'
+ ' and git commit timestamp.';
+
+-- FULL_BENCHMARK_RUN_VIEW
+CREATE OR REPLACE VIEW public.full_benchmark_run_view AS
+ SELECT
+ run.benchmark_run_id
+ -- benchmark_view
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ , gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ , machine_other_attributes
+ -- environment_view
+ , env.benchmark_language
+ , env.language_implementation_version
+ , dependencies
+ FROM public.benchmark_run AS run
+ JOIN public.benchmark_view AS benchmark
+ ON run.benchmark_id = benchmark.benchmark_id
+ JOIN public.machine_view AS machine
+ ON run.machine_id = machine.machine_id
+ JOIN public.environment_view AS env
+ ON run.environment_id = env.environment_id;
+
+-- SUMMARIZED_TABLES_VIEW
+CREATE VIEW public.summarized_tables_view AS
+ WITH chosen AS (
+ SELECT
+ cls.oid AS id
+ , cls.relname as tbl_name
+ FROM pg_catalog.pg_class AS cls
+ JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid
+ WHERE
+ cls.relkind = 'r'
+ AND ns.nspname = 'public'
+ ), all_constraints AS (
+ SELECT
+ chosen.id AS tbl_id
+ , chosen.tbl_name
+ , unnest(conkey) AS col_id
+ , 'foreign key' AS col_constraint
+ FROM pg_catalog.pg_constraint
+ JOIN chosen ON chosen.id = conrelid
+ WHERE contype = 'f'
+
+ UNION
+
+ SELECT
+ chosen.id
+ , chosen.tbl_name
+ , unnest(indkey)
+ , 'unique'
+ FROM pg_catalog.pg_index i
+ JOIN chosen ON chosen.id = i.indrelid
+ WHERE i.indisunique AND NOT i.indisprimary
+
+ UNION
+
+ SELECT
+ chosen.id
+ , chosen.tbl_name
+ , unnest(indkey)
+ , 'primary key'
+ FROM pg_catalog.pg_index i
+ JOIN chosen on chosen.id = i.indrelid
+ WHERE i.indisprimary
+ ), gathered_constraints AS (
+ SELECT
+ tbl_id
+ , tbl_name
+ , col_id
+ , string_agg(col_constraint, ', ' ORDER BY col_constraint)
+ AS col_constraint
+ FROM all_constraints
+ GROUP BY tbl_id, tbl_name, col_id
+ )
+ SELECT
+ chosen.tbl_name AS table_name
+ , columns.attnum AS column_number
+ , columns.attname AS column_name
+ , typ.typname AS type_name
+ , CASE
+ WHEN columns.attnotnull
+ THEN 'not null'
+ ELSE ''
+ END AS nullable
+ , CASE
+ WHEN defaults.adsrc like 'nextval%'
+ THEN 'serial'
+ ELSE defaults.adsrc
+ END AS default_value
+ , CASE
+ WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL
+ THEN cnstrnt.consrc
+ WHEN cnstrnt.consrc IS NULL
+ THEN gc.col_constraint
+ ELSE gc.col_constraint || ', ' || cnstrnt.consrc
+ END AS description
+ FROM pg_catalog.pg_attribute AS columns
+ JOIN chosen ON columns.attrelid = chosen.id
+ JOIN pg_catalog.pg_type AS typ
+ ON typ.oid = columns.atttypid
+ LEFT JOIN gathered_constraints AS gc
+ ON gc.col_id = columns.attnum
+ AND gc.tbl_id = columns.attrelid
+ LEFT JOIN pg_attrdef AS defaults
+ ON defaults.adrelid = chosen.id
+ AND defaults.adnum = columns.attnum
+ LEFT JOIN pg_catalog.pg_constraint AS cnstrnt
+ ON cnstrnt.conrelid = columns.attrelid
+ AND columns.attrelid = ANY(cnstrnt.conkey)
+ WHERE
+ columns.attnum > 0
+ ORDER BY table_name, column_number;
+COMMENT ON VIEW public.summarized_tables_view
+ IS 'A summary of all columns from all tables in the public schema, '
+ ' identifying nullability, primary/foreign keys, and data type.';
diff --git a/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql
new file mode 100644
index 000000000..b10b69a4e
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql
@@ -0,0 +1,643 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- PROJECT_DETAILS
+CREATE TYPE public.type_project_details AS (
+ project_name text
+ , project_url text
+ , repo_url text
+);
+
+CREATE OR REPLACE FUNCTION public.project_details()
+RETURNS public.type_project_details AS
+$$
+ SELECT project_name, project_url, repo_url
+ FROM public.project
+ ORDER BY last_changed DESC
+ LIMIT 1
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.project_details()
+IS 'Get the current project name, url, and repo url.';
+
+
+-------------------------- GET-OR-SET FUNCTIONS --------------------------
+-- The following functions have the naming convention "get_<tablename>_id".
+-- All of them attempt to SELECT the desired row given the column
+-- values, and if it does not exist will INSERT it.
+--
+-- When functions are overloaded with fewer columns, it is to allow
+-- selection only, given columns that comprise a unique index.
+
+-- GET_CPU_ID
+CREATE OR REPLACE FUNCTION public.get_cpu_id(
+ cpu_model_name citext
+ , cpu_core_count integer
+ , cpu_thread_count integer
+ , cpu_frequency_max_Hz bigint
+ , cpu_frequency_min_Hz bigint
+ , cpu_L1d_cache_bytes integer
+ , cpu_L1i_cache_bytes integer
+ , cpu_L2_cache_bytes integer
+ , cpu_L3_cache_bytes integer
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT cpu_id INTO result FROM public.cpu AS cpu
+ WHERE cpu.cpu_model_name = $1
+ AND cpu.cpu_core_count = $2
+ AND cpu.cpu_thread_count = $3
+ AND cpu.cpu_frequency_max_Hz = $4
+ AND cpu.cpu_frequency_min_Hz = $5
+ AND cpu.cpu_L1d_cache_bytes = $6
+ AND cpu.cpu_L1i_cache_bytes = $7
+ AND cpu.cpu_L2_cache_bytes = $8
+ AND cpu.cpu_L3_cache_bytes = $9;
+
+ IF result IS NULL THEN
+ INSERT INTO public.cpu(
+ cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ )
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+ RETURNING cpu_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_cpu_id(
+ citext
+ , integer
+ , integer
+ , bigint -- cpu_frequency_max_Hz
+ , bigint -- cpu_frequency_min_Hz
+ , integer
+ , integer
+ , integer
+ , integer
+)
+IS 'Insert or select CPU data, returning "cpu.cpu_id".';
+
+-- GET_GPU_ID
+CREATE OR REPLACE FUNCTION public.get_gpu_id(
+ gpu_information citext DEFAULT NULL
+ , gpu_part_number citext DEFAULT NULL
+ , gpu_product_name citext DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT gpu_id INTO result FROM public.gpu AS gpu
+ WHERE
+ gpu.gpu_information = COALESCE($1, '')
+ AND gpu.gpu_part_number = COALESCE($2, '')
+ AND gpu.gpu_product_name = COALESCE($3, '');
+
+ IF result IS NULL THEN
+ INSERT INTO public.gpu(
+ gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ )
+ VALUES (COALESCE($1, ''), COALESCE($2, ''), COALESCE($3, ''))
+ RETURNING gpu_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext)
+IS 'Insert or select GPU data, returning "gpu.gpu_id".';
+
+-- GET_OS_ID
+CREATE OR REPLACE FUNCTION public.get_os_id(
+ os_name citext
+ , architecture_name citext
+ , kernel_name citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT os_id INTO result FROM public.os AS os
+ WHERE os.os_name = $1
+ AND os.architecture_name = $2
+ AND os.kernel_name = COALESCE($3, '');
+
+ IF result is NULL THEN
+ INSERT INTO public.os(os_name, architecture_name, kernel_name)
+ VALUES ($1, $2, COALESCE($3, ''))
+ RETURNING os_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_os_id(citext, citext, citext)
+IS 'Insert or select OS data, returning "os.os_id".';
+
+-- GET_MACHINE_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_machine_id(
+ mac_address macaddr
+ , machine_name citext
+ , memory_bytes bigint
+ , cpu_actual_frequency_Hz bigint
+ -- os
+ , os_name citext
+ , architecture_name citext
+ , kernel_name citext
+ -- cpu
+ , cpu_model_name citext
+ , cpu_core_count integer
+ , cpu_thread_count integer
+ , cpu_frequency_max_Hz bigint
+ , cpu_frequency_min_Hz bigint
+ , L1d_cache_bytes integer
+ , L1i_cache_bytes integer
+ , L2_cache_bytes integer
+ , L3_cache_bytes integer
+ -- gpu
+ , gpu_information citext DEFAULT ''
+ , gpu_part_number citext DEFAULT NULL
+ , gpu_product_name citext DEFAULT NULL
+ -- nullable machine attributes
+ , machine_other_attributes jsonb DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_cpu_id integer;
+ found_gpu_id integer;
+ found_os_id integer;
+ result integer;
+ BEGIN
+ -- Can't bypass looking up all the values because of unique constraint.
+ SELECT public.get_cpu_id(
+ cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , L1d_cache_bytes
+ , L1i_cache_bytes
+ , L2_cache_bytes
+ , L3_cache_bytes
+ ) INTO found_cpu_id;
+
+ SELECT public.get_gpu_id(
+ gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ ) INTO found_gpu_id;
+
+ SELECT public.get_os_id(
+ os_name
+ , architecture_name
+ , kernel_name
+ ) INTO found_os_id;
+
+ SELECT machine_id INTO result FROM public.machine AS m
+ WHERE m.os_id = found_os_id
+ AND m.cpu_id = found_cpu_id
+ AND m.gpu_id = found_gpu_id
+ AND m.mac_address = $1
+ AND m.machine_name = $2
+ AND m.memory_bytes = $3
+ AND m.cpu_actual_frequency_Hz = $4;
+
+ IF result IS NULL THEN
+ INSERT INTO public.machine(
+ os_id
+ , cpu_id
+ , gpu_id
+ , mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , machine_other_attributes
+ )
+ VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20)
+ RETURNING machine_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_machine_id(
+ macaddr
+ , citext
+ , bigint -- memory_bytes
+ , bigint -- cpu_frequency_actual_Hz
+ -- os
+ , citext
+ , citext
+ , citext
+ -- cpu
+ , citext
+ , integer
+ , integer
+ , bigint -- cpu_frequency_max_Hz
+ , bigint -- cpu_frequency_min_Hz
+ , integer
+ , integer
+ , integer
+ , integer
+ -- gpu
+ , citext
+ , citext
+ , citext
+ -- nullable machine attributes
+ , jsonb
+)
+IS 'Insert or select machine data, returning "machine.machine_id".';
+
+-- GET_MACHINE_ID (given unique mac_address)
+CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr)
+RETURNS integer AS
+$$
+ SELECT machine_id FROM public.machine AS m
+ WHERE m.mac_address = $1;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_machine_id(macaddr)
+IS 'Select machine_id given its mac address, returning "machine.machine_id".';
+
+-- GET_BENCHMARK_LANGUAGE_ID
+CREATE OR REPLACE FUNCTION public.get_benchmark_language_id(language citext)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_language_id INTO result
+ FROM public.benchmark_language AS bl
+ WHERE bl.benchmark_language = language;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark_language(benchmark_language)
+ VALUES (language)
+ RETURNING benchmark_language_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_language_id(citext)
+IS 'Insert or select benchmark_language returning '
+ '"benchmark_language.benchmark_language_id".';
+
+-- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+ language citext
+ , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ language_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id($1) INTO language_id;
+
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = COALESCE($2, '');
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.language_implementation_version(benchmark_language_id, language_implementation_version)
+ VALUES (language_id, COALESCE($2, ''))
+ RETURNING language_implementation_version_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext)
+IS 'Insert or select language and version data, '
+ 'returning "language_implementation_version.language_implementation_version_id".';
+
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+ -- overload for when language_id is known
+ language_id integer
+ , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = COALESCE($2, '');
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.language_implementation_version(benchmark_language_id, language_implementation_version)
+ VALUES (language_id, COALESCE($2, ''))
+ RETURNING language_implementation_version_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- GET_LANGUAGE_DEPENDENCY_LOOKUP_ID
+CREATE OR REPLACE FUNCTION public.get_dependencies_id(
+ dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT dependencies_id INTO result
+ FROM public.dependencies AS ldl
+ WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb);
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.dependencies(dependencies)
+ VALUES (COALESCE($1, '{}'::jsonb))
+ RETURNING dependencies_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_dependencies_id(jsonb)
+IS 'Insert or select dependencies, returning "dependencies.dependencies_id".';
+
+-- GET_ENVIRONMENT_ID
+CREATE OR REPLACE FUNCTION public.get_environment_id(
+ language citext,
+ language_implementation_version citext DEFAULT '',
+ dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_language_id integer;
+ found_version_id integer;
+ found_dependencies_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id($1) INTO found_language_id;
+ SELECT
+ public.get_language_implementation_version_id(found_language_id, $2)
+ INTO found_version_id;
+ SELECT
+ public.get_dependencies_id ($3)
+ INTO found_dependencies_id;
+
+ SELECT environment_id INTO result FROM public.environment AS e
+ WHERE e.benchmark_language_id = found_language_id
+ AND e.language_implementation_version_id = found_version_id
+ AND e.dependencies_id = found_dependencies_id;
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.environment(
+ benchmark_language_id
+ , language_implementation_version_id
+ , dependencies_id
+ )
+ VALUES (found_language_id, found_version_id, found_dependencies_id)
+ RETURNING environment_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb)
+IS 'Insert or select language, language version, and dependencies, '
+ 'returning "environment.environment_id".';
+
+-- GET_BENCHMARK_TYPE_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+ benchmark_type citext
+ , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+ WHERE bt.benchmark_type = $1
+ AND bt.lessisbetter = $2;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark_type(benchmark_type, lessisbetter)
+ VALUES($1, $2)
+ RETURNING benchmark_type_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean)
+IS 'Insert or select benchmark type and lessisbetter, '
+ 'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only)
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+ benchmark_type citext
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+ WHERE bt.benchmark_type = $1;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext)
+IS 'Select benchmark_type_id given benchmark type (e.g. ''time''), '
+ 'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_UNIT_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_unit_id(
+ benchmark_type citext
+ , units citext
+ , lessisbetter boolean DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_benchmark_type_id integer;
+ result integer;
+ BEGIN
+
+ IF ($3 IS NOT NULL) -- if lessisbetter is not null
+ THEN
+ SELECT public.get_benchmark_type_id($1, $3)
+ INTO found_benchmark_type_id;
+ ELSE
+ SELECT public.get_benchmark_type_id($1)
+ INTO found_benchmark_type_id;
+ END IF;
+
+ SELECT unit_id INTO result FROM public.unit AS u
+ WHERE u.benchmark_type_id = found_benchmark_type_id
+ AND u.units = $2;
+
+ IF result IS NULL THEN
+ INSERT INTO public.unit(benchmark_type_id, units)
+ VALUES(found_benchmark_type_id, $2)
+ RETURNING unit_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean)
+IS 'Insert or select benchmark type (e.g. ''time''), '
+ 'units string (e.g. ''miliseconds''), '
+ 'and "lessisbetter" (true if smaller benchmark values are better), '
+ 'returning "unit.unit_id".';
+
+-- GET_UNIT_ID (given unique units string only)
+CREATE OR REPLACE FUNCTION public.get_unit_id(units citext)
+RETURNS integer AS
+$$
+ SELECT unit_id FROM public.unit AS u
+ WHERE u.units = units;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_unit_id(citext)
+IS 'Select unit_id given unit name, returning "unit.unit_id".';
+
+-- GET_BENCHMARK_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+ benchmark_language citext
+ , benchmark_name citext
+ , parameter_names text[]
+ , benchmark_description text
+ , benchmark_version citext
+ , benchmark_type citext
+ , units citext
+ , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_benchmark_language_id integer;
+ found_unit_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id(
+ benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_unit_id(
+ benchmark_type
+ , units
+ , lessisbetter
+ ) INTO found_unit_id;
+
+ SELECT benchmark_id INTO result FROM public.benchmark AS b
+ WHERE b.benchmark_language_id = found_benchmark_language_id
+ AND b.benchmark_name = $2
+ -- handle nullable "parameter_names"
+ AND b.parameter_names IS NOT DISTINCT FROM $3
+ AND b.benchmark_description = $4
+ AND b.benchmark_version = $5
+ AND b.unit_id = found_unit_id;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark(
+ benchmark_language_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_version
+ , unit_id
+ )
+ VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id)
+ RETURNING benchmark_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_id(
+ citext
+ , citext
+ , text[]
+ , text
+ , citext
+ , citext
+ , citext
+ , boolean
+)
+IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".';
+
+-- GET_BENCHMARK_ID (by unique columns)
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+ benchmark_language citext
+ , benchmark_name citext
+ , benchmark_version citext
+)
+RETURNS integer AS
+$$
+ WITH language AS (
+ SELECT public.get_benchmark_language_id(benchmark_language) AS id
+ )
+ SELECT b.benchmark_id
+ FROM public.benchmark AS b
+ JOIN language ON b.benchmark_language_id = language.id
+ WHERE b.benchmark_name = benchmark_name
+ AND benchmark_version = benchmark_version
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext)
+IS 'Select existing benchmark given unique columns, '
+ 'returning "benchmark.benchmark_id".';
diff --git a/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
new file mode 100644
index 000000000..b6ce4741a
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
@@ -0,0 +1,574 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-------------------------- TRIGGER FUNCTIONS --------------------------
+-- Views that do not select from a single table or view are not
+-- automatically updatable. These trigger functions are intended
+-- to be run instead of INSERT into the complicated views.
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ language_id integer;
+ result integer;
+ BEGIN
+ IF NEW.benchmark_language IS NULL THEN
+ RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+ END IF;
+ IF NEW.language_implementation_version IS NULL THEN
+ RAISE EXCEPTION
+ 'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+ INTO language_id;
+
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = NEW.language_implementation_version;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO
+ public.language_implementation_version(
+ benchmark_language_id
+ , language_implementation_version
+ )
+ VALUES (language_id, NEW.language_implementation_version)
+ RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- ENVIRONMENT_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.environment_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_language_id integer;
+ found_version_id integer;
+ found_dependencies_id integer;
+ result integer;
+ BEGIN
+ IF NEW.benchmark_language IS NULL
+ THEN
+ RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+ END IF;
+ IF NEW.language_implementation_version IS NULL THEN
+ RAISE EXCEPTION
+ 'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+ INTO found_language_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_language_id
+ , NEW.language_implementation_version
+ )
+ INTO found_version_id;
+
+ SELECT public.get_dependencies_id(NEW.dependencies)
+ INTO found_dependencies_id;
+
+ SELECT environment_id INTO result FROM public.environment AS e
+ WHERE e.benchmark_language_id = found_language_id
+ AND e.language_implementation_version_id = found_version_id
+ AND e.dependencies_id = found_dependencies_id;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO
+ public.environment(
+ benchmark_language_id
+ , language_implementation_version_id
+ , dependencies_id
+ )
+ VALUES (found_language_id, found_version_id, found_dependencies_id)
+ RETURNING environment_id INTO NEW.environment_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- MACHINE_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.machine_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_cpu_id integer;
+ found_gpu_id integer;
+ found_os_id integer;
+ result integer;
+ BEGIN
+ IF (
+ NEW.machine_name IS NULL
+ OR NEW.memory_bytes IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.cpu_thread_count IS NULL
+ OR NEW.cpu_frequency_max_Hz IS NULL
+ OR NEW.cpu_frequency_min_Hz IS NULL
+ OR NEW.cpu_L1d_cache_bytes IS NULL
+ OR NEW.cpu_L1i_cache_bytes IS NULL
+ OR NEW.cpu_L2_cache_bytes IS NULL
+ OR NEW.cpu_L3_cache_bytes IS NULL
+ OR NEW.os_name IS NULL
+ OR NEW.architecture_name IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. '
+ 'all columns in table "gpu" will default to the empty string '''', '
+ 'as will blank "os.kernel_name". This is to allow uniqueness '
+ 'constraints to work. Thank you!.';
+ END IF;
+
+ SELECT public.get_cpu_id(
+ NEW.cpu_model_name
+ , NEW.cpu_core_count
+ , NEW.cpu_thread_count
+ , NEW.cpu_frequency_max_Hz
+ , NEW.cpu_frequency_min_Hz
+ , NEW.cpu_L1d_cache_bytes
+ , NEW.cpu_L1i_cache_bytes
+ , NEW.cpu_L2_cache_bytes
+ , NEW.cpu_L3_cache_bytes
+ ) INTO found_cpu_id;
+
+ SELECT public.get_gpu_id(
+ NEW.gpu_information
+ , NEW.gpu_part_number
+ , NEW.gpu_product_name
+ ) INTO found_gpu_id;
+
+ SELECT public.get_os_id(
+ NEW.os_name
+ , NEW.architecture_name
+ , NEW.kernel_name
+ ) INTO found_os_id;
+
+ SELECT machine_id INTO result FROM public.machine AS m
+ WHERE m.os_id = found_os_id
+ AND m.cpu_id = found_cpu_id
+ AND m.gpu_id = found_gpu_id
+ AND m.machine_name = NEW.machine_name
+ AND m.memory_bytes = NEW.memory_bytes
+ AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.machine(
+ os_id
+ , cpu_id
+ , gpu_id
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , machine_other_attributes
+ )
+ VALUES (
+ found_os_id
+ , found_cpu_id
+ , found_gpu_id
+ , NEW.machine_name
+ , NEW.mac_address
+ , NEW.memory_bytes
+ , NEW.cpu_actual_frequency_Hz
+ , NEW.machine_other_attributes
+ )
+ RETURNING machine_id INTO NEW.machine_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- UNIT_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.unit_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_type_id integer;
+ result integer;
+ BEGIN
+ IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL)
+ THEN
+ RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n'
+ 'Further, if the "benchmark_type" has never been defined, '
+ '"lessisbetter" must be defined or there will be an error.';
+ END IF;
+
+ -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists.
+ SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter)
+ INTO found_benchmark_type_id;
+
+ SELECT unit_id INTO result FROM public.unit AS u
+ WHERE u.benchmark_type_id = found_benchmark_type_id
+ AND u.units = NEW.units;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.unit (
+ benchmark_type_id
+ , units
+ )
+ VALUES (
+ found_benchmark_type_id
+ , NEW.units
+ )
+ RETURNING unit_id INTO NEW.unit_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_language_id integer;
+ found_units_id integer;
+ result integer;
+ BEGIN
+ IF (
+ NEW.benchmark_name IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ OR NEW.benchmark_type IS NULL
+ OR NEW.benchmark_description IS NULL
+ OR NEW.units IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'The only nullable column in this view is '
+ '"benchmark.parameter_names".';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_unit_id(NEW.units) INTO found_units_id;
+
+ SELECT benchmark_id INTO result FROM public.benchmark AS b
+ WHERE b.benchmark_language_id = found_benchmark_language_id
+ AND b.benchmark_name = NEW.benchmark_name
+ -- handle nullable "parameter_names"
+ AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names
+ AND b.benchmark_description = NEW.benchmark_description
+ AND b.benchmark_version = NEW.benchmark_version
+ AND b.unit_id = found_units_id;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.benchmark(
+ benchmark_language_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_version
+ , unit_id
+ )
+ VALUES (
+ found_benchmark_language_id
+ , NEW.benchmark_name
+ , NEW.parameter_names
+ , NEW.benchmark_description
+ , NEW.benchmark_version
+ , found_units_id
+ )
+ RETURNING benchmark_id INTO NEW.benchmark_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_RUN_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_id integer;
+ found_benchmark_language_id integer;
+ found_machine_id integer;
+ found_environment_id integer;
+ found_language_implementation_version_id integer;
+ BEGIN
+ IF (
+ NEW.benchmark_name IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ OR NEW.value IS NULL
+ OR NEW.run_timestamp IS NULL
+ OR NEW.git_commit_timestamp IS NULL
+ OR NEW.git_hash IS NULL
+ OR NEW.language_implementation_version IS NULL
+ OR NEW.mac_address IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'Only the following columns can be NULL: '
+ '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".';
+ END IF;
+
+ SELECT public.get_benchmark_id(
+ NEW.benchmark_language
+ , NEW.benchmark_name
+ , NEW.benchmark_version
+ ) INTO found_benchmark_id;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_machine_id(
+ NEW.mac_address
+ ) INTO found_machine_id;
+
+ SELECT public.get_environment_id(
+ NEW.benchmark_language
+ , NEW.language_implementation_version
+ , NEW.dependencies
+ ) INTO found_environment_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_benchmark_language_id,
+ NEW.language_implementation_version
+ ) INTO found_language_implementation_version_id;
+
+ INSERT INTO public.benchmark_run (
+ parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ , machine_id
+ , benchmark_language_id
+ , language_implementation_version_id
+ , environment_id
+ , benchmark_id
+ )
+ VALUES (
+ COALESCE(NEW.parameter_values, '{}'::jsonb)
+ , NEW.value
+ , NEW.git_commit_timestamp
+ , NEW.git_hash
+ , NEW.val_min
+ , NEW.val_q1
+ , NEW.val_q3
+ , NEW.val_max
+ , NEW.std_dev
+ , NEW.n_obs
+ , NEW.run_timestamp
+ , NEW.run_metadata
+ , NEW.run_notes
+ , found_machine_id
+ , found_benchmark_language_id
+ , found_language_implementation_version_id
+ , found_environment_id
+ , found_benchmark_id
+ ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_id integer;
+ found_benchmark_language_id integer;
+ found_machine_id integer;
+ found_environment_id integer;
+ found_language_implementation_version_id integer;
+ BEGIN
+ IF (
+ NEW.value IS NULL
+ OR NEW.git_hash IS NULL
+ OR NEW.git_commit_timestamp IS NULL
+ OR NEW.run_timestamp IS NULL
+ -- benchmark
+ OR NEW.benchmark_name IS NULL
+ OR NEW.benchmark_description IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ -- unit
+ OR NEW.benchmark_type IS NULL
+ OR NEW.units IS NULL
+ OR NEW.lessisbetter IS NULL
+ -- machine
+ OR NEW.machine_name IS NULL
+ OR NEW.memory_bytes IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.os_name IS NULL
+ OR NEW.architecture_name IS NULL
+ OR NEW.kernel_name IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.cpu_thread_count IS NULL
+ OR NEW.cpu_frequency_max_Hz IS NULL
+ OR NEW.cpu_frequency_min_Hz IS NULL
+ OR NEW.cpu_L1d_cache_bytes IS NULL
+ OR NEW.cpu_L1i_cache_bytes IS NULL
+ OR NEW.cpu_L2_cache_bytes IS NULL
+ OR NEW.cpu_L3_cache_bytes IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'Only the following columns can be NULL: '
+ '"machine_other_attributes", "parameter_names", "val_min", '
+ '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". '
+ 'If "gpu_information", "gpu_part_number", "gpu_product_name", or '
+ '"kernel_name" are null, they will be silently turned into an '
+ 'empty string ('''').';
+ END IF;
+
+ SELECT public.get_benchmark_id(
+ NEW.benchmark_language
+ , NEW.benchmark_name
+ , NEW.parameter_names
+ , NEW.benchmark_description
+ , NEW.benchmark_version
+ , NEW.benchmark_type
+ , NEW.units
+ , NEW.lessisbetter
+ ) INTO found_benchmark_id;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_machine_id(
+ NEW.mac_address
+ , NEW.machine_name
+ , NEW.memory_bytes
+ , NEW.cpu_actual_frequency_Hz
+ -- os
+ , NEW.os_name
+ , NEW.architecture_name
+ , NEW.kernel_name
+ -- cpu
+ , NEW.cpu_model_name
+ , NEW.cpu_core_count
+ , NEW.cpu_thread_count
+ , NEW.cpu_frequency_max_Hz
+ , NEW.cpu_frequency_min_Hz
+ , NEW.cpu_L1d_cache_bytes
+ , NEW.cpu_L1i_cache_bytes
+ , NEW.cpu_L2_cache_bytes
+ , NEW.cpu_L3_cache_bytes
+ -- gpu
+ , NEW.gpu_information
+ , NEW.gpu_part_number
+ , NEW.gpu_product_name
+ -- nullable machine attributes
+ , NEW.machine_other_attributes
+ ) INTO found_machine_id;
+
+ SELECT public.get_environment_id(
+ NEW.benchmark_language
+ , NEW.language_implementation_version
+ , NEW.dependencies
+ ) INTO found_environment_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_benchmark_language_id,
+ NEW.language_implementation_version
+ ) INTO found_language_implementation_version_id;
+
+ INSERT INTO public.benchmark_run (
+ parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ , machine_id
+ , benchmark_language_id
+ , language_implementation_version_id
+ , environment_id
+ , benchmark_id
+ )
+ VALUES (
+ NEW.parameter_values
+ , NEW.value
+ , NEW.git_commit_timestamp
+ , NEW.git_hash
+ , NEW.val_min
+ , NEW.val_q1
+ , NEW.val_q3
+ , NEW.val_max
+ , NEW.std_dev
+ , NEW.n_obs
+ , NEW.run_timestamp
+ , NEW.run_metadata
+ , NEW.run_notes
+ , found_machine_id
+ , found_benchmark_language_id
+ , found_language_implementation_version_id
+ , found_environment_id
+ , found_benchmark_id
+ ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
diff --git a/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
new file mode 100644
index 000000000..000c61d00
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
@@ -0,0 +1,323 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-------------------------- IMPORT HELPERS --------------------------
+-- Load from JSON (from https://stackoverflow.com/a/48396608)
+-- How to use it in the psql client:
+-- \set content `cat /examples/machine.json`
+-- select ingest_machine(:'content'::jsonb);
+-- INGEST_MACHINE_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ INSERT INTO public.machine_view
+ SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb)
+ RETURNING machine_id INTO result;
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' {\n'
+ ' "mac_address": "0a:00:2d:01:02:03",\n'
+ ' "machine_name": "Yet-Another-Machine-Name",\n'
+ ' "memory_bytes": 8589934592,\n'
+ ' "cpu_actual_frequency_hz": 2300000000,\n'
+ ' "os_name": "OSX",\n'
+ ' "architecture_name": "x86_64",\n'
+ ' "kernel_name": "18.2.0",\n'
+ ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n'
+ ' "cpu_core_count": 2,\n'
+ ' "cpu_thread_count": 4,\n'
+ ' "cpu_frequency_max_hz": 2300000000,\n'
+ ' "cpu_frequency_min_hz": 2300000000,\n'
+ ' "cpu_l1d_cache_bytes": 32768,\n'
+ ' "cpu_l1i_cache_bytes": 32768,\n'
+ ' "cpu_l2_cache_bytes": 262144,\n'
+ ' "cpu_l3_cache_bytes": 4194304,\n'
+ ' "machine_other_attributes": {"just": "an example"},\n'
+ ' "gpu_information": "",\n'
+ ' "gpu_part_number": "",\n'
+ ' "gpu_product_name": ""\n'
+ ' }\n\n'
+ 'To identify which columns in "machine_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb)
+RETURNS setof integer AS
+$$
+ BEGIN
+ RETURN QUERY
+ INSERT INTO public.benchmark_view
+ SELECT * FROM jsonb_populate_recordset(
+ null::public.benchmark_view
+ , from_jsonb
+ )
+ RETURNING benchmark_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 1",\n'
+ ' "parameter_names": ["arg0", "arg1", "arg2"],\n'
+ ' "benchmark_description": "First benchmark",\n'
+ ' "benchmark_type": "Time",\n'
+ ' "units": "miliseconds",\n'
+ ' "lessisbetter": true,\n'
+ ' "benchmark_version": "second version",\n'
+ ' "benchmark_language": "Python"\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "parameter_names": ["arg0", "arg1"],\n'
+ ' "benchmark_description": "Description 2.",\n'
+ ' "benchmark_type": "Time",\n'
+ ' "units": "nanoseconds",\n'
+ ' "lessisbetter": true,\n'
+ ' "benchmark_version": "second version",\n'
+ ' "benchmark_language": "Python"\n'
+ ' }\n'
+ ' ]\n\n'
+ 'To identify which columns in "benchmark_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_RUN_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb)
+RETURNS setof bigint AS
+$$
+ BEGIN
+ RETURN QUERY
+ INSERT INTO public.benchmark_run_view
+ SELECT * FROM
+ jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb)
+ RETURNING benchmark_run_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "benchmark_version": "version 0",\n'
+ ' "parameter_values": {"arg0": 100, "arg1": 5},\n'
+ ' "value": 2.5,\n'
+ ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n'
+ ' "git_hash": "324d3cf198444a",\n'
+ ' "val_min": 1,\n'
+ ' "val_q1": 2,\n'
+ ' "val_q3": 3,\n'
+ ' "val_max": 4,\n'
+ ' "std_dev": 1.41,\n'
+ ' "n_obs": 8,\n'
+ ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 2.7",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "benchmark_version": "version 0",\n'
+ ' "parameter_values": {"arg0": 1000, "arg1": 5},\n'
+ ' "value": 5,\n'
+ ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n'
+ ' "git_hash": "324d3cf198444a",\n'
+ ' "std_dev": 3.14,\n'
+ ' "n_obs": 8,\n'
+ ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 2.7",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n'
+ ' }\n'
+ ' ]\n'
+ 'To identify which columns in "benchmark_run_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_RUNS_WITH_CONTEXT
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb)
+RETURNS setof bigint AS
+$$
+ DECLARE
+ context_jsonb jsonb;
+ found_environment_id integer;
+ found_machine_id integer;
+ BEGIN
+ SELECT from_jsonb -> 'context' INTO context_jsonb;
+
+ SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr)
+ INTO found_machine_id;
+
+ SELECT get_environment_id(
+ (context_jsonb ->> 'benchmark_language')::citext
+ , (context_jsonb ->> 'language_implementation_version')::citext
+ , context_jsonb -> 'dependencies'
+ ) INTO found_environment_id;
+
+ RETURN QUERY
+ WITH run_datum AS (
+ SELECT *
+ FROM jsonb_to_recordset(from_jsonb -> 'benchmarks')
+ AS x(
+ benchmark_name citext
+ , parameter_values jsonb
+ , value numeric
+ , val_min numeric
+ , val_q1 numeric
+ , val_q3 numeric
+ , val_max numeric
+ , std_dev numeric
+ , n_obs integer
+ , run_timestamp timestamp (0) with time zone
+ , run_metadata jsonb
+ , run_notes text
+ )
+ ), benchmark_name_and_id AS (
+ SELECT
+ key AS benchmark_name
+ , public.get_benchmark_id(
+ (context_jsonb ->> 'benchmark_language')::citext
+ , key::citext -- benchmark_name
+ , value::citext -- benchmark_version
+ ) AS benchmark_id
+ FROM jsonb_each_text(from_jsonb -> 'benchmark_version')
+ )
+ INSERT INTO public.benchmark_run (
+ benchmark_id
+ -- run_datum
+ , parameter_values
+ , value
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_metadata
+ , run_notes
+ -- additional context information
+ , git_commit_timestamp
+ , git_hash
+ , run_timestamp
+ -- machine
+ , machine_id
+ -- environment
+ , environment_id
+ , language_implementation_version_id
+ , benchmark_language_id
+ )
+ SELECT
+ b.benchmark_id
+ -- run_datum
+ , run_datum.parameter_values
+ , run_datum.value
+ , run_datum.val_min
+ , run_datum.val_q1
+ , run_datum.val_q3
+ , run_datum.val_max
+ , run_datum.std_dev
+ , run_datum.n_obs
+ , run_datum.run_metadata
+ , run_datum.run_notes
+ -- additional context information
+ , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone
+ , context_jsonb ->> 'git_hash'
+ , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone
+ -- machine
+ , found_machine_id
+ -- environment
+ , e.environment_id
+ , e.language_implementation_version_id
+ , e.benchmark_language_id
+ FROM run_datum
+ JOIN public.environment AS e
+ ON e.environment_id = found_environment_id
+ JOIN benchmark_name_and_id AS b
+ ON b.benchmark_name = run_datum.benchmark_name
+ RETURNING benchmark_run_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'The object contains three key-value pairs::\n\n'
+ ' {"context": {\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 3.6",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n'
+ ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n'
+ ' "git_hash": "123456789abcde",\n'
+ ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n'
+ ' "extra stuff": "does not hurt anything and will not be added."\n'
+ ' },\n'
+ ' "benchmark_version": {\n'
+ ' "Benchmark Name 1": "Any string can be a version.",\n'
+ ' "Benchmark Name 2": "A git hash can be a version.",\n'
+ ' "An Unused Benchmark Name": "Will be ignored."\n'
+ ' },\n'
+ ' "benchmarks": [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark Name 1",\n'
+ ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n'
+ ' "value": 42,\n'
+ ' "val_min": 41.2,\n'
+ ' "val_q1": 41.5,\n'
+ ' "val_q3": 42.5,\n'
+ ' "val_max": 42.8,\n'
+ ' "std_dev": 0.5,\n'
+ ' "n_obs": 100,\n'
+ ' "run_metadata": {"any": "key-value pairs"},\n'
+ ' "run_notes": "Any relevant notes."\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark Name 2",\n'
+ ' "parameter_values": {"not nullable": "Use {} if no params."},\n'
+ ' "value": 8,\n'
+ ' "std_dev": 1,\n'
+ ' "n_obs": 2,\n'
+ ' }\n'
+ ' ]\n'
+ ' }\n\n'
+ '- The entry for "context" contains the machine, environment, and timestamp\n'
+ ' information common to all of the runs\n'
+ '- The entry for "benchmark_version" maps benchmark\n'
+ ' names to their version strings. (Which can be a git hash,\n'
+ ' the entire code string, a number, or any other string of your choice.)\n'
+ '- The entry for "benchmarks" is a list of benchmark run data\n'
+ ' for the given context and benchmark versions. The first example\n'
+ ' benchmark run entry contains all possible values, even\n'
+ ' nullable ones, and the second entry omits all nullable values.\n\n';
diff --git a/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql
new file mode 100644
index 000000000..6b2a05790
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql
@@ -0,0 +1,395 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- _DOCUMENTATION_INGESTION
+CREATE OR REPLACE FUNCTION public._documentation_ingestion()
+RETURNS text AS
+$$
+ WITH ingestion_docs AS (
+ SELECT
+ proname || E'\n'
+ || rpad('', character_length(proname), '-')
+ || E'\n\n:code:`'
+ || proname || '('
+ || string_agg(a.argname || ' ' || typname , ', ')
+ || E')`\n\n'
+ || description
+ || E'\n\n\nback to `Benchmark data model <benchmark-data-model>`_\n'
+ AS docs
+ FROM pg_catalog.pg_proc
+ JOIN pg_catalog.pg_namespace
+ ON nspname='public'
+ AND pg_namespace.oid = pronamespace
+ AND proname LIKE '%ingest%'
+ JOIN pg_catalog.pg_description
+ ON pg_description.objoid=pg_proc.oid,
+ LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype)
+ JOIN pg_catalog.pg_type
+ ON pg_type.oid = a.argtype
+ GROUP BY proname, description
+ )
+ SELECT
+ string_agg(docs, E'\n\n') AS docs
+ FROM ingestion_docs;
+$$
+LANGUAGE sql STABLE;
+
+-- _DOCUMENTATION_VIEW_DETAILS
+CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext)
+RETURNS TABLE(
+ column_name name
+ , type_name name
+ , nullable text
+ , default_value text
+ , description text
+) AS
+$$
+ WITH view_columns AS (
+ SELECT
+ attname AS column_name
+ , attnum AS column_order
+ FROM pg_catalog.pg_attribute
+ WHERE attrelid=view_name::regclass
+ )
+ SELECT
+ t.column_name
+ , type_name
+ , coalesce(nullable, '')
+ , coalesce(default_value, '')
+ , coalesce(description, '')
+ FROM public.summarized_tables_view AS t
+ JOIN view_columns AS v ON v.column_name = t.column_name
+ WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id'
+ ORDER BY column_order;
+$$
+LANGUAGE sql STABLE;
+
+
+-- _DOCUMENTATION_VIEW_PIECES
+CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext)
+RETURNS TABLE (rst_formatted text)
+AS
+$$
+DECLARE
+ column_length integer;
+ type_length integer;
+ nullable_length integer;
+ default_length integer;
+ description_length integer;
+ sep text;
+ border text;
+BEGIN
+
+ -- All of the hard-coded constants here are the string length of the table
+ -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description'
+ SELECT greatest(6, max(character_length(column_name)))
+ FROM public._documentation_view_details(view_name) INTO column_length;
+
+ SELECT greatest(4, max(character_length(type_name)))
+ FROM public._documentation_view_details(view_name) INTO type_length;
+
+ SELECT greatest(8, max(character_length(nullable)))
+ FROM public._documentation_view_details(view_name) INTO nullable_length;
+
+ SELECT greatest(7, max(character_length(default_value)))
+ FROM public._documentation_view_details(view_name) INTO default_length;
+
+ SELECT greatest(11, max(character_length(description)))
+ FROM public._documentation_view_details(view_name) INTO description_length;
+
+ SELECT ' ' INTO sep;
+
+ SELECT
+ concat_ws(sep
+ , rpad('', column_length, '=')
+ , rpad('', type_length, '=')
+ , rpad('', nullable_length, '=')
+ , rpad('', default_length, '=')
+ , rpad('', description_length, '=')
+ )
+ INTO border;
+
+ RETURN QUERY
+ SELECT
+ border
+ UNION ALL
+ SELECT
+ concat_ws(sep
+ , rpad('Column', column_length, ' ')
+ , rpad('Type', type_length, ' ')
+ , rpad('Nullable', nullable_length, ' ')
+ , rpad('Default', default_length, ' ')
+ , rpad('Description', description_length, ' ')
+ )
+ UNION ALL
+ SELECT border
+ UNION ALL
+ SELECT
+ concat_ws(sep
+ , rpad(v.column_name, column_length, ' ')
+ , rpad(v.type_name, type_length, ' ')
+ , rpad(v.nullable, nullable_length, ' ')
+ , rpad(v.default_value, default_length, ' ')
+ , rpad(v.description, description_length, ' ')
+ )
+ FROM public._documentation_view_details(view_name) AS v
+ UNION ALL
+ SELECT border;
+
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- DOCUMENTATION_FOR
+CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext)
+RETURNS text AS
+$$
+ DECLARE
+ view_description text;
+ view_table_markup text;
+ BEGIN
+ SELECT description FROM pg_catalog.pg_description
+ WHERE pg_description.objoid = view_name::regclass
+ INTO view_description;
+
+ SELECT
+ view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' ||
+ view_description || E'\n\n' ||
+ string_agg(rst_formatted, E'\n')
+ INTO view_table_markup
+ FROM public._documentation_view_pieces(view_name);
+
+ RETURN view_table_markup;
+ END
+$$
+LANGUAGE plpgsql STABLE;
+COMMENT ON FUNCTION public.documentation_for(citext)
+IS E'Create an ".rst"-formatted table describing a specific view.\n'
+ 'Example: SELECT public.documentation_for(''endpoint'');';
+
+
+-- DOCUMENTATION
+CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text)
+RETURNS TABLE (full_text text) AS
+$$
+ WITH v AS (
+ SELECT
+ public.documentation_for(relname::citext)
+ || E'\n\nback to `Benchmark data model <benchmark-data-model>`_\n'
+ AS view_documentation
+ FROM pg_catalog.pg_trigger
+ JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid
+ WHERE NOT tgisinternal
+ )
+ SELECT
+ E'\n.. _benchmark-data-model:\n\n'
+ 'Benchmark data model\n'
+ '====================\n\n\n'
+ '.. graphviz:: '
+ || dotfile_name
+ || E'\n\n\n.. _benchmark-ingestion:\n\n'
+ 'Benchmark ingestion helper functions\n'
+ '====================================\n\n'
+ || public._documentation_ingestion()
+ || E'\n\n\n.. _benchmark-views:\n\n'
+ 'Benchmark views\n'
+ '===============\n\n\n'
+ || string_agg(v.view_documentation, E'\n')
+ FROM v
+ GROUP BY True;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.documentation(text)
+IS E'Create an ".rst"-formatted file that shows the columns in '
+ 'every insertable view in the "public" schema.\n'
+ 'The text argument is the name of the generated dotfile to be included.\n'
+ 'Example: SELECT public.documentation(''data_model.dot'');';
+
+
+-- _DOCUMENTATION_DOTFILE_NODE_FOR
+CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name)
+RETURNS text AS
+$$
+DECLARE
+ result text;
+BEGIN
+ WITH node AS (
+ SELECT
+ tablename::text AS lines
+ UNION ALL
+ SELECT
+ E'[label = \n'
+ ' <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">'
+ UNION ALL
+ -- table name
+ SELECT
+ ' <tr><td border="0"><font point-size="14">'
+ || tablename
+ || '</font></td></tr>'
+ UNION ALL
+ -- primary keys
+ SELECT
+ ' <tr><td port="' || column_name || '"><b>'
+ || column_name
+ || ' (pk)</b></td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND description LIKE '%primary key%'
+ UNION ALL
+ -- columns
+ SELECT
+ ' <tr><td>'
+ || column_name
+ || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END
+ || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END
+ || '</td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND (description IS NULL OR description not like '%key%')
+ UNION ALL
+ -- foreign keys
+ SELECT
+ ' <tr><td port="' || column_name || '">'
+ || column_name
+ || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END
+ || ' (fk) </td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND description LIKE '%foreign key%'
+ AND description NOT LIKE '%primary key%'
+ UNION ALL
+ SELECT
+ E' </table>>\n];'
+ )
+ SELECT
+ string_agg(lines, E'\n')
+ INTO result
+ FROM node;
+
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- _DOCUMENTATION_DOTFILE_EDGES
+CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges()
+RETURNS text AS
+$$
+DECLARE
+ result text;
+BEGIN
+ WITH relationship AS (
+ SELECT
+ conrelid AS fk_table_id
+ , confrelid AS pk_table_id
+ , unnest(conkey) AS fk_colnum
+ , unnest(confkey) AS pk_colnum
+ FROM pg_catalog.pg_constraint
+ WHERE confkey IS NOT NULL
+ AND connamespace='public'::regnamespace
+ ), all_edges AS (
+ SELECT
+ fk_tbl.relname || ':' || fk_col.attname
+ || ' -> '
+ || pk_tbl.relname || ':' || pk_col.attname
+ || ';' AS lines
+ FROM relationship
+ -- foreign key table + column
+ JOIN pg_catalog.pg_attribute AS fk_col
+ ON fk_col.attrelid = relationship.fk_table_id
+ AND fk_col.attnum = relationship.fk_colnum
+ JOIN pg_catalog.pg_class AS fk_tbl
+ ON fk_tbl.oid = relationship.fk_table_id
+ -- primary key table + column
+ JOIN pg_catalog.pg_attribute AS pk_col
+ ON pk_col.attrelid = relationship.pk_table_id
+ AND pk_col.attnum = relationship.pk_colnum
+ JOIN pg_catalog.pg_class AS pk_tbl
+ ON pk_tbl.oid = relationship.pk_table_id
+ )
+ SELECT
+ string_agg(lines, E'\n')
+ INTO result
+ FROM all_edges;
+
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- DOCUMENTATION_DOTFILE
+CREATE OR REPLACE FUNCTION public.documentation_dotfile()
+RETURNS text AS
+$$
+DECLARE
+ schemaname name := 'public';
+ result text;
+BEGIN
+ WITH file_contents AS (
+ SELECT
+ E'digraph database {\n concentrate = true;\n'
+ ' rankdir = LR;\n'
+ ' ratio = ".75";\n'
+ ' node [shape = none, fontsize="11", fontname="Helvetica"];\n'
+ ' edge [fontsize="8", fontname="Helvetica"];'
+ AS lines
+ UNION ALL
+ SELECT
+ E'legend\n[fontsize = "14"\nlabel =\n'
+ '<<table border="0" cellpadding="0">\n'
+ ' <tr><td align="left"><font point-size="16">Legend</font></td></tr>\n'
+ ' <tr><td align="left">pk = primary key</td></tr>\n'
+ ' <tr><td align="left">fk = foreign key</td></tr>\n'
+ ' <tr><td align="left">u = unique*</td></tr>\n'
+ ' <tr><td align="left">o = optional</td></tr>\n'
+ ' <tr><td align="left">'
+ '* multiple uniques in the same table are a unique group</td></tr>\n'
+ '</table>>\n];'
+ UNION ALL
+ SELECT
+ string_agg(
+ public._documentation_dotfile_node_for(relname),
+ E'\n' -- Forcing the 'env' table to the end makes a better image
+ ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END)
+ )
+ FROM pg_catalog.pg_class
+ WHERE relkind='r' AND relnamespace = schemaname::regnamespace
+ UNION ALL
+ SELECT
+ public._documentation_dotfile_edges()
+ UNION ALL
+ SELECT
+ '}'
+ )
+ SELECT
+ string_agg(lines, E'\n') AS dotfile
+ INTO result
+ FROM file_contents;
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+COMMENT ON FUNCTION public.documentation_dotfile()
+IS E'Create a Graphviz dotfile of the data model: '
+ 'every table in the "public" schema.\n'
+ 'Example: SELECT public.documentation_dotfile();';
diff --git a/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql
new file mode 100644
index 000000000..5fb0e5018
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql
@@ -0,0 +1,61 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT
+CREATE TRIGGER language_implementation_version_view_trigger_insert
+ INSTEAD OF INSERT ON public.language_implementation_version_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.language_implementation_version_view_insert_row();
+
+-- ENVIRONMENT_VIEW_TRIGGER_INSERT
+CREATE TRIGGER environment_view_trigger_insert
+ INSTEAD OF INSERT ON public.environment_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.environment_view_insert_row();
+
+-- MACHINE_VIEW_TRIGGER_INSERT
+CREATE TRIGGER machine_view_trigger_insert
+ INSTEAD OF INSERT ON public.machine_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.machine_view_insert_row();
+
+-- UNIT_VIEW_TRIGGER_INSERT
+CREATE TRIGGER unit_view_trigger_insert
+ INSTEAD OF INSERT ON public.unit_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.unit_view_insert_row();
+
+-- BENCHMARK_VIEW_TRIGGER_INSERT
+CREATE TRIGGER benchmark_view_trigger_insert
+ INSTEAD OF INSERT ON public.benchmark_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.benchmark_view_insert_row();
+
+-- BENCHMARK_RUN_VIEW_TRIGGER_INSERT
+CREATE TRIGGER benchmark_run_view_trigger_insert
+ INSTEAD OF INSERT ON public.benchmark_run_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.benchmark_run_view_insert_row();
+
+-- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT
+CREATE TRIGGER full_benchmark_run_view_trigger_insert
+ INSTEAD OF INSERT ON public.full_benchmark_run_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.full_benchmark_run_view_insert_row();
diff --git a/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql
new file mode 100644
index 000000000..dd72c40db
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql
@@ -0,0 +1,73 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+---------------------------- ROLES ----------------------------
+-- ARROW_WEB
+CREATE ROLE arrow_web login password 'arrow';
+COMMENT ON ROLE arrow_web IS 'Anonymous login user.';
+
+-- ARROW_ADMIN
+CREATE ROLE arrow_admin;
+COMMENT ON ROLE arrow_admin
+ IS 'Can select, insert, update, and delete on all public tables.';
+
+-- ARROW_ANONYMOUS
+CREATE ROLE arrow_anonymous;
+COMMENT ON ROLE arrow_anonymous
+ IS 'Can insert and select on all public tables.';
+
+GRANT arrow_anonymous TO arrow_web;
+
+
+---------------------------- PRIVILEGES ----------------------------
+GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin;
+
+-- ARROW_ADMIN
+GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin;
+GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin;
+GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public
+ TO arrow_admin;
+
+-- ARROW_ANONYMOUS
+GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous;
+GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous;
+GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous;
+GRANT INSERT ON
+ public.benchmark
+ , public.benchmark_language
+ , public.dependencies
+ , public.language_implementation_version
+ , public.benchmark_run
+ , public.benchmark_type
+ , public.cpu
+ , public.environment
+ , public.environment_view
+ , public.gpu
+ , public.machine
+ , public.machine_view
+ , public.os
+ , public.unit
+ --, public.project -- The only disallowed table is `project`.
+ , public.benchmark_run_view
+ , public.benchmark_view
+ , public.environment_view
+ , public.full_benchmark_run_view
+ , public.language_implementation_version_view
+ , public.machine_view
+ , public.unit_view
+TO arrow_anonymous;
diff --git a/src/arrow/dev/benchmarking/docker-compose.yml b/src/arrow/dev/benchmarking/docker-compose.yml
new file mode 100644
index 000000000..ca60206bf
--- /dev/null
+++ b/src/arrow/dev/benchmarking/docker-compose.yml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+version: '3'
+services:
+
+ pg:
+ build:
+ context: .
+ dockerfile: Dockerfile
+ restart: always
+ ports:
+ - '5432:5432'
+ environment:
+ - POSTGRES_PASSWORD=${PG_PASS}
+ - POSTGRES_USER=${PG_USER}
+
+ graphile:
+ image: graphile/postgraphile
+ restart: always
+ ports:
+ - 5000:5000
+ depends_on:
+ - pg
+ command:
+ - --connection
+ - postgres://${PG_USER}:${PG_PASS}@pg:5432/${PG_USER}
+ - --schema
+ - public
+ - --watch
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_example.json b/src/arrow/dev/benchmarking/examples/benchmark_example.json
new file mode 100644
index 000000000..d6f58c286
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_example.json
@@ -0,0 +1,32 @@
+[
+ {
+ "benchmark_name": "Benchmark 1",
+ "parameter_names": ["arg0", "arg1", "arg2"],
+ "benchmark_description": "First benchmark",
+ "benchmark_type": "Time",
+ "units": "miliseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_names": ["arg0", "arg1"],
+ "benchmark_description": "Description 2.",
+ "benchmark_type": "Time",
+ "units": "nanoseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 3",
+ "parameter_names": ["arg0"],
+ "benchmark_description": "Third benchmark",
+ "benchmark_type": "Memory",
+ "units": "kilobytes",
+ "lessisbetter": true,
+ "benchmark_version": "1",
+ "benchmark_language": "Python"
+ }
+]
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv
new file mode 100644
index 000000000..eab208a1c
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv
@@ -0,0 +1,6 @@
+benchmark_run_id,benchmark_name,benchmark_version,parameter_values,value,git_commit_timestamp,git_hash,val_min,val_q1,val_q3,val_max,std_dev,n_obs,run_timestamp,run_metadata,run_notes,mac_address,benchmark_language,language_implementation_version,dependencies
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,3,4,1.41,8,2019-02-14 02:00:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,4,6,8,3.14,8,2019-02-14 02:01:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,0.5,1,3,5,3,8,2019-02-14 02:02:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,2.5,4,4.5,1.5,8,2019-02-14 02:03:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 10}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,4,5,2,8,2019-02-14 02:03:30 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.15"", ""other_lib"": ""1.0""}"
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.json b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json
new file mode 100644
index 000000000..2ded776c9
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json
@@ -0,0 +1,97 @@
+[
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:05 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 2,
+ "val_q1": 4,
+ "val_q3": 6,
+ "val_max": 8,
+ "std_dev": 3.14,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:10 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 0.5,
+ "val_q1": 1,
+ "val_q3": 3,
+ "val_max": 5,
+ "std_dev": 3,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:20 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"boost": "1.42", "numpy": "1.15"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 3,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 2,
+ "val_q1": 2.5,
+ "val_q3": 4,
+ "val_max": 4.5,
+ "std_dev": 1.5,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:30 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"boost": "1.42", "numpy": "1.15"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 10},
+ "value": 3,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 4,
+ "val_max": 5,
+ "std_dev": 2,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:40 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.15", "other_lib": "1.0"}
+ }
+]
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json
new file mode 100644
index 000000000..f9e6e3130
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json
@@ -0,0 +1,73 @@
+{
+ "context": {
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},
+ "git_commit_timestamp": "2019-02-14 22:42:22 +0100",
+ "git_hash": "123456789abcde",
+ "run_timestamp": "2019-02-25 03:00:40 -0600",
+ "Extra stuff": "does not hurt anything and won't be added.",
+ "However": "all of the entries above 'Extra stuff' are required."
+ },
+ "benchmark_version": {
+ "Benchmark 2": "version 0",
+ "Benchmark 3": "any string is a version. (Benchmark 3 not actually used)"
+ },
+ "benchmarks": [
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 1, "arg1": 5},
+ "value": 2.5,
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_metadata": {"any": "json object is admissible"},
+ "run_notes": "This value is an arbitrary-length string."
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 2, "arg1": 5},
+ "value": 5,
+ "std_dev": 3.14,
+ "n_obs": 8
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 3, "arg1": 5},
+ "value": 2.5,
+ "val_min": 0.5,
+ "val_q1": 1,
+ "val_q3": 3,
+ "val_max": 5,
+ "std_dev": 3,
+ "n_obs": 8,
+ "run_notes": "The previous run in this list has the minimal set of keys."
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 4, "arg1": 5},
+ "value": 3,
+ "val_min": 2,
+ "val_q1": 2.5,
+ "val_q3": 4,
+ "val_max": 4.5,
+ "std_dev": 1.5,
+ "n_obs": 8
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 5, "arg1": 5},
+ "value": 3,
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 4,
+ "val_max": 5,
+ "std_dev": 2,
+ "n_obs": 8
+ }
+ ]
+}
diff --git a/src/arrow/dev/benchmarking/examples/example.sql b/src/arrow/dev/benchmarking/examples/example.sql
new file mode 100644
index 000000000..e93269af7
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/example.sql
@@ -0,0 +1,232 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- Example insert into each of the views:
+INSERT INTO public.project(project_name, project_url, repo_url)
+VALUES (
+ 'Apache Arrow'
+ , 'https://arrow.apache.org/'
+ , 'https://github.com/apache/arrow');
+
+INSERT INTO public.environment_view
+ (benchmark_language, language_implementation_version, dependencies)
+VALUES
+ ('Python', 'CPython 2.7', '{"six": "", "numpy": "1.14", "other_lib": "1.0"}'),
+ ('Python', 'CPython 3.6', '{"boost": "1.42", "numpy": "1.15"}');
+
+INSERT INTO public.dependencies(dependencies)
+VALUES
+ ('{"boost": "1.68", "numpy": "1.14"}'),
+ ('{"boost": "1.42", "numpy": "1.16"}');
+
+INSERT INTO public.language_implementation_version_view
+ (benchmark_language, language_implementation_version)
+VALUES
+ ('Python', 'CPython 2.7'),
+ ('Python', 'CPython 3.6');
+
+INSERT INTO public.unit_view
+ (benchmark_type, units, lessisbetter)
+VALUES
+ ('Memory', 'gigabytes', True),
+ ('Memory', 'kilobytes', True);
+
+
+\echo 'use \\dv to list the views views';
+\dv
+
+
+SELECT * FROM environment_view;
+SELECT * FROM unit_view;
+
+
+INSERT INTO public.machine_view (
+ mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_hz
+ , cpu_frequency_min_hz
+ , cpu_l1d_cache_bytes
+ , cpu_l1i_cache_bytes
+ , cpu_l2_cache_bytes
+ , cpu_l3_cache_bytes
+ , machine_other_attributes
+) VALUES (
+ '08:00:2b:01:02:03' -- mac_address
+ , 'My-Machine-Name' -- machine_name
+ , 8589934592 -- memory_bytes
+ -- All (?) standard mac address formats are allowable:
+ -- https://www.postgresql.org/docs/11/datatype-net-types.html
+ , 2300000000 -- cpu_actual_frequency_Hz
+ , 'OSX' -- os_name
+ , 'x86_64' -- architecture_name
+ , '18.2.0' -- kernel
+ , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' -- cpu_model_name
+ , 2 -- cpu_core_count
+ , 4 -- cpu_thread_count
+ , 2300000000 -- cpu_frequency_max_Hz
+ , 2300000000 -- cpu_frequency_min_Hz
+ , 32768 -- cpu_l1d_cache_bytes
+ , 32768 -- cpu_l1i_cache_bytes
+ , 262144 -- cpu_l2_cache_bytes
+ , 4194304 -- cpu_l3_cache_bytes
+ , '{"example": "for machine_other_attributes"}'::jsonb
+);
+
+
+INSERT INTO public.full_benchmark_run_view (
+ benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_hz
+ , cpu_frequency_min_hz
+ , cpu_l1d_cache_bytes
+ , cpu_l1i_cache_bytes
+ , cpu_l2_cache_bytes
+ , cpu_l3_cache_bytes
+ , machine_other_attributes
+ -- environment_view
+ , benchmark_language
+ , language_implementation_version
+ , dependencies
+) VALUES (
+ 'Benchmark 3'
+ , '{"arg0"}'::text[]
+ , 'Third benchmark'
+ , 'Memory'
+ , 'kilobytes'
+ , TRUE
+ , '0'
+ -- datum
+ , '{"arg0": 10}'::jsonb
+ , 0.5
+ , '2019-01-31 14:31:10 -0600'
+ , '8136c46d5c60fb'
+ , 0.5
+ , 0.5
+ , 0.5
+ , 0.5
+ , 0
+ , 2
+ , '2019-02-14 14:00:00 -0600'
+ , '{"ci_99": [2.7e-06, 3.1e-06]}'::jsonb
+ , 'Additional run_notes.'
+ -- machine_view
+ , 'My-Machine-Name'
+ , '09-00-2c-01-02-03'
+ , 8589934592
+ , 2300000000
+ , 'OSX'
+ , 'x86_64'
+ , '18.2.0'
+ , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz'
+ , 2
+ , 4
+ , 2300000000
+ , 2300000000
+ , 32768
+ , 32768
+ , 262144
+ , 4194304
+ , '{"example": "for machine_other_attributes"}'::jsonb
+ -- environment_view
+ , 'Python'
+ , 'CPython 2.7'
+ , '{"six": "", "numpy": "1.15", "other_lib": "1.0"}'::jsonb
+);
+
+
+-- Bulk load from CSV. First column is empty; serial "benchmark_run_id" will be assigned.
+--\copy benchmark_run_view FROM 'examples/benchmark_run_example.csv' WITH (FORMAT csv, HEADER);
+
+-- Load from JSON
+--\set content `cat examples/benchmark_example.json`
+--SELECT ingest_benchmark_view(:'content'::jsonb);
+
+INSERT INTO public.benchmark_view (
+ benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ , benchmark_language
+ ) VALUES (
+ 'Benchmark 1'
+ , '{"arg0", "arg1", "arg2"}'::text[]
+ , E'Description.\nNewlines are OK in a string escaped with leading "E".'
+ , 'Time'
+ , 'miliseconds'
+ , TRUE
+ , 'Hash of code or other way to identify distinct benchmark versions.'
+ , 'Python'
+ ), (
+ 'Benchmark 2'
+ , '{"arg0", "arg1"}'::text[]
+ , 'Description 2.'
+ , 'Time'
+ , 'nanoseconds'
+ , TRUE
+ , 'version 0'
+ , 'Python'
+ );
+
+
+\x
+SELECT * from benchmark_run_view;
+
+\x
diff --git a/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json
new file mode 100644
index 000000000..fec5eed0a
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json
@@ -0,0 +1,12 @@
+{
+ "query": "mutation ($p: CreateProjectInput!){createProject(input:$p){project{id}}}",
+ "variables": {
+ "p": {
+ "project": {
+ "projectName": "Apache Arrow",
+ "projectUrl": "https://www.arrow.apache.org",
+ "repoUrl": "https://www.github.com/apache/arrow"
+ }
+ }
+ }
+}
diff --git a/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json
new file mode 100644
index 000000000..78804fa91
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json
@@ -0,0 +1,3 @@
+{
+ "query": "{allEnvironmentViews(orderBy: [BENCHMARK_LANGUAGE_ASC, LANGUAGE_IMPLEMENTATION_VERSION_ASC, DEPENDENCIES_ASC]) {edges {node {environmentId, benchmarkLanguage, languageImplementationVersion, dependencies}}}}"
+}
diff --git a/src/arrow/dev/benchmarking/examples/machine.json b/src/arrow/dev/benchmarking/examples/machine.json
new file mode 100644
index 000000000..2485e2bc1
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/machine.json
@@ -0,0 +1,22 @@
+{
+ "mac_address": "0a:00:2d:01:02:03",
+ "machine_name": "Yet-Another-Machine-Name",
+ "memory_bytes": 8589934592,
+ "cpu_actual_frequency_hz": 2300000000,
+ "os_name": "OSX",
+ "architecture_name": "x86_64",
+ "kernel_name": "18.2.0",
+ "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",
+ "cpu_core_count": 2,
+ "cpu_thread_count": 4,
+ "cpu_frequency_max_hz": 2300000000,
+ "cpu_frequency_min_hz": 2300000000,
+ "cpu_l1d_cache_bytes": 32768,
+ "cpu_l1i_cache_bytes": 32768,
+ "cpu_l2_cache_bytes": 262144,
+ "cpu_l3_cache_bytes": 4194304,
+ "machine_other_attributes": {"just": "an example"},
+ "gpu_information": "",
+ "gpu_part_number": "",
+ "gpu_product_name": ""
+}
diff --git a/src/arrow/dev/benchmarking/graphql_submit.sh b/src/arrow/dev/benchmarking/graphql_submit.sh
new file mode 100755
index 000000000..2eaab9cdf
--- /dev/null
+++ b/src/arrow/dev/benchmarking/graphql_submit.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+OPTIONS=("machine" "benchmarks" "runs")
+
+option=${1-help}
+datafile=${2-machine.json}
+uri=${3-localhost:5000/graphql}
+
+help() {
+ cat <<HELP
+ Submit data via GraphQL
+
+ Usage:
+ ${0} [option] [JSON_file] [URI]
+
+ Arguments:
+ option - $(echo ${OPTIONS[@]} | sed 's/ /|/g')
+ JSON_file - path to the submission file (default 'machine.json')
+ URI - URI to submit to (default 'localhost:5000/graphql')
+HELP
+}
+
+escape_quote() { sed 's/"/\\"/g'; }
+
+template() {
+ cat <<TEMPLATE
+ {
+ "query": "mutation (\$jsonb: JSON!){${1}(input:{fromJsonb:\$jsonb}){${2}}}",
+ "variables": {
+ "jsonb": "$(echo $(cat ${datafile}) | escape_quote )"
+ }
+ }
+TEMPLATE
+}
+
+submit () {
+ curl -X POST -H "Content-Type: application/json" --data @<(template $1 $2) ${uri}
+}
+
+
+case "$1" in
+ machine)
+ submit ingestMachineView integer;;
+
+ benchmarks)
+ submit ingestBenchmarkView integers;;
+
+ runs)
+ if grep -q context <(head -n2 ${2})
+ then
+ submit ingestBenchmarkRunsWithContext bigInts
+ else
+ submit ingestBenchmarkRunView bigInts
+ fi;;
+
+ *)
+ help
+ exit 1
+esac
diff --git a/src/arrow/dev/benchmarking/make_data_model_rst.sh b/src/arrow/dev/benchmarking/make_data_model_rst.sh
new file mode 100755
index 000000000..6a4f5f5b6
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_data_model_rst.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+DOTFILE=data_model.dot
+OUTFILE=data_model.rst
+
+license() {
+ cat <<'LICENSE' > ${1}
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+
+LICENSE
+}
+
+warning() {
+ cat <<'WARNING' >> ${1}
+.. WARNING
+.. This is an auto-generated file. Please do not edit.
+
+.. To reproduce, please run :code:`./make_data_model_rst.sh`.
+.. (This requires you have the
+.. `psql client <https://www.postgresql.org/download/>`_
+.. and have started the docker containers using
+.. :code:`docker-compose up`).
+
+WARNING
+}
+
+echo "Making ${OUTFILE}"
+
+license ${OUTFILE}
+warning ${OUTFILE}
+
+PGPASSWORD=arrow \
+ psql --tuples-only --username=arrow_web \
+ --dbname=benchmark --port=5432 --host=localhost \
+ --command="select public.documentation('${DOTFILE}');" \
+ | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE}
diff --git a/src/arrow/dev/benchmarking/make_dotfile.sh b/src/arrow/dev/benchmarking/make_dotfile.sh
new file mode 100755
index 000000000..b86dc3eb3
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_dotfile.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+OUTFILE=data_model.dot
+
+license() {
+ cat <<'LICENSE' > ${1}
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+LICENSE
+}
+
+warning() {
+ cat <<'WARNING' >> ${1}
+/*
+ WARNING
+ This is an auto-generated file. Please do not edit.
+
+ To reproduce, please run :code:`./make_data_model_rst.sh`.
+ (This requires you have the
+ `psql client <https://www.postgresql.org/download/>`_
+ and have started the docker containers using
+ :code:`docker-compose up`).
+*/
+WARNING
+}
+
+echo "Making ${OUTFILE}"
+
+license ${OUTFILE}
+warning ${OUTFILE}
+
+PGPASSWORD=arrow \
+ psql --tuples-only --username=arrow_web \
+ --dbname=benchmark --port=5432 --host=localhost \
+ --command="select public.documentation_dotfile();" \
+ | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE}
diff --git a/src/arrow/dev/benchmarking/make_machine_json.sh b/src/arrow/dev/benchmarking/make_machine_json.sh
new file mode 100755
index 000000000..09bf0ea2d
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_machine_json.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+OUTFILE=machine.json
+
+echo "Making ${OUTFILE}"
+echo "** NOTE: This command fails on everything but OSX right now. **"
+echo "* also, the intent is to make this script not suck, just not now. *"
+echo "Please type GPU details here (or manually modify ${OUTFILE} later)."
+read -p "GPU information string (or <enter>): " gpu_information
+read -p "GPU part number (or <enter>): " gpu_part_number
+read -p "GPU product name (or <enter>): " gpu_product_name
+
+
+cat <<MACHINE_JSON > ${OUTFILE}
+{
+ "mac_address": "$(ifconfig en1 | awk '/ether/{print $2}')",
+ "machine_name": "$(uname -n)",
+ "memory_bytes": $(sysctl -n hw.memsize),
+ "cpu_actual_frequency_hz": $(sysctl -n hw.cpufrequency),
+ "os_name": "$(uname -s)",
+ "architecture_name": "$(uname -m)",
+ "kernel_name": "$(uname -r)",
+ "cpu_model_name": "$(sysctl -n machdep.cpu.brand_string)",
+ "cpu_core_count": $(sysctl -n hw.physicalcpu),
+ "cpu_thread_count": $(sysctl -n hw.logicalcpu),
+ "cpu_frequency_max_hz": $(sysctl -n hw.cpufrequency_max),
+ "cpu_frequency_min_hz": $(sysctl -n hw.cpufrequency_min),
+ "cpu_l1d_cache_bytes": $(sysctl -n hw.l1dcachesize),
+ "cpu_l1i_cache_bytes": $(sysctl -n hw.l1icachesize),
+ "cpu_l2_cache_bytes": $(sysctl -n hw.l2cachesize),
+ "cpu_l3_cache_bytes": $(sysctl -n hw.l3cachesize),
+ "gpu_information": "${gpu_information}",
+ "gpu_part_number": "${gpu_part_number}",
+ "gpu_product_name": "${gpu_product_name}"
+}
+MACHINE_JSON
+
+echo "Machine details saved in ${OUTFILE}"
diff --git a/src/arrow/dev/conbench_envs/README.md b/src/arrow/dev/conbench_envs/README.md
new file mode 100644
index 000000000..5a4eb58b2
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/README.md
@@ -0,0 +1,214 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+# Benchmark Builds Env and Hooks
+This directory contains:
+- [benchmarks.env](benchmarks.env) - list of env vars used for building Arrow C++/Python/R/Java/JavaScript and running benchmarks using [conbench](https://ursalabs.org/blog/announcing-conbench/).
+- [hooks.sh](hooks.sh) - hooks used by <b>@ursabot</b> benchmark builds that are triggered by `@ursabot please benchmark` PR comments.
+
+## How to add or update Arrow build and run env vars used by `@ursabot` benchmark builds
+1. Create `apache/arrow` PR
+2. Update or add env var value in [benchmarks.env](../../dev/conbench_envs/benchmarks.env)
+3. Add `@ursabot please benchmark` comment to PR
+4. Once benchmark builds are done, benchmark results can be viewed via compare/runs links in the PR comment where
+- baseline = PR base HEAD commit with unaltered `/dev/conbench_envs/benchmarks.env`
+- contender = PR branch HEAD commit with overridden `/dev/conbench_envs/benchmarks.env`
+
+## Why do`@ursabot` benchmark builds need `hooks.sh`?
+`@ursabot` benchmark builds are maintained in Ursa's private repo.
+Benchmark builds use `hooks.sh` functions as hooks to create conda env with Arrow dependencies and build Arrow C++/Python/R/Java/JavaScript from source for a specific Arrow repo's commit.
+
+Defining hooks in Arrow repo allows benchmark builds for a specific commit to be
+compatible with the files/scripts *in that commit* which are used for installing Arrow
+dependencies and building Arrow. This allows Arrow contributors to asses the perfomance
+implications of different build options, dependency versions, etc by updating
+`hooks.sh`.
+
+## Can other repos and services use `benchmarks.env` and `hooks.sh`?
+
+Yes, other repos and services are welcome to use `benchmarks.env` and `hooks.sh` as long as
+- existing hooks are not removed or renamed.
+- function definitions for exiting hooks can only be updated in the Arrow commit where Arrow build scripts or files with dependencies have been renamed, moved or added.
+- benchmark builds are run using `@ursabot please benchmark` PR comment to confirm that function definition updates do not break benchmark builds.
+
+## How can other repos and services use `benchmarks.env` and `hooks.sh` to setup benchmark env?
+Here are steps how `@ursabot` benchmark builds use `benchmarks.env` and `hooks.sh` to setup benchmarking env on Ubuntu:
+
+### 1. Install Arrow dependencies
+ sudo su
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-regex-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libgflags-dev \
+ libcurl4-openssl-dev \
+ libgoogle-glog-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ rapidjson-dev \
+ tzdata \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+ apt-get update -y -q && \
+ apt-get install -y -q \
+ python3 \
+ python3-pip \
+ python3-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+### 2. Install Arrow dependencies for Java
+ sudo su
+ apt-get install openjdk-8-jdk
+ apt-get install maven
+
+Verify that you have at least these versions of `java`, `javac` and `maven`:
+
+ # java -version
+ openjdk version "1.8.0_292"
+ ..
+ # javac -version
+ javac 1.8.0_292
+ ...
+ # mvn -version
+ Apache Maven 3.6.3
+ ...
+
+### 3. Install Arrow dependencies for Java Script
+ sudo apt update
+ sudo apt -y upgrade
+ sudo apt update
+ sudo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates
+ curl -fsSL https://deb.nodesource.com/setup_14.x | sudo -E bash -
+ sudo apt-get install -y nodejs
+ sudo apt -y install yarn
+ sudo apt -y install gcc g++ make
+
+Verify that you have at least these versions of `node` and `yarn`:
+
+ # node --version
+ v14.17.2
+ ...
+ # yarn --version
+ 1.22.5
+ ...
+
+### 4. Install Conda
+ sudo apt install curl
+ curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+ sudo bash Miniconda3-latest-Linux-x86_64.sh
+
+### 5. Set env vars:
+ export ARROW_REPO=https://github.com/apache/arrow.git
+ export BENCHMARKABLE=e6e9e6ea52b7a8f2682ffc4160168c936ca1d3e6
+ export BENCHMARKABLE_TYPE=arrow-commit
+ export PYTHON_VERSION=3.8
+ export CONBENCH_EMAIL=...
+ export CONBENCH_URL="https://conbench.ursa.dev"
+ export CONBENCH_PASSWORD=...
+ export MACHINE=...
+
+### 6. Use `create_conda_env_with_arrow_python` hook to create conda env and build Arrow C++ and Arrow Python
+ git clone "${ARROW_REPO}"
+ pushd arrow
+ git fetch -v --prune -- origin "${BENCHMARKABLE}"
+ git checkout -f "${BENCHMARKABLE}"
+ source dev/conbench_envs/hooks.sh create_conda_env_with_arrow_python
+ popd
+
+### 7. Install conbench
+ git clone https://github.com/ursacomputing/conbench.git
+ pushd conbench
+ pip install -r requirements-cli.txt
+ pip install -U PyYAML
+ python setup.py install
+ popd
+
+### 8. Setup benchmarks repo
+ git clone https://github.com/ursacomputing/benchmarks.git
+ pushd benchmarks
+ python setup.py develop
+ popd
+
+### 9. Setup conbench credentials
+ pushd benchmarks
+ touch .conbench
+ echo "url: $CONBENCH_URL" >> .conbench
+ echo "email: $CONBENCH_EMAIL" >> .conbench
+ echo "password: $CONBENCH_PASSWORD" >> .conbench
+ echo "host_name: $MACHINE" >> .conbench
+ popd
+
+### 10. Run Python benchmarks
+ cd benchmarks
+ conbench file-read ALL --iterations=3 --all=true --drop-caches=true
+
+### 11. Use `install_archery` hook to setup archery and run C++ benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh install_archery
+ popd
+ cd benchmarks
+ conbench cpp-micro --iterations=1
+
+### 12. Use `build_arrow_r` hook to build Arrow R and run R benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh build_arrow_r
+ popd
+ R -e "remotes::install_github('ursacomputing/arrowbench')"
+ cd benchmarks
+ conbench dataframe-to-table ALL --iterations=3 --drop-caches=true --language=R
+
+### 13. Use `build_arrow_java` and `install_archery` hooks to build Arrow Java and run Java benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh build_arrow_java
+ source dev/conbench_envs/hooks.sh install_archery
+ popd
+ cd benchmarks
+ conbench java-micro --iterations=1
+
+### 14. Use `install_java_script_project_dependencies` hook to install Java Script dependencies and run Java Script benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh install_java_script_project_dependencies
+ popd
+ cd benchmarks
+ conbench js-micro
diff --git a/src/arrow/dev/conbench_envs/benchmarks.env b/src/arrow/dev/conbench_envs/benchmarks.env
new file mode 100644
index 000000000..6c151aa7c
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/benchmarks.env
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ARROW_BUILD_TESTS=OFF
+ARROW_BUILD_TYPE=release
+ARROW_DEPENDENCY_SOURCE=AUTO
+ARROW_DATASET=ON
+ARROW_DEFAULT_MEMORY_POOL=mimalloc
+ARROW_ENABLE_UNSAFE_MEMORY_ACCESS=true
+ARROW_ENABLE_NULL_CHECK_FOR_GET=false
+ARROW_FLIGHT=OFF
+ARROW_GANDIVA=OFF
+ARROW_HDFS=ON
+ARROW_HOME=$CONDA_PREFIX
+ARROW_INSTALL_NAME_RPATH=OFF
+ARROW_MIMALLOC=ON
+ARROW_NO_DEPRECATED_API=ON
+ARROW_ORC=ON
+ARROW_PARQUET=ON
+ARROW_PLASMA=ON
+ARROW_PYTHON=ON
+ARROW_S3=ON
+ARROW_USE_ASAN=OFF
+ARROW_USE_CCACHE=ON
+ARROW_USE_UBSAN=OFF
+ARROW_WITH_BROTLI=ON
+ARROW_WITH_BZ2=ON
+ARROW_WITH_LZ4=ON
+ARROW_WITH_SNAPPY=ON
+ARROW_WITH_ZLIB=ON
+ARROW_WITH_ZSTD=ON
+GTest_SOURCE=BUNDLED
+ORC_SOURCE=BUNDLED
+PARQUET_BUILD_EXAMPLES=ON
+PARQUET_BUILD_EXECUTABLES=ON
+PYTHON=python
+LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH \ No newline at end of file
diff --git a/src/arrow/dev/conbench_envs/hooks.sh b/src/arrow/dev/conbench_envs/hooks.sh
new file mode 100755
index 000000000..6bcfbe446
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/hooks.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+## These hooks are used by benchmark builds
+# to create a conda env with Arrow dependencies and build Arrow C++, Python, etc
+create_conda_env_for_benchmark_build() {
+ conda create -y -n "${BENCHMARKABLE_TYPE}" -c conda-forge \
+ --file ci/conda_env_unix.txt \
+ --file ci/conda_env_cpp.txt \
+ --file ci/conda_env_python.txt \
+ --file ci/conda_env_gandiva.txt \
+ compilers \
+ python="${PYTHON_VERSION}" \
+ pandas \
+ aws-sdk-cpp \
+ r
+}
+
+activate_conda_env_for_benchmark_build() {
+ conda init bash
+ conda activate "${BENCHMARKABLE_TYPE}"
+}
+
+install_arrow_python_dependencies() {
+ pip install -r python/requirements-build.txt -r python/requirements-test.txt
+}
+
+set_arrow_build_and_run_env_vars() {
+ set -a
+ source dev/conbench_envs/benchmarks.env
+ set +a
+}
+
+build_arrow_cpp() {
+ # Ignore the error when a cache can't be created
+ if ! ci/scripts/cpp_build.sh $(pwd) $(pwd) 2> error.log; then
+ if ! grep -q -F "Can\'t create temporary cache file" error.log; then
+ cat error.log
+ fi
+ fi
+}
+
+build_arrow_python() {
+ ci/scripts/python_build.sh $(pwd) $(pwd)
+}
+
+build_arrow_r() {
+ cat ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+ ci/scripts/r_deps.sh $(pwd) $(pwd)
+ (cd r; R CMD INSTALL .;)
+}
+
+build_arrow_java() {
+ ci/scripts/java_build.sh $(pwd) $(pwd)
+}
+
+install_archery() {
+ pip install -e dev/archery
+}
+
+install_java_script_project_dependencies() {
+ (cd js; yarn;)
+}
+
+create_conda_env_with_arrow_python() {
+ create_conda_env_for_benchmark_build
+ activate_conda_env_for_benchmark_build
+ install_arrow_python_dependencies
+ set_arrow_build_and_run_env_vars
+ build_arrow_cpp
+ build_arrow_python
+}
+
+"$@"
diff --git a/src/arrow/dev/merge.conf.sample b/src/arrow/dev/merge.conf.sample
new file mode 100644
index 000000000..c71b21161
--- /dev/null
+++ b/src/arrow/dev/merge.conf.sample
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configuration for the merge_arrow_pr.py tool
+# Install a copy of this file at ~/.config/arrow/merge.conf
+
+[jira]
+# issues.apache.org JIRA credentials. Sadly, the jira instance doesn't offer
+# token credentials. Ensure that the file is properly protected.
+username=johnsmith
+password=123456
diff --git a/src/arrow/dev/merge_arrow_pr.py b/src/arrow/dev/merge_arrow_pr.py
new file mode 100755
index 000000000..a8ff0b0e2
--- /dev/null
+++ b/src/arrow/dev/merge_arrow_pr.py
@@ -0,0 +1,613 @@
+#!/usr/bin/env python3
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Utility for creating well-formed pull request merges and pushing them to
+# Apache.
+# usage: ./merge_arrow_pr.py (see config env vars below)
+#
+# This utility assumes you already have a local Arrow git clone and that you
+# have added remotes corresponding to both (i) the GitHub Apache Arrow mirror
+# and (ii) the apache git repo.
+#
+# There are several pieces of authorization possibly needed via environment
+# variables
+#
+# APACHE_JIRA_USERNAME: your Apache JIRA id
+# APACHE_JIRA_PASSWORD: your Apache JIRA password
+# ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid
+# rate limiting)
+
+import configparser
+import os
+import pprint
+import re
+import subprocess
+import sys
+import requests
+import getpass
+
+from six.moves import input
+import six
+
+try:
+ import jira.client
+ import jira.exceptions
+except ImportError:
+ print("Could not find jira library. "
+ "Run 'sudo pip install jira' to install.")
+ print("Exiting without trying to close the associated JIRA.")
+ sys.exit(1)
+
+# Remote name which points to the GitHub site
+PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache")
+
+# For testing to avoid accidentally pushing to apache
+DEBUG = bool(int(os.environ.get("DEBUG", 0)))
+
+
+if DEBUG:
+ print("**************** DEBUGGING ****************")
+
+
+# Prefix added to temporary branches
+BRANCH_PREFIX = "PR_TOOL"
+JIRA_API_BASE = "https://issues.apache.org/jira"
+
+
+def get_json(url, headers=None):
+ req = requests.get(url, headers=headers)
+ return req.json()
+
+
+def run_cmd(cmd):
+ if isinstance(cmd, six.string_types):
+ cmd = cmd.split(' ')
+
+ try:
+ output = subprocess.check_output(cmd)
+ except subprocess.CalledProcessError as e:
+ # this avoids hiding the stdout / stderr of failed processes
+ print('Command failed: %s' % cmd)
+ print('With output:')
+ print('--------------')
+ print(e.output)
+ print('--------------')
+ raise e
+
+ if isinstance(output, six.binary_type):
+ output = output.decode('utf-8')
+ return output
+
+
+original_head = run_cmd("git rev-parse HEAD")[:8]
+
+
+def clean_up():
+ print("Restoring head pointer to %s" % original_head)
+ run_cmd("git checkout %s" % original_head)
+
+ branches = run_cmd("git branch").replace(" ", "").split("\n")
+
+ for branch in [x for x in branches
+ if x.startswith(BRANCH_PREFIX)]:
+ print("Deleting local branch %s" % branch)
+ run_cmd("git branch -D %s" % branch)
+
+
+_REGEX_CI_DIRECTIVE = re.compile(r'\[[^\]]*\]')
+
+
+def strip_ci_directives(commit_message):
+ # Remove things like '[force ci]', '[skip appveyor]' from the assembled
+ # commit message
+ return _REGEX_CI_DIRECTIVE.sub('', commit_message)
+
+
+def fix_version_from_branch(branch, versions):
+ # Note: Assumes this is a sorted (newest->oldest) list of un-released
+ # versions
+ if branch == "master":
+ return versions[-1]
+ else:
+ branch_ver = branch.replace("branch-", "")
+ return [x for x in versions if x.name.startswith(branch_ver)][-1]
+
+
+# We can merge both ARROW and PARQUET patchesa
+SUPPORTED_PROJECTS = ['ARROW', 'PARQUET']
+PR_TITLE_REGEXEN = [(project, re.compile(r'^(' + project + r'-[0-9]+)\b.*$'))
+ for project in SUPPORTED_PROJECTS]
+
+
+class JiraIssue(object):
+
+ def __init__(self, jira_con, jira_id, project, cmd):
+ self.jira_con = jira_con
+ self.jira_id = jira_id
+ self.project = project
+ self.cmd = cmd
+
+ try:
+ self.issue = jira_con.issue(jira_id)
+ except Exception as e:
+ self.cmd.fail("ASF JIRA could not find %s\n%s" % (jira_id, e))
+
+ @property
+ def current_fix_versions(self):
+ return self.issue.fields.fixVersions
+
+ def get_candidate_fix_versions(self, merge_branches=('master',)):
+ # Only suggest versions starting with a number, like 0.x but not JS-0.x
+ all_versions = self.jira_con.project_versions(self.project)
+ unreleased_versions = [x for x in all_versions
+ if not x.raw['released']]
+
+ unreleased_versions = sorted(unreleased_versions,
+ key=lambda x: x.name, reverse=True)
+
+ mainline_versions = self._filter_mainline_versions(unreleased_versions)
+
+ mainline_non_patch_versions = []
+ for v in mainline_versions:
+ (major, minor, patch) = v.name.split(".")
+ if patch == "0":
+ mainline_non_patch_versions.append(v)
+
+ if len(mainline_versions) > len(mainline_non_patch_versions):
+ # If there is a non-patch release, suggest that instead
+ mainline_versions = mainline_non_patch_versions
+
+ default_fix_versions = [
+ fix_version_from_branch(x, mainline_versions).name
+ for x in merge_branches]
+
+ return all_versions, default_fix_versions
+
+ def _filter_mainline_versions(self, versions):
+ if self.project == 'PARQUET':
+ mainline_regex = re.compile(r'cpp-\d.*')
+ else:
+ mainline_regex = re.compile(r'\d.*')
+
+ return [x for x in versions if mainline_regex.match(x.name)]
+
+ def resolve(self, fix_versions, comment):
+ fields = self.issue.fields
+ cur_status = fields.status.name
+
+ if cur_status == "Resolved" or cur_status == "Closed":
+ self.cmd.fail("JIRA issue %s already has status '%s'"
+ % (self.jira_id, cur_status))
+
+ if DEBUG:
+ print("JIRA issue %s untouched" % (self.jira_id))
+ return
+
+ resolve = [x for x in self.jira_con.transitions(self.jira_id)
+ if x['name'] == "Resolve Issue"][0]
+
+ # ARROW-6915: do not overwrite existing fix versions corresponding to
+ # point releases
+ fix_versions = list(fix_versions)
+ fix_version_names = set(x['name'] for x in fix_versions)
+ for version in self.current_fix_versions:
+ major, minor, patch = version.name.split('.')
+ if patch != '0' and version.name not in fix_version_names:
+ fix_versions.append(version.raw)
+
+ self.jira_con.transition_issue(self.jira_id, resolve["id"],
+ comment=comment,
+ fixVersions=fix_versions)
+
+ print("Successfully resolved %s!" % (self.jira_id))
+
+ self.issue = self.jira_con.issue(self.jira_id)
+ self.show()
+
+ def show(self):
+ fields = self.issue.fields
+ print(format_jira_output(self.jira_id, fields.status.name,
+ fields.summary, fields.assignee,
+ fields.components))
+
+
+def format_jira_output(jira_id, status, summary, assignee, components):
+ if assignee is None:
+ assignee = "NOT ASSIGNED!!!"
+ else:
+ assignee = assignee.displayName
+
+ if len(components) == 0:
+ components = 'NO COMPONENTS!!!'
+ else:
+ components = ', '.join((x.name for x in components))
+
+ return """=== JIRA {} ===
+Summary\t\t{}
+Assignee\t{}
+Components\t{}
+Status\t\t{}
+URL\t\t{}/{}""".format(jira_id, summary, assignee, components, status,
+ '/'.join((JIRA_API_BASE, 'browse')), jira_id)
+
+
+class GitHubAPI(object):
+
+ def __init__(self, project_name):
+ self.github_api = ("https://api.github.com/repos/apache/{0}"
+ .format(project_name))
+
+ token = os.environ.get('ARROW_GITHUB_API_TOKEN', None)
+ if token:
+ self.headers = {'Authorization': 'token {0}'.format(token)}
+ else:
+ self.headers = None
+
+ def get_pr_data(self, number):
+ return get_json("%s/pulls/%s" % (self.github_api, number),
+ headers=self.headers)
+
+
+class CommandInput(object):
+ """
+ Interface to input(...) to enable unit test mocks to be created
+ """
+
+ def fail(self, msg):
+ clean_up()
+ raise Exception(msg)
+
+ def prompt(self, prompt):
+ return input(prompt)
+
+ def getpass(self, prompt):
+ return getpass.getpass(prompt)
+
+ def continue_maybe(self, prompt):
+ while True:
+ result = input("\n%s (y/n): " % prompt)
+ if result.lower() == "y":
+ return
+ elif result.lower() == "n":
+ self.fail("Okay, exiting")
+ else:
+ prompt = "Please input 'y' or 'n'"
+
+
+class PullRequest(object):
+
+ def __init__(self, cmd, github_api, git_remote, jira_con, number):
+ self.cmd = cmd
+ self.git_remote = git_remote
+ self.con = jira_con
+ self.number = number
+ self._pr_data = github_api.get_pr_data(number)
+ try:
+ self.url = self._pr_data["url"]
+ self.title = self._pr_data["title"]
+ self.body = self._pr_data["body"]
+ self.target_ref = self._pr_data["base"]["ref"]
+ self.user_login = self._pr_data["user"]["login"]
+ self.base_ref = self._pr_data["head"]["ref"]
+ except KeyError:
+ pprint.pprint(self._pr_data)
+ raise
+ self.description = "%s/%s" % (self.user_login, self.base_ref)
+
+ self.jira_issue = self._get_jira()
+
+ def show(self):
+ print("\n=== Pull Request #%s ===" % self.number)
+ print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s"
+ % (self.title, self.description, self.target_ref, self.url))
+ if self.jira_issue is not None:
+ self.jira_issue.show()
+ else:
+ print("Minor PR. Please ensure it meets guidelines for minor.\n")
+
+ @property
+ def is_merged(self):
+ return bool(self._pr_data["merged"])
+
+ @property
+ def is_mergeable(self):
+ return bool(self._pr_data["mergeable"])
+
+ def _get_jira(self):
+ if self.title.startswith("MINOR:"):
+ return None
+
+ jira_id = None
+ for project, regex in PR_TITLE_REGEXEN:
+ m = regex.search(self.title)
+ if m:
+ jira_id = m.group(1)
+ break
+
+ if jira_id is None:
+ options = ' or '.join('{0}-XXX'.format(project)
+ for project in SUPPORTED_PROJECTS)
+ self.cmd.fail("PR title should be prefixed by a jira id "
+ "{0}, but found {1}".format(options, self.title))
+
+ return JiraIssue(self.con, jira_id, project, self.cmd)
+
+ def merge(self):
+ """
+ merge the requested PR and return the merge hash
+ """
+ pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, self.number)
+ target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX,
+ self.number,
+ self.target_ref.upper())
+ run_cmd("git fetch %s pull/%s/head:%s" % (self.git_remote,
+ self.number,
+ pr_branch_name))
+ run_cmd("git fetch %s %s:%s" % (self.git_remote, self.target_ref,
+ target_branch_name))
+ run_cmd("git checkout %s" % target_branch_name)
+
+ had_conflicts = False
+ try:
+ run_cmd(['git', 'merge', pr_branch_name, '--ff', '--squash'])
+ except Exception as e:
+ msg = ("Error merging: %s\nWould you like to "
+ "manually fix-up this merge?" % e)
+ self.cmd.continue_maybe(msg)
+ msg = ("Okay, please fix any conflicts and 'git add' "
+ "conflicting files... Finished?")
+ self.cmd.continue_maybe(msg)
+ had_conflicts = True
+
+ commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
+ '--pretty=format:%an <%ae>']).split("\n")
+ distinct_authors = sorted(set(commit_authors),
+ key=lambda x: commit_authors.count(x),
+ reverse=True)
+
+ for i, author in enumerate(distinct_authors):
+ print("Author {}: {}".format(i + 1, author))
+
+ if len(distinct_authors) > 1:
+ primary_author, distinct_authors = get_primary_author(
+ self.cmd, distinct_authors)
+ else:
+ # If there is only one author, do not prompt for a lead author
+ primary_author = distinct_authors[0]
+
+ merge_message_flags = []
+
+ merge_message_flags += ["-m", self.title]
+ if self.body is not None:
+ merge_message_flags += ["-m", self.body]
+
+ committer_name = run_cmd("git config --get user.name").strip()
+ committer_email = run_cmd("git config --get user.email").strip()
+
+ authors = ("Authored-by:" if len(distinct_authors) == 1
+ else "Lead-authored-by:")
+ authors += " %s" % (distinct_authors.pop(0))
+ if len(distinct_authors) > 0:
+ authors += "\n" + "\n".join(["Co-authored-by: %s" % a
+ for a in distinct_authors])
+ authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name,
+ committer_email)
+
+ if had_conflicts:
+ committer_name = run_cmd("git config --get user.name").strip()
+ committer_email = run_cmd("git config --get user.email").strip()
+ message = ("This patch had conflicts when merged, "
+ "resolved by\nCommitter: %s <%s>" %
+ (committer_name, committer_email))
+ merge_message_flags += ["-m", message]
+
+ # The string "Closes #%s" string is required for GitHub to correctly
+ # close the PR
+ merge_message_flags += [
+ "-m",
+ "Closes #%s from %s"
+ % (self.number, self.description)]
+ merge_message_flags += ["-m", authors]
+
+ if DEBUG:
+ print("\n".join(merge_message_flags))
+
+ run_cmd(['git', 'commit',
+ '--no-verify', # do not run commit hooks
+ '--author="%s"' % primary_author] +
+ merge_message_flags)
+
+ self.cmd.continue_maybe("Merge complete (local ref %s). Push to %s?"
+ % (target_branch_name, self.git_remote))
+
+ try:
+ push_cmd = ('git push %s %s:%s' % (self.git_remote,
+ target_branch_name,
+ self.target_ref))
+ if DEBUG:
+ print(push_cmd)
+ else:
+ run_cmd(push_cmd)
+ except Exception as e:
+ clean_up()
+ self.cmd.fail("Exception while pushing: %s" % e)
+
+ merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8]
+ clean_up()
+ print("Pull request #%s merged!" % self.number)
+ print("Merge hash: %s" % merge_hash)
+ return merge_hash
+
+
+def get_primary_author(cmd, distinct_authors):
+ author_pat = re.compile(r'(.*) <(.*)>')
+
+ while True:
+ primary_author = cmd.prompt(
+ "Enter primary author in the format of "
+ "\"name <email>\" [%s]: " % distinct_authors[0])
+
+ if primary_author == "":
+ return distinct_authors[0], distinct_authors
+
+ if author_pat.match(primary_author):
+ break
+ print('Bad author "{}", please try again'.format(primary_author))
+
+ # When primary author is specified manually, de-dup it from
+ # author list and put it at the head of author list.
+ distinct_authors = [x for x in distinct_authors
+ if x != primary_author]
+ distinct_authors = [primary_author] + distinct_authors
+ return primary_author, distinct_authors
+
+
+def prompt_for_fix_version(cmd, jira_issue):
+ (all_versions,
+ default_fix_versions) = jira_issue.get_candidate_fix_versions()
+
+ default_fix_versions = ",".join(default_fix_versions)
+
+ issue_fix_versions = cmd.prompt("Enter comma-separated "
+ "fix version(s) [%s]: "
+ % default_fix_versions)
+ if issue_fix_versions == "":
+ issue_fix_versions = default_fix_versions
+ issue_fix_versions = issue_fix_versions.replace(" ", "").split(",")
+
+ def get_version_json(version_str):
+ return [x for x in all_versions if x.name == version_str][0].raw
+
+ return [get_version_json(v) for v in issue_fix_versions]
+
+
+CONFIG_FILE = "~/.config/arrow/merge.conf"
+
+
+def load_configuration():
+ config = configparser.ConfigParser()
+ config.read(os.path.expanduser(CONFIG_FILE))
+ return config
+
+
+def get_credentials(cmd):
+ username, password = None, None
+
+ config = load_configuration()
+ if "jira" in config.sections():
+ username = config["jira"].get("username")
+ password = config["jira"].get("password")
+
+ # Fallback to environment variables
+ if not username:
+ username = os.environ.get("APACHE_JIRA_USERNAME")
+
+ if not password:
+ password = os.environ.get("APACHE_JIRA_PASSWORD")
+
+ # Fallback to user tty prompt
+ if not username:
+ username = cmd.prompt("Env APACHE_JIRA_USERNAME not set, "
+ "please enter your JIRA username:")
+
+ if not password:
+ password = cmd.getpass("Env APACHE_JIRA_PASSWORD not set, "
+ "please enter your JIRA password:")
+
+ return (username, password)
+
+
+def connect_jira(cmd):
+ try:
+ return jira.client.JIRA(options={'server': JIRA_API_BASE},
+ basic_auth=get_credentials(cmd))
+ except jira.exceptions.JIRAError as e:
+ if "CAPTCHA_CHALLENGE" in e.text:
+ print("")
+ print("It looks like you need to answer a captcha challenge for "
+ "this account (probably due to a login attempt with an "
+ "incorrect password). Please log in at "
+ "https://issues.apache.org/jira and complete the captcha "
+ "before running this tool again.")
+ print("Exiting.")
+ sys.exit(1)
+ raise e
+
+
+def get_pr_num():
+ if len(sys.argv) == 2:
+ return sys.argv[1]
+
+ return input("Which pull request would you like to merge? (e.g. 34): ")
+
+
+def cli():
+ # Location of your Arrow git clone
+ ARROW_HOME = os.path.abspath(os.path.dirname(__file__))
+ PROJECT_NAME = os.environ.get('ARROW_PROJECT_NAME') or 'arrow'
+ print("ARROW_HOME = " + ARROW_HOME)
+ print("PROJECT_NAME = " + PROJECT_NAME)
+
+ cmd = CommandInput()
+
+ pr_num = get_pr_num()
+
+ os.chdir(ARROW_HOME)
+
+ github_api = GitHubAPI(PROJECT_NAME)
+
+ jira_con = connect_jira(cmd)
+ pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num)
+
+ if pr.is_merged:
+ print("Pull request %s has already been merged")
+ sys.exit(0)
+
+ if not pr.is_mergeable:
+ msg = ("Pull request %s is not mergeable in its current form.\n"
+ % pr_num + "Continue? (experts only!)")
+ cmd.continue_maybe(msg)
+
+ pr.show()
+
+ cmd.continue_maybe("Proceed with merging pull request #%s?" % pr_num)
+
+ # merged hash not used
+ pr.merge()
+
+ if pr.jira_issue is None:
+ print("Minor PR. No JIRA issue to update.\n")
+ return
+
+ cmd.continue_maybe("Would you like to update the associated JIRA?")
+ jira_comment = (
+ "Issue resolved by pull request %s\n[%s/%s]"
+ % (pr_num,
+ "https://github.com/apache/" + PROJECT_NAME + "/pull",
+ pr_num))
+
+ fix_versions_json = prompt_for_fix_version(cmd, pr.jira_issue)
+ pr.jira_issue.resolve(fix_versions_json, jira_comment)
+
+
+if __name__ == '__main__':
+ try:
+ cli()
+ except Exception:
+ raise
diff --git a/src/arrow/dev/merge_arrow_pr.sh b/src/arrow/dev/merge_arrow_pr.sh
new file mode 100755
index 000000000..5b55780dc
--- /dev/null
+++ b/src/arrow/dev/merge_arrow_pr.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper script that automatically creates a Python virtual environment
+# and runs merge_arrow_pr.py inside it.
+
+set -e
+
+PYTHON=$(which python3)
+PYVER=$($PYTHON -c "import sys; print('.'.join(map(str, sys.version_info[:2])))")
+
+GIT_ROOT=$(git rev-parse --show-toplevel)
+ENV_DIR=$GIT_ROOT/dev/.venv$PYVER
+
+ENV_PYTHON=$ENV_DIR/bin/python3
+ENV_PIP="$ENV_PYTHON -m pip --no-input"
+
+check_venv() {
+ [ -x $ENV_PYTHON ] || {
+ echo "Virtual environment broken: $ENV_PYTHON not an executable"
+ exit 1
+ }
+}
+
+create_venv() {
+ echo ""
+ echo "Creating Python virtual environment in $ENV_DIR ..."
+ echo ""
+ $PYTHON -m venv $ENV_DIR
+ $ENV_PIP install -q -r $GIT_ROOT/dev/requirements_merge_arrow_pr.txt || {
+ echo "Failed to setup virtual environment"
+ echo "Please delete directory '$ENV_DIR' and try again"
+ exit $?
+ }
+}
+
+[ -d $ENV_DIR ] || create_venv
+check_venv
+
+$ENV_PYTHON $GIT_ROOT/dev/merge_arrow_pr.py "$@"
diff --git a/src/arrow/dev/release/.env.example b/src/arrow/dev/release/.env.example
new file mode 100644
index 000000000..50c8ec8e6
--- /dev/null
+++ b/src/arrow/dev/release/.env.example
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The GPG key ID to sign artifacts. The GPG key ID must be registered
+# to both of the followings:
+#
+# * https://dist.apache.org/repos/dist/dev/arrow/KEYS
+# * https://dist.apache.org/repos/dist/release/arrow/KEYS
+#
+# See these files how to import your GPG key ID to these files.
+#
+# You must set this.
+#GPG_KEY_ID=08D3564B7C6A9CAFBFF6A66791D18FCF079F8007
+
+# The Artifactory API key to upload artifacts to Artifactory.
+#
+# You must set this.
+#ARTIFACTORY_API_KEY=secret
diff --git a/src/arrow/dev/release/.gitignore b/src/arrow/dev/release/.gitignore
new file mode 100644
index 000000000..f3d708a6a
--- /dev/null
+++ b/src/arrow/dev/release/.gitignore
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+/.env
+/binary/id_rsa
+/binary/id_rsa.pub
+/binary/tmp/
diff --git a/src/arrow/dev/release/01-prepare-test.rb b/src/arrow/dev/release/01-prepare-test.rb
new file mode 100644
index 000000000..51665ec02
--- /dev/null
+++ b/src/arrow/dev/release/01-prepare-test.rb
@@ -0,0 +1,586 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class PrepareTest < Test::Unit::TestCase
+ include GitRunnable
+ include VersionDetectable
+
+ def setup
+ @current_commit = git_current_commit
+ detect_versions
+
+ top_dir = Pathname(__dir__).parent.parent
+ @original_git_repository = top_dir + ".git"
+ Dir.mktmpdir do |dir|
+ @test_git_repository = Pathname(dir) + "arrow"
+ git("clone", @original_git_repository.to_s, @test_git_repository.to_s)
+ Dir.chdir(@test_git_repository) do
+ @tag_name = "apache-arrow-#{@release_version}"
+ @release_branch = "testing-release-#{@release_version}-rc0"
+ git("checkout", "-b", @release_branch, @current_commit)
+ yield
+ end
+ FileUtils.rm_rf(@test_git_repository)
+ end
+ end
+
+ def omit_on_release_branch
+ omit("Not for release branch") if on_release_branch?
+ end
+
+ def prepare(*targets)
+ if targets.last.is_a?(Hash)
+ additional_env = targets.pop
+ else
+ additional_env = {}
+ end
+ env = { "PREPARE_DEFAULT" => "0" }
+ targets.each do |target|
+ env["PREPARE_#{target}"] = "1"
+ end
+ env = env.merge(additional_env)
+ sh(env, "dev/release/01-prepare.sh", @release_version, @next_version, "0")
+ end
+
+ def bump_versions(*targets)
+ env = { "BUMP_DEFAULT" => "0" }
+ targets.each do |target|
+ env["BUMP_#{target}"] = "1"
+ end
+ sh(env, "dev/release/post-12-bump-versions.sh", @release_version,
+ @next_version)
+ end
+
+ def parse_patch(patch)
+ diffs = []
+ in_hunk = false
+ patch.each_line do |line|
+ case line
+ when /\A--- a\//
+ path = $POSTMATCH.chomp
+ diffs << { path: path, hunks: [] }
+ in_hunk = false
+ when /\A@@/
+ in_hunk = true
+ diffs.last[:hunks] << []
+ when /\A[-+]/
+ next unless in_hunk
+ diffs.last[:hunks].last << line.chomp
+ end
+ end
+ diffs.sort_by do |diff|
+ diff[:path]
+ end
+ end
+
+ def test_linux_packages
+ user = "Arrow Developers"
+ email = "dev@arrow.apache.org"
+ prepare("LINUX_PACKAGES", "DEBFULLNAME" => user, "DEBEMAIL" => email)
+ changes = parse_patch(git("log", "-n", "1", "-p"))
+ sampled_changes = changes.collect do |change|
+ {
+ path: change[:path],
+ sampled_hunks: change[:hunks].collect(&:first),
+ }
+ end
+ base_dir = "dev/tasks/linux-packages"
+ today = Time.now.utc.strftime("%a %b %d %Y")
+ expected_changes = [
+ {
+ path: "#{base_dir}/apache-arrow-apt-source/debian/changelog",
+ sampled_hunks: [
+ "+apache-arrow-apt-source (#{@release_version}-1) " +
+ "unstable; urgency=low",
+ ],
+ },
+ {
+ path: "#{base_dir}/apache-arrow-release/yum/apache-arrow-release.spec.in",
+ sampled_hunks: [
+ "+* #{today} #{user} <#{email}> - #{@release_version}-1",
+ ],
+ },
+ {
+ path: "#{base_dir}/apache-arrow/debian/changelog",
+ sampled_hunks: [
+ "+apache-arrow (#{@release_version}-1) unstable; urgency=low",
+ ],
+ },
+ {
+ path: "#{base_dir}/apache-arrow/yum/arrow.spec.in",
+ sampled_hunks: [
+ "+* #{today} #{user} <#{email}> - #{@release_version}-1",
+ ],
+ },
+ ]
+ assert_equal(expected_changes, sampled_changes)
+ end
+
+ def test_version_pre_tag
+ omit_on_release_branch
+ prepare("VERSION_PRE_TAG")
+ assert_equal([
+ {
+ path: "c_glib/meson.build",
+ hunks: [
+ ["-version = '#{@snapshot_version}'",
+ "+version = '#{@release_version}'"],
+ ],
+ },
+ {
+ path: "ci/scripts/PKGBUILD",
+ hunks: [
+ ["-pkgver=#{@previous_version}.9000",
+ "+pkgver=#{@release_version}"],
+ ],
+ },
+ {
+ path: "cpp/CMakeLists.txt",
+ hunks: [
+ ["-set(ARROW_VERSION \"#{@snapshot_version}\")",
+ "+set(ARROW_VERSION \"#{@release_version}\")"],
+ ],
+ },
+ {
+ path: "cpp/vcpkg.json",
+ hunks: [
+ ["- \"version-string\": \"#{@snapshot_version}\",",
+ "+ \"version-string\": \"#{@release_version}\","],
+ ],
+ },
+ {
+ path: "csharp/Directory.Build.props",
+ hunks: [
+ ["- <Version>#{@snapshot_version}</Version>",
+ "+ <Version>#{@release_version}</Version>"],
+ ],
+ },
+ {
+ path: "dev/tasks/homebrew-formulae/apache-arrow.rb",
+ hunks: [
+ ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"",
+ "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\""],
+ ],
+ },
+ {
+ path: "dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb",
+ hunks: [
+ ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@previous_version}.9000/apache-arrow-#{@previous_version}.9000.tar.gz\"",
+ "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\""],
+ ],
+ },
+ {
+ path: "java/adapter/avro/pom.xml",
+ hunks: [
+ ["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"],
+ ],
+ },
+ {
+ hunks: [
+ ["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"],
+ ],
+ path: "java/adapter/jdbc/pom.xml",
+ },
+ {
+ hunks: [
+ ["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"],
+ ],
+ path: "java/adapter/orc/pom.xml",
+ },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/algorithm/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/c/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/compression/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/dataset/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/flight/flight-core/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/flight/flight-grpc/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@release_version}</version>"]],
+ path: "java/format/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/gandiva/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/memory/memory-core/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/memory/memory-netty/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/memory/memory-unsafe/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/memory/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"],
+ ["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/performance/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/plasma/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@release_version}</version>"]],
+ path: "java/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/tools/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@release_version}</version>"]],
+ path: "java/vector/pom.xml" },
+ {
+ path: "js/package.json",
+ hunks: [
+ ["- \"version\": \"#{@snapshot_version}\"",
+ "+ \"version\": \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "matlab/CMakeLists.txt",
+ hunks: [
+ ["-set(MLARROW_VERSION \"#{@snapshot_version}\")",
+ "+set(MLARROW_VERSION \"#{@release_version}\")"],
+ ],
+ },
+ {
+ path: "python/setup.py",
+ hunks: [
+ ["-default_version = '#{@snapshot_version}'",
+ "+default_version = '#{@release_version}'"],
+ ],
+ },
+ {
+ path: "r/DESCRIPTION",
+ hunks: [
+ ["-Version: #{@previous_version}.9000",
+ "+Version: #{@release_version}"],
+ ],
+ },
+ {
+ path: "r/NEWS.md",
+ hunks: [
+ ["-\# arrow #{@previous_version}.9000",
+ "+\# arrow #{@release_version}"],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-dataset/lib/arrow-dataset/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow/lib/arrow/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-gandiva/lib/gandiva/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-parquet/lib/parquet/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-plasma/lib/plasma/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@release_version}\""],
+ ],
+ },
+ ],
+ parse_patch(git("log", "-n", "1", "-p")))
+ end
+
+ def test_version_post_tag
+ omit_on_release_branch
+ bump_versions("VERSION_POST_TAG")
+ assert_equal([
+ {
+ path: "c_glib/meson.build",
+ hunks: [
+ ["-version = '#{@snapshot_version}'",
+ "+version = '#{@next_snapshot_version}'"],
+ ],
+ },
+ {
+ path: "ci/scripts/PKGBUILD",
+ hunks: [
+ ["-pkgver=#{@previous_version}.9000",
+ "+pkgver=#{@release_version}.9000"],
+ ],
+ },
+ {
+ path: "cpp/CMakeLists.txt",
+ hunks: [
+ ["-set(ARROW_VERSION \"#{@snapshot_version}\")",
+ "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"],
+ ],
+ },
+ {
+ path: "cpp/vcpkg.json",
+ hunks: [
+ ["- \"version-string\": \"#{@snapshot_version}\",",
+ "+ \"version-string\": \"#{@next_snapshot_version}\","],
+ ],
+ },
+ {
+ path: "csharp/Directory.Build.props",
+ hunks: [
+ ["- <Version>#{@snapshot_version}</Version>",
+ "+ <Version>#{@next_snapshot_version}</Version>"],
+ ],
+ },
+ {
+ path: "dev/tasks/homebrew-formulae/apache-arrow.rb",
+ hunks: [
+ ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"",
+ "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""],
+ ],
+ },
+ {
+ path: "dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb",
+ hunks: [
+ ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@previous_version}.9000/apache-arrow-#{@previous_version}.9000.tar.gz\"",
+ "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}.9000/apache-arrow-#{@release_version}.9000.tar.gz\""],
+ ],
+ },
+ { path: "java/adapter/avro/pom.xml",
+ hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]] },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/adapter/jdbc/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/adapter/orc/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/algorithm/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/c/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/compression/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/dataset/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/flight/flight-core/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/flight/flight-grpc/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/format/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/gandiva/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/memory/memory-core/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/memory/memory-netty/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/memory/memory-unsafe/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/memory/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"],
+ ["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/performance/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/plasma/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/tools/pom.xml" },
+ { hunks: [["- <version>#{@snapshot_version}</version>",
+ "+ <version>#{@next_snapshot_version}</version>"]],
+ path: "java/vector/pom.xml" },
+ {
+ path: "js/package.json",
+ hunks: [
+ ["- \"version\": \"#{@snapshot_version}\"",
+ "+ \"version\": \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "matlab/CMakeLists.txt",
+ hunks: [
+ ["-set(MLARROW_VERSION \"#{@snapshot_version}\")",
+ "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"],
+ ],
+ },
+ {
+ path: "python/setup.py",
+ hunks: [
+ ["-default_version = '#{@snapshot_version}'",
+ "+default_version = '#{@next_snapshot_version}'"],
+ ],
+ },
+ {
+ path: "r/DESCRIPTION",
+ hunks: [
+ ["-Version: #{@previous_version}.9000",
+ "+Version: #{@release_version}.9000"],
+ ],
+ },
+ {
+ path: "r/NEWS.md",
+ hunks: [
+ ["-# arrow #{@previous_version}.9000",
+ "+# arrow #{@release_version}.9000",
+ "+",
+ "+# arrow #{@release_version}",],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-dataset/lib/arrow-dataset/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-arrow/lib/arrow/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-gandiva/lib/gandiva/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-parquet/lib/parquet/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ {
+ path: "ruby/red-plasma/lib/plasma/version.rb",
+ hunks: [
+ ["- VERSION = \"#{@snapshot_version}\"",
+ "+ VERSION = \"#{@next_snapshot_version}\""],
+ ],
+ },
+ ],
+ parse_patch(git("log", "-n", "1", "-p")))
+ end
+
+ def test_deb_package_names
+ bump_versions("DEB_PACKAGE_NAMES")
+ changes = parse_patch(git("log", "-n", "1", "-p"))
+ sampled_changes = changes.collect do |change|
+ first_hunk = change[:hunks][0]
+ first_removed_line = first_hunk.find { |line| line.start_with?("-") }
+ first_added_line = first_hunk.find { |line| line.start_with?("+") }
+ {
+ sampled_diff: [first_removed_line, first_added_line],
+ path: change[:path],
+ }
+ end
+ expected_changes = [
+ {
+ sampled_diff: [
+ "-dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@so_version}.install",
+ "+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@next_so_version}.install",
+ ],
+ path: "dev/release/rat_exclude_files.txt",
+ },
+ {
+ sampled_diff: [
+ "-Package: libarrow#{@so_version}",
+ "+Package: libarrow#{@next_so_version}",
+ ],
+ path: "dev/tasks/linux-packages/apache-arrow/debian/control.in",
+ },
+ {
+ sampled_diff: [
+ "- - libarrow-dataset-glib#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
+ "+ - libarrow-dataset-glib#{@next_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
+ ],
+ path: "dev/tasks/tasks.yml",
+ },
+ ]
+ assert_equal(expected_changes, sampled_changes)
+ end
+end
diff --git a/src/arrow/dev/release/01-prepare.sh b/src/arrow/dev/release/01-prepare.sh
new file mode 100755
index 000000000..b1e917390
--- /dev/null
+++ b/src/arrow/dev/release/01-prepare.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -ue
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 3 ]; then
+ echo "Usage: $0 <version> <next_version> <rc-num>"
+ exit 1
+fi
+
+. $SOURCE_DIR/utils-prepare.sh
+
+version=$1
+next_version=$2
+next_version_snapshot="${next_version}-SNAPSHOT"
+rc_number=$3
+
+release_tag="apache-arrow-${version}"
+release_branch="release-${version}"
+release_candidate_branch="release-${version}-rc${rc_number}"
+
+: ${PREPARE_DEFAULT:=1}
+: ${PREPARE_CHANGELOG:=${PREPARE_DEFAULT}}
+: ${PREPARE_LINUX_PACKAGES:=${PREPARE_DEFAULT}}
+: ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}}
+: ${PREPARE_BRANCH:=${PREPARE_DEFAULT}}
+: ${PREPARE_TAG:=${PREPARE_DEFAULT}}
+
+if [ ${PREPARE_TAG} -gt 0 ]; then
+ if [ $(git tag -l "${release_tag}") ]; then
+ echo "Delete existing git tag $release_tag"
+ git tag -d "${release_tag}"
+ fi
+fi
+
+if [ ${PREPARE_BRANCH} -gt 0 ]; then
+ if [[ $(git branch -l "${release_candidate_branch}") ]]; then
+ next_rc_number=$(($rc_number+1))
+ echo "Branch ${release_candidate_branch} already exists, so create a new release candidate:"
+ echo "1. Checkout the master branch for major releases and maint-<version> for patch releases."
+ echo "2. Execute the script again with bumped RC number."
+ echo "Commands:"
+ echo " git checkout master"
+ echo " dev/release/01-prepare.sh ${version} ${next_version} ${next_rc_number}"
+ exit 1
+ fi
+
+ echo "Create local branch ${release_candidate_branch} for release candidate ${rc_number}"
+ git checkout -b ${release_candidate_branch}
+fi
+
+############################## Pre-Tag Commits ##############################
+
+if [ ${PREPARE_CHANGELOG} -gt 0 ]; then
+ echo "Updating changelog for $version"
+ # Update changelog
+ archery release changelog add $version
+ git add ${SOURCE_DIR}/../../CHANGELOG.md
+ git commit -m "[Release] Update CHANGELOG.md for $version"
+fi
+
+if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then
+ echo "Updating .deb/.rpm changelogs for $version"
+ cd $SOURCE_DIR/../tasks/linux-packages
+ rake \
+ version:update \
+ ARROW_RELEASE_TIME="$(date +%Y-%m-%dT%H:%M:%S%z)" \
+ ARROW_VERSION=${version}
+ git add */debian*/changelog */yum/*.spec.in
+ git commit -m "[Release] Update .deb/.rpm changelogs for $version"
+ cd -
+fi
+
+if [ ${PREPARE_VERSION_PRE_TAG} -gt 0 ]; then
+ echo "Prepare release ${version} on tag ${release_tag} then reset to version ${next_version_snapshot}"
+
+ update_versions "${version}" "${next_version}" "release"
+ git commit -m "[Release] Update versions for ${version}"
+fi
+
+############################## Tag the Release ##############################
+
+if [ ${PREPARE_TAG} -gt 0 ]; then
+ git tag -a "${release_tag}" -m "[Release] Apache Arrow Release ${version}"
+fi
diff --git a/src/arrow/dev/release/02-source-test.rb b/src/arrow/dev/release/02-source-test.rb
new file mode 100644
index 000000000..652d4c07f
--- /dev/null
+++ b/src/arrow/dev/release/02-source-test.rb
@@ -0,0 +1,148 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class SourceTest < Test::Unit::TestCase
+ include GitRunnable
+ include VersionDetectable
+
+ def setup
+ @current_commit = git_current_commit
+ detect_versions
+ @tag_name = "apache-arrow-#{@release_version}"
+ @script = File.expand_path("dev/release/02-source.sh")
+
+ Dir.mktmpdir do |dir|
+ Dir.chdir(dir) do
+ yield
+ end
+ end
+ end
+
+ def source(*targets)
+ env = {
+ "SOURCE_DEFAULT" => "0",
+ "release_hash" => @current_commit,
+ }
+ targets.each do |target|
+ env["SOURCE_#{target}"] = "1"
+ end
+ output = sh(env, @script, @release_version, "0")
+ sh("tar", "xf", "#{@tag_name}.tar.gz")
+ output
+ end
+
+ def test_symbolic_links
+ source
+ Dir.chdir(@tag_name) do
+ assert_equal([],
+ Find.find(".").find_all {|path| File.symlink?(path)})
+ end
+ end
+
+ def test_csharp_git_commit_information
+ source
+ Dir.chdir("#{@tag_name}/csharp") do
+ FileUtils.mv("dummy.git", "../.git")
+ sh("dotnet", "pack", "-c", "Release")
+ FileUtils.mv("../.git", "dummy.git")
+ Dir.chdir("artifacts/Apache.Arrow/Release") do
+ sh("unzip", "Apache.Arrow.#{@snapshot_version}.nupkg")
+ FileUtils.chmod(0400, "Apache.Arrow.nuspec")
+ nuspec = REXML::Document.new(File.read("Apache.Arrow.nuspec"))
+ nuspec_repository = nuspec.elements["package/metadata/repository"]
+ attributes = {}
+ nuspec_repository.attributes.each do |key, value|
+ attributes[key] = value
+ end
+ assert_equal({
+ "type" => "git",
+ "url" => "https://github.com/apache/arrow",
+ "commit" => @current_commit,
+ },
+ attributes)
+ end
+ end
+ end
+
+ def test_python_version
+ source
+ Dir.chdir("#{@tag_name}/python") do
+ sh("python3", "setup.py", "sdist")
+ if on_release_branch?
+ pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz"
+ else
+ pyarrow_source_archive = "dist/pyarrow-#{@release_version}a0.tar.gz"
+ end
+ assert_equal([pyarrow_source_archive],
+ Dir.glob("dist/pyarrow-*.tar.gz"))
+ end
+ end
+
+ def test_vote
+ jira_url = "https://issues.apache.org/jira"
+ jql_conditions = [
+ "project = ARROW",
+ "status in (Resolved, Closed)",
+ "fixVersion = #{@release_version}",
+ ]
+ jql = jql_conditions.join(" AND ")
+ n_resolved_issues = nil
+ search_url = URI("#{jira_url}/rest/api/2/search?jql=#{CGI.escape(jql)}")
+ search_url.open do |response|
+ n_resolved_issues = JSON.parse(response.read)["total"]
+ end
+ output = source("VOTE")
+ assert_equal(<<-VOTE.strip, output[/^-+$(.+?)^-+$/m, 1].strip)
+To: dev@arrow.apache.org
+Subject: [VOTE] Release Apache Arrow #{@release_version} - RC0
+
+Hi,
+
+I would like to propose the following release candidate (RC0) of Apache
+Arrow version #{@release_version}. This is a release consisting of #{n_resolved_issues}
+resolved JIRA issues[1].
+
+This release candidate is based on commit:
+#{@current_commit} [2]
+
+The source release rc0 is hosted at [3].
+The binary artifacts are hosted at [4][5][6][7][8][9].
+The changelog is located at [10].
+
+Please download, verify checksums and signatures, run the unit tests,
+and vote on the release. See [11] for how to validate a release candidate.
+
+The vote will be open for at least 72 hours.
+
+[ ] +1 Release this as Apache Arrow #{@release_version}
+[ ] +0
+[ ] -1 Do not release this as Apache Arrow #{@release_version} because...
+
+[1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20#{@release_version}
+[2]: https://github.com/apache/arrow/tree/#{@current_commit}
+[3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0
+[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/
+[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/
+[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/
+[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{@release_version}-rc0
+[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0
+[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+[10]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md
+[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
+ VOTE
+ end
+end
diff --git a/src/arrow/dev/release/02-source.sh b/src/arrow/dev/release/02-source.sh
new file mode 100755
index 000000000..156eccc1b
--- /dev/null
+++ b/src/arrow/dev/release/02-source.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+: ${SOURCE_DEFAULT:=1}
+: ${SOURCE_RAT:=${SOURCE_DEFAULT}}
+: ${SOURCE_UPLOAD:=${SOURCE_DEFAULT}}
+: ${SOURCE_VOTE:=${SOURCE_DEFAULT}}
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc=$2
+
+tag=apache-arrow-${version}
+tagrc=${tag}-rc${rc}
+rc_url="https://dist.apache.org/repos/dist/dev/arrow/${tagrc}"
+
+echo "Preparing source for tag ${tag}"
+
+: ${release_hash:=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag})}
+
+if [ ${SOURCE_UPLOAD} -gt 0 ]; then
+ if [ -z "$release_hash" ]; then
+ echo "Cannot continue: unknown git tag: $tag"
+ exit
+ fi
+fi
+
+echo "Using commit $release_hash"
+
+tarball=${tag}.tar.gz
+
+rm -rf ${tag}
+# be conservative and use the release hash, even though git produces the same
+# archive (identical hashes) using the scm tag
+(cd "${SOURCE_TOP_DIR}" && \
+ git archive ${release_hash} --prefix ${tag}/) | \
+ tar xf -
+
+# Resolve all hard and symbolic links
+rm -rf ${tag}.tmp
+mv ${tag} ${tag}.tmp
+cp -R -L ${tag}.tmp ${tag}
+rm -rf ${tag}.tmp
+
+# Create a dummy .git/ directory to download the source files from GitHub with Source Link in C#.
+dummy_git=${tag}/csharp/dummy.git
+mkdir ${dummy_git}
+pushd ${dummy_git}
+echo ${release_hash} > HEAD
+echo '[remote "origin"] url = https://github.com/apache/arrow.git' >> config
+mkdir objects refs
+popd
+
+# Create new tarball from modified source directory
+tar czf ${tarball} ${tag}
+rm -rf ${tag}
+
+if [ ${SOURCE_RAT} -gt 0 ]; then
+ "${SOURCE_DIR}/run-rat.sh" ${tarball}
+fi
+
+if [ ${SOURCE_UPLOAD} -gt 0 ]; then
+ # sign the archive
+ gpg --armor --output ${tarball}.asc --detach-sig ${tarball}
+ shasum -a 256 $tarball > ${tarball}.sha256
+ shasum -a 512 $tarball > ${tarball}.sha512
+
+ # check out the arrow RC folder
+ svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow tmp
+
+ # add the release candidate for the tag
+ mkdir -p tmp/${tagrc}
+
+ # copy the rc tarball into the tmp dir
+ cp ${tarball}* tmp/${tagrc}
+
+ # commit to svn
+ svn add tmp/${tagrc}
+ svn ci -m "Apache Arrow ${version} RC${rc}" tmp/${tagrc}
+
+ # clean up
+ rm -rf tmp
+
+ echo "Success! The release candidate is available here:"
+ echo " ${rc_url}"
+ echo ""
+ echo "Commit SHA1: ${release_hash}"
+ echo ""
+fi
+
+if [ ${SOURCE_VOTE} -gt 0 ]; then
+ echo "The following draft email has been created to send to the"
+ echo "dev@arrow.apache.org mailing list"
+ echo ""
+ echo "---------------------------------------------------------"
+ jira_url="https://issues.apache.org/jira"
+ jql="project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20${version}"
+ n_resolved_issues=$(curl "${jira_url}/rest/api/2/search/?jql=${jql}" | jq ".total")
+ cat <<MAIL
+To: dev@arrow.apache.org
+Subject: [VOTE] Release Apache Arrow ${version} - RC${rc}
+
+Hi,
+
+I would like to propose the following release candidate (RC${rc}) of Apache
+Arrow version ${version}. This is a release consisting of ${n_resolved_issues}
+resolved JIRA issues[1].
+
+This release candidate is based on commit:
+${release_hash} [2]
+
+The source release rc${rc} is hosted at [3].
+The binary artifacts are hosted at [4][5][6][7][8][9].
+The changelog is located at [10].
+
+Please download, verify checksums and signatures, run the unit tests,
+and vote on the release. See [11] for how to validate a release candidate.
+
+The vote will be open for at least 72 hours.
+
+[ ] +1 Release this as Apache Arrow ${version}
+[ ] +0
+[ ] -1 Do not release this as Apache Arrow ${version} because...
+
+[1]: ${jira_url}/issues/?jql=${jql}
+[2]: https://github.com/apache/arrow/tree/${release_hash}
+[3]: ${rc_url}
+[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/
+[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/
+[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/
+[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/${version}-rc${rc}
+[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc}
+[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+[10]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md
+[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
+MAIL
+ echo "---------------------------------------------------------"
+fi
diff --git a/src/arrow/dev/release/03-binary-submit.sh b/src/arrow/dev/release/03-binary-submit.sh
new file mode 100755
index 000000000..b22a54bfd
--- /dev/null
+++ b/src/arrow/dev/release/03-binary-submit.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc_number=$2
+version_with_rc="${version}-rc${rc_number}"
+crossbow_job_prefix="release-${version_with_rc}"
+release_tag="apache-arrow-${version}"
+
+: ${ARROW_REPOSITORY:="apache/arrow"}
+: ${ARROW_BRANCH:=$release_tag}
+
+# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
+# are jobs submitted with the same prefix (the integer at the end is auto
+# incremented)
+archery crossbow submit \
+ --no-fetch \
+ --job-prefix ${crossbow_job_prefix} \
+ --arrow-version ${version_with_rc} \
+ --arrow-remote "https://github.com/${ARROW_REPOSITORY}" \
+ --arrow-branch ${ARROW_BRANCH} \
+ --group packaging
diff --git a/src/arrow/dev/release/04-binary-download.sh b/src/arrow/dev/release/04-binary-download.sh
new file mode 100755
index 000000000..b433a3f9c
--- /dev/null
+++ b/src/arrow/dev/release/04-binary-download.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc_number=$2
+version_with_rc="${version}-rc${rc_number}"
+crossbow_job_prefix="release-${version_with_rc}"
+
+# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
+# are jobs submitted with the same prefix (the integer at the end is auto
+# incremented)
+: ${CROSSBOW_JOB_NUMBER:="0"}
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"}
+
+archery crossbow download-artifacts ${CROSSBOW_JOB_ID} --no-fetch
diff --git a/src/arrow/dev/release/05-binary-upload.sh b/src/arrow/dev/release/05-binary-upload.sh
new file mode 100755
index 000000000..5a30fc8bd
--- /dev/null
+++ b/src/arrow/dev/release/05-binary-upload.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc=$2
+
+version_with_rc="${version}-rc${rc}"
+crossbow_job_prefix="release-${version_with_rc}"
+crossbow_package_dir="${SOURCE_DIR}/../../packages"
+
+: ${CROSSBOW_JOB_NUMBER:="0"}
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"}
+artifact_dir="${crossbow_package_dir}/${CROSSBOW_JOB_ID}"
+
+if [ ! -e "$artifact_dir" ]; then
+ echo "$artifact_dir does not exist"
+ exit 1
+fi
+
+if [ ! -d "$artifact_dir" ]; then
+ echo "$artifact_dir is not a directory"
+ exit 1
+fi
+
+cd "${SOURCE_DIR}"
+
+if [ ! -f .env ]; then
+ echo "You must create $(pwd)/.env"
+ echo "You can use $(pwd)/.env.example as template"
+ exit 1
+fi
+. .env
+
+. utils-binary.sh
+
+# By default upload all artifacts.
+# To deactivate one category, deactivate the category and all of its dependents.
+# To explicitly select one category, set UPLOAD_DEFAULT=0 UPLOAD_X=1.
+: ${UPLOAD_DEFAULT:=1}
+: ${UPLOAD_ALMALINUX:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_AMAZON_LINUX:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_CENTOS:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_DEBIAN:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_NUGET:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_PYTHON:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_UBUNTU:=${UPLOAD_DEFAULT}}
+
+rake_tasks=()
+apt_targets=()
+yum_targets=()
+if [ ${UPLOAD_ALMALINUX} -gt 0 ]; then
+ rake_tasks+=(yum:rc)
+ yum_targets+=(almalinux)
+fi
+if [ ${UPLOAD_AMAZON_LINUX} -gt 0 ]; then
+ rake_tasks+=(yum:rc)
+ yum_targets+=(amazon-linux)
+fi
+if [ ${UPLOAD_CENTOS} -gt 0 ]; then
+ rake_tasks+=(yum:rc)
+ yum_targets+=(centos)
+fi
+if [ ${UPLOAD_DEBIAN} -gt 0 ]; then
+ rake_tasks+=(apt:rc)
+ apt_targets+=(debian)
+fi
+if [ ${UPLOAD_NUGET} -gt 0 ]; then
+ rake_tasks+=(nuget:rc)
+fi
+if [ ${UPLOAD_PYTHON} -gt 0 ]; then
+ rake_tasks+=(python:rc)
+fi
+if [ ${UPLOAD_UBUNTU} -gt 0 ]; then
+ rake_tasks+=(apt:rc)
+ apt_targets+=(ubuntu)
+fi
+rake_tasks+=(summary:rc)
+
+tmp_dir=binary/tmp
+mkdir -p "${tmp_dir}"
+source_artifacts_dir="${tmp_dir}/artifacts"
+rm -rf "${source_artifacts_dir}"
+cp -a "${artifact_dir}" "${source_artifacts_dir}"
+
+docker_run \
+ ./runner.sh \
+ rake \
+ "${rake_tasks[@]}" \
+ APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
+ ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \
+ ARTIFACTS_DIR="${tmp_dir}/artifacts" \
+ RC=${rc} \
+ STAGING=${STAGING:-no} \
+ VERSION=${version} \
+ YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}")
diff --git a/src/arrow/dev/release/README.md b/src/arrow/dev/release/README.md
new file mode 100644
index 000000000..0a9cc3e04
--- /dev/null
+++ b/src/arrow/dev/release/README.md
@@ -0,0 +1,24 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+## Release management scripts
+
+To learn more, see the project wiki:
+
+https://cwiki.apache.org/confluence/display/ARROW/Release+Management+Guide
diff --git a/src/arrow/dev/release/Rakefile b/src/arrow/dev/release/Rakefile
new file mode 100644
index 000000000..ff57bad5e
--- /dev/null
+++ b/src/arrow/dev/release/Rakefile
@@ -0,0 +1,37 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "binary-task"
+
+if File.exist?(".env")
+ File.open(".env") do |env|
+ env.each_line do |line|
+ case line.strip
+ when /\A#/
+ when /\A([^=]+)=(.*)\z/
+ key = $1
+ value = $2
+ ENV[key] ||= value
+ end
+ end
+ end
+end
+
+binary_task = BinaryTask.new
+binary_task.define
diff --git a/src/arrow/dev/release/VERIFY.md b/src/arrow/dev/release/VERIFY.md
new file mode 100644
index 000000000..5b441ac13
--- /dev/null
+++ b/src/arrow/dev/release/VERIFY.md
@@ -0,0 +1,76 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Verifying Arrow releases
+
+## Windows
+
+We've provided a convenience script for verifying the C++ and Python builds on
+Windows. Read the comments in `verify-release-candidate.bat` for instructions.
+
+## Linux and macOS
+
+We've provided a convenience script for verifying the C++, Python, C
+GLib, Java and JavaScript builds on Linux and macOS. Read the comments in
+`verify-release-candidate.sh` for instructions.
+
+### C GLib
+
+You need the followings to verify C GLib build:
+
+ * GLib
+ * GObject Introspection
+ * Ruby (not EOL-ed version is required)
+ * gobject-introspection gem
+ * test-unit gem
+
+You can install them by the followings on Debian GNU/Linux and Ubuntu:
+
+```console
+% sudo apt install -y -V libgirepository1.0-dev ruby-dev
+% sudo gem install gobject-introspection test-unit
+```
+
+You can install them by the followings on CentOS:
+
+```console
+% sudo yum install -y gobject-introspection-devel
+% git clone https://github.com/sstephenson/rbenv.git ~/.rbenv
+% git clone https://github.com/sstephenson/ruby-build.git ~/.rbenv/plugins/ruby-build
+% echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bash_profile
+% echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
+% exec ${SHELL} --login
+% sudo yum install -y gcc make patch openssl-devel readline-devel zlib-devel
+% rbenv install 2.4.2
+% rbenv global 2.4.2
+% gem install gobject-introspection test-unit
+```
+
+You can install them by the followings on macOS:
+
+```console
+% brew install -y gobject-introspection
+% gem install gobject-introspection test-unit
+```
+
+You need to set `PKG_CONFIG_PATH` to find libffi on macOS:
+
+```console
+% export PKG_CONFIG_PATH=$(brew --prefix libffi)/lib/pkgconfig:$PKG_CONFIG_PATH
+```
diff --git a/src/arrow/dev/release/binary-task.rb b/src/arrow/dev/release/binary-task.rb
new file mode 100644
index 000000000..5f88e477e
--- /dev/null
+++ b/src/arrow/dev/release/binary-task.rb
@@ -0,0 +1,1910 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "cgi/util"
+require "digest/sha2"
+require "io/console"
+require "json"
+require "net/http"
+require "pathname"
+require "tempfile"
+require "thread"
+require "time"
+
+begin
+ require "apt-dists-merge"
+rescue LoadError
+ warn("apt-dists-merge is needed for apt:* tasks")
+end
+
+class BinaryTask
+ include Rake::DSL
+
+ class ThreadPool
+ def initialize(use_case, &worker)
+ @n_workers = choose_n_workers(use_case)
+ @worker = worker
+ @jobs = Thread::Queue.new
+ @workers = @n_workers.times.collect do
+ Thread.new do
+ loop do
+ job = @jobs.pop
+ break if job.nil?
+ @worker.call(job)
+ end
+ end
+ end
+ end
+
+ def <<(job)
+ @jobs << job
+ end
+
+ def join
+ @n_workers.times do
+ @jobs << nil
+ end
+ @workers.each(&:join)
+ end
+
+ private
+ def choose_n_workers(use_case)
+ case use_case
+ when :artifactory
+ # Too many workers cause Artifactory error.
+ 6
+ when :gpg
+ # Too many workers cause gpg-agent error.
+ 2
+ else
+ raise "Unknown use case: #{use_case}"
+ end
+ end
+ end
+
+ class ProgressReporter
+ def initialize(label, count_max=0)
+ @label = label
+ @count_max = count_max
+
+ @mutex = Thread::Mutex.new
+
+ @time_start = Time.now
+ @time_previous = Time.now
+ @count_current = 0
+ @count_previous = 0
+ end
+
+ def advance
+ @mutex.synchronize do
+ @count_current += 1
+
+ return if @count_max.zero?
+
+ time_current = Time.now
+ if time_current - @time_previous <= 1
+ return
+ end
+
+ show_progress(time_current)
+ end
+ end
+
+ def increment_max
+ @mutex.synchronize do
+ @count_max += 1
+ show_progress(Time.now) if @count_max == 1
+ end
+ end
+
+ def finish
+ @mutex.synchronize do
+ return if @count_max.zero?
+ show_progress(Time.now)
+ $stderr.puts
+ end
+ end
+
+ private
+ def show_progress(time_current)
+ n_finishes = @count_current - @count_previous
+ throughput = n_finishes.to_f / (time_current - @time_previous)
+ @time_previous = time_current
+ @count_previous = @count_current
+
+ message = build_message(time_current, throughput)
+ $stderr.print("\r#{message}") if message
+ end
+
+ def build_message(time_current, throughput)
+ percent = (@count_current / @count_max.to_f) * 100
+ formatted_count = "[%s/%s]" % [
+ format_count(@count_current),
+ format_count(@count_max),
+ ]
+ elapsed_second = time_current - @time_start
+ if throughput.zero?
+ rest_second = 0
+ else
+ rest_second = (@count_max - @count_current) / throughput
+ end
+ separator = " - "
+ progress = "%5.1f%% %s %s %s %s" % [
+ percent,
+ formatted_count,
+ format_time_interval(elapsed_second),
+ format_time_interval(rest_second),
+ format_throughput(throughput),
+ ]
+ label = @label
+
+ width = guess_terminal_width
+ return "#{label}#{separator}#{progress}" if width.nil?
+
+ return nil if progress.size > width
+
+ label_width = width - progress.size - separator.size
+ if label.size > label_width
+ ellipsis = "..."
+ shorten_label_width = label_width - ellipsis.size
+ if shorten_label_width < 1
+ return progress
+ else
+ label = label[0, shorten_label_width] + ellipsis
+ end
+ end
+ "#{label}#{separator}#{progress}"
+ end
+
+ def format_count(count)
+ "%d" % count
+ end
+
+ def format_time_interval(interval)
+ if interval < 60
+ "00:00:%02d" % interval
+ elsif interval < (60 * 60)
+ minute, second = interval.divmod(60)
+ "00:%02d:%02d" % [minute, second]
+ elsif interval < (60 * 60 * 24)
+ minute, second = interval.divmod(60)
+ hour, minute = minute.divmod(60)
+ "%02d:%02d:%02d" % [hour, minute, second]
+ else
+ minute, second = interval.divmod(60)
+ hour, minute = minute.divmod(60)
+ day, hour = hour.divmod(24)
+ "%dd %02d:%02d:%02d" % [day, hour, minute, second]
+ end
+ end
+
+ def format_throughput(throughput)
+ "%2d/s" % throughput
+ end
+
+ def guess_terminal_width
+ guess_terminal_width_from_io ||
+ guess_terminal_width_from_command ||
+ guess_terminal_width_from_env ||
+ 80
+ end
+
+ def guess_terminal_width_from_io
+ if IO.respond_to?(:console) and IO.console
+ IO.console.winsize[1]
+ elsif $stderr.respond_to?(:winsize)
+ begin
+ $stderr.winsize[1]
+ rescue SystemCallError
+ nil
+ end
+ else
+ nil
+ end
+ end
+
+ def guess_terminal_width_from_command
+ IO.pipe do |input, output|
+ begin
+ pid = spawn("tput", "cols", {:out => output, :err => output})
+ rescue SystemCallError
+ return nil
+ end
+
+ output.close
+ _, status = Process.waitpid2(pid)
+ return nil unless status.success?
+
+ result = input.read.chomp
+ begin
+ Integer(result, 10)
+ rescue ArgumentError
+ nil
+ end
+ end
+ end
+
+ def guess_terminal_width_from_env
+ env = ENV["COLUMNS"] || ENV["TERM_WIDTH"]
+ return nil if env.nil?
+
+ begin
+ Integer(env, 10)
+ rescue ArgumentError
+ nil
+ end
+ end
+ end
+
+ class ArtifactoryClient
+ class Error < StandardError
+ attr_reader :request
+ attr_reader :response
+ def initialize(request, response, message)
+ @request = request
+ @response = response
+ super(message)
+ end
+ end
+
+ def initialize(prefix, api_key)
+ @prefix = prefix
+ @api_key = api_key
+ @http = nil
+ restart
+ end
+
+ def restart
+ close
+ @http = start_http(build_url(""))
+ end
+
+ private def start_http(url, &block)
+ http = Net::HTTP.new(url.host, url.port)
+ http.set_debug_output($stderr) if ENV["DEBUG"]
+ http.use_ssl = true
+ if block_given?
+ http.start(&block)
+ else
+ http
+ end
+ end
+
+ def close
+ return if @http.nil?
+ @http.finish if @http.started?
+ @http = nil
+ end
+
+ def request(method, headers, url, body: nil, &block)
+ request = build_request(method, url, headers, body: body)
+ if ENV["DRY_RUN"]
+ case request
+ when Net::HTTP::Get, Net::HTTP::Head
+ else
+ p [method, url]
+ return
+ end
+ end
+ request_internal(@http, request, &block)
+ end
+
+ private def request_internal(http, request, &block)
+ http.request(request) do |response|
+ case response
+ when Net::HTTPSuccess,
+ Net::HTTPNotModified
+ if block_given?
+ return yield(response)
+ else
+ response.read_body
+ return response
+ end
+ when Net::HTTPRedirection
+ redirected_url = URI(response["Location"])
+ redirected_request = Net::HTTP::Get.new(redirected_url, {})
+ start_http(redirected_url) do |redirected_http|
+ request_internal(redirected_http, redirected_request, &block)
+ end
+ else
+ message = "failed to request: "
+ message << "#{request.uri}: #{request.method}: "
+ message << "#{response.message} #{response.code}"
+ if response.body
+ message << "\n"
+ message << response.body
+ end
+ raise Error.new(request, response, message)
+ end
+ end
+ end
+
+ def files
+ _files = []
+ directories = [""]
+ until directories.empty?
+ directory = directories.shift
+ list(directory).each do |path|
+ resolved_path = "#{directory}#{path}"
+ case path
+ when "../"
+ when /\/\z/
+ directories << resolved_path
+ else
+ _files << resolved_path
+ end
+ end
+ end
+ _files
+ end
+
+ def list(path)
+ url = build_url(path)
+ with_retry(3, url) do
+ begin
+ request(:get, {}, url) do |response|
+ response.body.scan(/<a href="(.+?)"/).flatten
+ end
+ rescue Error => error
+ case error.response
+ when Net::HTTPNotFound
+ return []
+ else
+ raise
+ end
+ end
+ end
+ end
+
+ def head(path)
+ url = build_url(path)
+ with_retry(3, url) do
+ request(:head, {}, url)
+ end
+ end
+
+ def exist?(path)
+ begin
+ head(path)
+ true
+ rescue Error => error
+ case error.response
+ when Net::HTTPNotFound
+ false
+ else
+ raise
+ end
+ end
+ end
+
+ def upload(path, destination_path)
+ destination_url = build_url(destination_path)
+ with_retry(3, destination_url) do
+ sha1 = Digest::SHA1.file(path).hexdigest
+ sha256 = Digest::SHA256.file(path).hexdigest
+ headers = {
+ "X-Artifactory-Last-Modified" => File.mtime(path).rfc2822,
+ "X-Checksum-Deploy" => "false",
+ "X-Checksum-Sha1" => sha1,
+ "X-Checksum-Sha256" => sha256,
+ "Content-Length" => File.size(path).to_s,
+ "Content-Type" => "application/octet-stream",
+ }
+ File.open(path, "rb") do |input|
+ request(:put, headers, destination_url, body: input)
+ end
+ end
+ end
+
+ def download(path, output_path)
+ url = build_url(path)
+ with_retry(5, url) do
+ begin
+ begin
+ headers = {}
+ if File.exist?(output_path)
+ headers["If-Modified-Since"] = File.mtime(output_path).rfc2822
+ end
+ request(:get, headers, url) do |response|
+ case response
+ when Net::HTTPNotModified
+ else
+ File.open(output_path, "wb") do |output|
+ response.read_body do |chunk|
+ output.write(chunk)
+ end
+ end
+ last_modified = response["Last-Modified"]
+ if last_modified
+ FileUtils.touch(output_path,
+ mtime: Time.rfc2822(last_modified))
+ end
+ end
+ end
+ rescue Error => error
+ case error.response
+ when Net::HTTPNotFound
+ $stderr.puts(error.message)
+ return
+ else
+ raise
+ end
+ end
+ end
+ rescue
+ FileUtils.rm_f(output_path)
+ raise
+ end
+ end
+
+ def delete(path)
+ url = build_url(path)
+ with_retry(3, url) do
+ request(:delete, {}, url)
+ end
+ end
+
+ def copy(source, destination)
+ uri = build_api_url("copy/arrow/#{source}",
+ "to" => "/arrow/#{destination}")
+ with_read_timeout(300) do
+ request(:post, {}, uri)
+ end
+ end
+
+ private
+ def build_url(path)
+ uri_string = "https://apache.jfrog.io/artifactory/arrow"
+ uri_string << "/#{@prefix}" unless @prefix.nil?
+ uri_string << "/#{path}"
+ URI(uri_string)
+ end
+
+ def build_api_url(path, parameters)
+ uri_string = "https://apache.jfrog.io/artifactory/api/#{path}"
+ unless parameters.empty?
+ uri_string << "?"
+ escaped_parameters = parameters.collect do |key, value|
+ "#{CGI.escape(key)}=#{CGI.escape(value)}"
+ end
+ uri_string << escaped_parameters.join("&")
+ end
+ URI(uri_string)
+ end
+
+ def build_request(method, url, headers, body: nil)
+ need_auth = false
+ case method
+ when :head
+ request = Net::HTTP::Head.new(url, headers)
+ when :get
+ request = Net::HTTP::Get.new(url, headers)
+ when :post
+ need_auth = true
+ request = Net::HTTP::Post.new(url, headers)
+ when :put
+ need_auth = true
+ request = Net::HTTP::Put.new(url, headers)
+ when :delete
+ need_auth = true
+ request = Net::HTTP::Delete.new(url, headers)
+ else
+ raise "unsupported HTTP method: #{method.inspect}"
+ end
+ request["Connection"] = "Keep-Alive"
+ request["X-JFrog-Art-Api"] = @api_key if need_auth
+ if body
+ if body.is_a?(String)
+ request.body = body
+ else
+ request.body_stream = body
+ end
+ end
+ request
+ end
+
+ def with_retry(max_n_retries, target)
+ n_retries = 0
+ begin
+ yield
+ rescue Net::OpenTimeout,
+ OpenSSL::OpenSSLError,
+ SocketError,
+ SystemCallError,
+ Timeout::Error => error
+ n_retries += 1
+ if n_retries <= max_n_retries
+ $stderr.puts
+ $stderr.puts("Retry #{n_retries}: #{target}: " +
+ "#{error.class}: #{error.message}")
+ restart
+ retry
+ else
+ raise
+ end
+ end
+ end
+
+ def with_read_timeout(timeout)
+ current_timeout = @http.read_timeout
+ begin
+ @http.read_timeout = timeout
+ yield
+ ensure
+ @http.read_timeout = current_timeout
+ end
+ end
+ end
+
+ class ArtifactoryClientPool
+ class << self
+ def open(prefix, api_key)
+ pool = new(prefix, api_key)
+ begin
+ yield(pool)
+ ensure
+ pool.close
+ end
+ end
+ end
+
+ def initialize(prefix, api_key)
+ @prefix = prefix
+ @api_key = api_key
+ @mutex = Thread::Mutex.new
+ @clients = []
+ end
+
+ def pull
+ client = @mutex.synchronize do
+ if @clients.empty?
+ ArtifactoryClient.new(@prefix, @api_key)
+ else
+ @clients.pop
+ end
+ end
+ begin
+ yield(client)
+ ensure
+ release(client)
+ end
+ end
+
+ def release(client)
+ @mutex.synchronize do
+ @clients << client
+ end
+ end
+
+ def close
+ @clients.each(&:close)
+ end
+ end
+
+ module ArtifactoryPath
+ private
+ def base_path
+ path = @distribution
+ path += "-staging" if @staging
+ path += "-rc" if @rc
+ path
+ end
+ end
+
+ class ArtifactoryDownloader
+ include ArtifactoryPath
+
+ def initialize(api_key:,
+ destination:,
+ distribution:,
+ list: nil,
+ pattern: nil,
+ prefix: nil,
+ rc: nil,
+ staging: false)
+ @api_key = api_key
+ @destination = destination
+ @distribution = distribution
+ @list = list
+ @pattern = pattern
+ @prefix = prefix
+ @rc = rc
+ @staging = staging
+ end
+
+ def download
+ progress_label = "Downloading: #{base_path}"
+ progress_reporter = ProgressReporter.new(progress_label)
+ prefix = [base_path, @prefix].compact.join("/")
+ ArtifactoryClientPool.open(prefix, @api_key) do |client_pool|
+ thread_pool = ThreadPool.new(:artifactory) do |path, output_path|
+ client_pool.pull do |client|
+ client.download(path, output_path)
+ end
+ progress_reporter.advance
+ end
+ files = client_pool.pull do |client|
+ if @list
+ list_output_path = "#{@destination}/#{@list}"
+ client.download(@list, list_output_path)
+ File.readlines(list_output_path, chomp: true)
+ else
+ client.files
+ end
+ end
+ files.each do |path|
+ output_path = "#{@destination}/#{path}"
+ if @pattern
+ next unless @pattern.match?(path)
+ end
+ yield(output_path)
+ output_dir = File.dirname(output_path)
+ FileUtils.mkdir_p(output_dir)
+ progress_reporter.increment_max
+ thread_pool << [path, output_path]
+ end
+ thread_pool.join
+ end
+ progress_reporter.finish
+ end
+ end
+
+ class ArtifactoryUploader
+ include ArtifactoryPath
+
+ def initialize(api_key:,
+ destination_prefix: nil,
+ distribution:,
+ rc: nil,
+ source:,
+ staging: false,
+ sync: false,
+ sync_pattern: nil)
+ @api_key = api_key
+ @destination_prefix = destination_prefix
+ @distribution = distribution
+ @rc = rc
+ @source = source
+ @staging = staging
+ @sync = sync
+ @sync_pattern = sync_pattern
+ end
+
+ def upload
+ progress_label = "Uploading: #{base_path}"
+ progress_reporter = ProgressReporter.new(progress_label)
+ prefix = base_path
+ prefix += "/#{@destination_prefix}" if @destination_prefix
+ ArtifactoryClientPool.open(prefix, @api_key) do |client_pool|
+ if @sync
+ existing_files = client_pool.pull do |client|
+ client.files
+ end
+ else
+ existing_files = []
+ end
+
+ thread_pool = ThreadPool.new(:artifactory) do |path, relative_path|
+ client_pool.pull do |client|
+ client.upload(path, relative_path)
+ end
+ progress_reporter.advance
+ end
+
+ source = Pathname(@source)
+ source.glob("**/*") do |path|
+ next if path.directory?
+ destination_path = path.relative_path_from(source)
+ progress_reporter.increment_max
+ existing_files.delete(destination_path.to_s)
+ thread_pool << [path, destination_path]
+ end
+ thread_pool.join
+
+ if @sync
+ thread_pool = ThreadPool.new(:artifactory) do |path|
+ client_pool.pull do |client|
+ client.delete(path)
+ end
+ progress_reporter.advance
+ end
+ existing_files.each do |path|
+ if @sync_pattern
+ next unless @sync_pattern.match?(path)
+ end
+ progress_reporter.increment_max
+ thread_pool << path
+ end
+ thread_pool.join
+ end
+ end
+ progress_reporter.finish
+ end
+ end
+
+ def define
+ define_apt_tasks
+ define_yum_tasks
+ define_python_tasks
+ define_nuget_tasks
+ define_summary_tasks
+ end
+
+ private
+ def env_value(name)
+ value = ENV[name]
+ value = yield(name) if value.nil? and block_given?
+ raise "Specify #{name} environment variable" if value.nil?
+ value
+ end
+
+ def verbose?
+ ENV["VERBOSE"] == "yes"
+ end
+
+ def default_output
+ if verbose?
+ $stdout
+ else
+ IO::NULL
+ end
+ end
+
+ def gpg_key_id
+ env_value("GPG_KEY_ID")
+ end
+
+ def shorten_gpg_key_id(id)
+ id[-8..-1]
+ end
+
+ def rpm_gpg_key_package_name(id)
+ "gpg-pubkey-#{shorten_gpg_key_id(id).downcase}"
+ end
+
+ def artifactory_api_key
+ env_value("ARTIFACTORY_API_KEY")
+ end
+
+ def artifacts_dir
+ env_value("ARTIFACTS_DIR")
+ end
+
+ def version
+ env_value("VERSION")
+ end
+
+ def rc
+ env_value("RC")
+ end
+
+ def staging?
+ ENV["STAGING"] == "yes"
+ end
+
+ def full_version
+ "#{version}-rc#{rc}"
+ end
+
+ def valid_sign?(path, sign_path)
+ IO.pipe do |input, output|
+ begin
+ sh({"LANG" => "C"},
+ "gpg",
+ "--verify",
+ sign_path,
+ path,
+ out: default_output,
+ err: output,
+ verbose: false)
+ rescue
+ return false
+ end
+ output.close
+ /Good signature/ === input.read
+ end
+ end
+
+ def sign(source_path, destination_path)
+ if File.exist?(destination_path)
+ return if valid_sign?(source_path, destination_path)
+ rm(destination_path, verbose: false)
+ end
+ sh("gpg",
+ "--detach-sig",
+ "--local-user", gpg_key_id,
+ "--output", destination_path,
+ source_path,
+ out: default_output,
+ verbose: verbose?)
+ end
+
+ def sha512(source_path, destination_path)
+ if File.exist?(destination_path)
+ sha512 = File.read(destination_path).split[0]
+ return if Digest::SHA512.file(source_path).hexdigest == sha512
+ end
+ absolute_destination_path = File.expand_path(destination_path)
+ Dir.chdir(File.dirname(source_path)) do
+ sh("shasum",
+ "--algorithm", "512",
+ File.basename(source_path),
+ out: absolute_destination_path,
+ verbose: verbose?)
+ end
+ end
+
+ def sign_dir(label, dir)
+ progress_label = "Signing: #{label}"
+ progress_reporter = ProgressReporter.new(progress_label)
+
+ target_paths = []
+ Pathname(dir).glob("**/*") do |path|
+ next if path.directory?
+ case path.extname
+ when ".asc", ".sha512"
+ next
+ end
+ progress_reporter.increment_max
+ target_paths << path.to_s
+ end
+ target_paths.each do |path|
+ sign(path, "#{path}.asc")
+ sha512(path, "#{path}.sha512")
+ progress_reporter.advance
+ end
+ progress_reporter.finish
+ end
+
+ def download_distribution(distribution,
+ destination,
+ target,
+ list: nil,
+ pattern: nil,
+ prefix: nil)
+ mkdir_p(destination, verbose: verbose?) unless File.exist?(destination)
+ existing_paths = {}
+ Pathname(destination).glob("**/*") do |path|
+ next if path.directory?
+ existing_paths[path.to_s] = true
+ end
+ options = {
+ api_key: artifactory_api_key,
+ destination: destination,
+ distribution: distribution,
+ list: list,
+ pattern: pattern,
+ prefix: prefix,
+ staging: staging?,
+ }
+ options[:rc] = rc if target == :rc
+ downloader = ArtifactoryDownloader.new(**options)
+ downloader.download do |output_path|
+ existing_paths.delete(output_path)
+ end
+ existing_paths.each_key do |path|
+ rm_f(path, verbose: verbose?)
+ end
+ end
+
+ def same_content?(path1, path2)
+ File.exist?(path1) and
+ File.exist?(path2) and
+ Digest::SHA256.file(path1) == Digest::SHA256.file(path2)
+ end
+
+ def copy_artifact(source_path,
+ destination_path,
+ progress_reporter)
+ return if same_content?(source_path, destination_path)
+ progress_reporter.increment_max
+ destination_dir = File.dirname(destination_path)
+ unless File.exist?(destination_dir)
+ mkdir_p(destination_dir, verbose: verbose?)
+ end
+ cp(source_path, destination_path, verbose: verbose?)
+ progress_reporter.advance
+ end
+
+ def prepare_staging(base_path)
+ client = ArtifactoryClient.new(nil, artifactory_api_key)
+ ["", "-rc"].each do |suffix|
+ path = "#{base_path}#{suffix}"
+ progress_reporter = ProgressReporter.new("Preparing staging for #{path}")
+ progress_reporter.increment_max
+ begin
+ staging_path = "#{base_path}-staging#{suffix}"
+ if client.exist?(staging_path)
+ client.delete(staging_path)
+ end
+ if client.exist?(path)
+ client.copy(path, staging_path)
+ end
+ ensure
+ progress_reporter.advance
+ progress_reporter.finish
+ end
+ end
+ end
+
+ def delete_staging(base_path)
+ client = ArtifactoryClient.new(nil, artifactory_api_key)
+ ["", "-rc"].each do |suffix|
+ path = "#{base_path}#{suffix}"
+ progress_reporter = ProgressReporter.new("Deleting staging for #{path}")
+ progress_reporter.increment_max
+ begin
+ staging_path = "#{base_path}-staging#{suffix}"
+ if client.exist?(staging_path)
+ client.delete(staging_path)
+ end
+ ensure
+ progress_reporter.advance
+ progress_reporter.finish
+ end
+ end
+ end
+
+ def uploaded_files_name
+ "uploaded-files.txt"
+ end
+
+ def write_uploaded_files(dir)
+ dir = Pathname(dir)
+ uploaded_files = []
+ dir.glob("**/*") do |path|
+ next if path.directory?
+ uploaded_files << path.relative_path_from(dir).to_s
+ end
+ File.open("#{dir}/#{uploaded_files_name}", "w") do |output|
+ output.puts(uploaded_files.sort)
+ end
+ end
+
+ def tmp_dir
+ "binary/tmp"
+ end
+
+ def rc_dir
+ "#{tmp_dir}/rc"
+ end
+
+ def release_dir
+ "#{tmp_dir}/release"
+ end
+
+ def apt_repository_label
+ "Apache Arrow"
+ end
+
+ def apt_repository_description
+ "Apache Arrow packages"
+ end
+
+ def apt_rc_repositories_dir
+ "#{rc_dir}/apt/repositories"
+ end
+
+ def apt_release_repositories_dir
+ "#{release_dir}/apt/repositories"
+ end
+
+ def available_apt_targets
+ [
+ ["debian", "buster", "main"],
+ ["debian", "bullseye", "main"],
+ ["debian", "bookworm", "main"],
+ ["ubuntu", "bionic", "main"],
+ ["ubuntu", "focal", "main"],
+ ["ubuntu", "hirsute", "main"],
+ ["ubuntu", "impish", "main"],
+ ]
+ end
+
+ def apt_targets
+ env_apt_targets = (ENV["APT_TARGETS"] || "").split(",")
+ if env_apt_targets.empty?
+ available_apt_targets
+ else
+ available_apt_targets.select do |distribution, code_name, component|
+ env_apt_targets.any? do |env_apt_target|
+ if env_apt_target.include?("-")
+ env_apt_target.start_with?("#{distribution}-#{code_name}")
+ else
+ env_apt_target == distribution
+ end
+ end
+ end
+ end
+ end
+
+ def apt_distributions
+ apt_targets.collect(&:first).uniq
+ end
+
+ def apt_architectures
+ [
+ "amd64",
+ "arm64",
+ ]
+ end
+
+ def generate_apt_release(dists_dir, code_name, component, architecture)
+ dir = "#{dists_dir}/#{component}/"
+ if architecture == "source"
+ dir << architecture
+ else
+ dir << "binary-#{architecture}"
+ end
+
+ mkdir_p(dir, verbose: verbose?)
+ File.open("#{dir}/Release", "w") do |release|
+ release.puts(<<-RELEASE)
+Archive: #{code_name}
+Component: #{component}
+Origin: #{apt_repository_label}
+Label: #{apt_repository_label}
+Architecture: #{architecture}
+ RELEASE
+ end
+ end
+
+ def generate_apt_ftp_archive_generate_conf(code_name, component)
+ conf = <<-CONF
+Dir::ArchiveDir ".";
+Dir::CacheDir ".";
+TreeDefault::Directory "pool/#{code_name}/#{component}";
+TreeDefault::SrcDirectory "pool/#{code_name}/#{component}";
+Default::Packages::Extensions ".deb";
+Default::Packages::Compress ". gzip xz";
+Default::Sources::Compress ". gzip xz";
+Default::Contents::Compress "gzip";
+ CONF
+
+ apt_architectures.each do |architecture|
+ conf << <<-CONF
+
+BinDirectory "dists/#{code_name}/#{component}/binary-#{architecture}" {
+ Packages "dists/#{code_name}/#{component}/binary-#{architecture}/Packages";
+ Contents "dists/#{code_name}/#{component}/Contents-#{architecture}";
+ SrcPackages "dists/#{code_name}/#{component}/source/Sources";
+};
+ CONF
+ end
+
+ conf << <<-CONF
+
+Tree "dists/#{code_name}" {
+ Sections "#{component}";
+ Architectures "#{apt_architectures.join(" ")} source";
+};
+ CONF
+
+ conf
+ end
+
+ def generate_apt_ftp_archive_release_conf(code_name, component)
+ <<-CONF
+APT::FTPArchive::Release::Origin "#{apt_repository_label}";
+APT::FTPArchive::Release::Label "#{apt_repository_label}";
+APT::FTPArchive::Release::Architectures "#{apt_architectures.join(" ")}";
+APT::FTPArchive::Release::Codename "#{code_name}";
+APT::FTPArchive::Release::Suite "#{code_name}";
+APT::FTPArchive::Release::Components "#{component}";
+APT::FTPArchive::Release::Description "#{apt_repository_description}";
+ CONF
+ end
+
+ def apt_update(base_dir, incoming_dir, merged_dir)
+ apt_targets.each do |distribution, code_name, component|
+ distribution_dir = "#{incoming_dir}/#{distribution}"
+ pool_dir = "#{distribution_dir}/pool/#{code_name}"
+ next unless File.exist?(pool_dir)
+ dists_dir = "#{distribution_dir}/dists/#{code_name}"
+ rm_rf(dists_dir, verbose: verbose?)
+ generate_apt_release(dists_dir, code_name, component, "source")
+ apt_architectures.each do |architecture|
+ generate_apt_release(dists_dir, code_name, component, architecture)
+ end
+
+ generate_conf_file = Tempfile.new("apt-ftparchive-generate.conf")
+ File.open(generate_conf_file.path, "w") do |conf|
+ conf.puts(generate_apt_ftp_archive_generate_conf(code_name,
+ component))
+ end
+ cd(distribution_dir, verbose: verbose?) do
+ sh("apt-ftparchive",
+ "generate",
+ generate_conf_file.path,
+ out: default_output,
+ verbose: verbose?)
+ end
+
+ Dir.glob("#{dists_dir}/Release*") do |release|
+ rm_f(release, verbose: verbose?)
+ end
+ Dir.glob("#{distribution_dir}/*.db") do |db|
+ rm_f(db, verbose: verbose?)
+ end
+ release_conf_file = Tempfile.new("apt-ftparchive-release.conf")
+ File.open(release_conf_file.path, "w") do |conf|
+ conf.puts(generate_apt_ftp_archive_release_conf(code_name,
+ component))
+ end
+ release_file = Tempfile.new("apt-ftparchive-release")
+ sh("apt-ftparchive",
+ "-c", release_conf_file.path,
+ "release",
+ dists_dir,
+ out: release_file.path,
+ verbose: verbose?)
+ mv(release_file.path, "#{dists_dir}/Release", verbose: verbose?)
+
+ base_dists_dir = "#{base_dir}/#{distribution}/dists/#{code_name}"
+ merged_dists_dir = "#{merged_dir}/#{distribution}/dists/#{code_name}"
+ rm_rf(merged_dists_dir)
+ merger = APTDistsMerge::Merger.new(base_dists_dir,
+ dists_dir,
+ merged_dists_dir)
+ merger.merge
+
+ in_release_path = "#{merged_dists_dir}/InRelease"
+ release_path = "#{merged_dists_dir}/Release"
+ signed_release_path = "#{release_path}.gpg"
+ sh("gpg",
+ "--sign",
+ "--detach-sign",
+ "--armor",
+ "--local-user", gpg_key_id,
+ "--output", signed_release_path,
+ release_path,
+ out: default_output,
+ verbose: verbose?)
+ sh("gpg",
+ "--clear-sign",
+ "--local-user", gpg_key_id,
+ "--output", in_release_path,
+ release_path,
+ out: default_output,
+ verbose: verbose?)
+ end
+ end
+
+ def define_apt_staging_tasks
+ namespace :apt do
+ namespace :staging do
+ desc "Prepare staging environment for APT repositories"
+ task :prepare do
+ apt_distributions.each do |distribution|
+ prepare_staging(distribution)
+ end
+ end
+
+ desc "Delete staging environment for APT repositories"
+ task :delete do
+ apt_distributions.each do |distribution|
+ delete_staging(distribution)
+ end
+ end
+ end
+ end
+ end
+
+ def define_apt_rc_tasks
+ namespace :apt do
+ namespace :rc do
+ base_dir = "#{apt_rc_repositories_dir}/base"
+ incoming_dir = "#{apt_rc_repositories_dir}/incoming"
+ merged_dir = "#{apt_rc_repositories_dir}/merged"
+ upload_dir = "#{apt_rc_repositories_dir}/upload"
+
+ desc "Copy .deb packages"
+ task :copy do
+ apt_targets.each do |distribution, code_name, component|
+ progress_label = "Copying: #{distribution} #{code_name}"
+ progress_reporter = ProgressReporter.new(progress_label)
+
+ distribution_dir = "#{incoming_dir}/#{distribution}"
+ pool_dir = "#{distribution_dir}/pool/#{code_name}"
+ rm_rf(pool_dir, verbose: verbose?)
+ mkdir_p(pool_dir, verbose: verbose?)
+ source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{code_name}"
+ Dir.glob("#{source_dir_prefix}*/**/*") do |path|
+ next if File.directory?(path)
+ base_name = File.basename(path)
+ if base_name.start_with?("apache-arrow-apt-source")
+ package_name = "apache-arrow-apt-source"
+ else
+ package_name = "apache-arrow"
+ end
+ destination_path = [
+ pool_dir,
+ component,
+ package_name[0],
+ package_name,
+ base_name,
+ ].join("/")
+ copy_artifact(path,
+ destination_path,
+ progress_reporter)
+ case base_name
+ when /\A[^_]+-apt-source_.*\.deb\z/
+ latest_apt_source_package_path = [
+ distribution_dir,
+ "#{package_name}-latest-#{code_name}.deb"
+ ].join("/")
+ copy_artifact(path,
+ latest_apt_source_package_path,
+ progress_reporter)
+ end
+ end
+ progress_reporter.finish
+ end
+ end
+
+ desc "Download dists/ for RC APT repositories"
+ task :download do
+ apt_distributions.each do |distribution|
+ not_checksum_pattern = /.+(?<!\.asc|\.sha512)\z/
+ base_distribution_dir = "#{base_dir}/#{distribution}"
+ pattern = /\Adists\/#{not_checksum_pattern}/
+ download_distribution(distribution,
+ base_distribution_dir,
+ :base,
+ pattern: pattern)
+ end
+ end
+
+ desc "Sign .deb packages"
+ task :sign do
+ apt_distributions.each do |distribution|
+ distribution_dir = "#{incoming_dir}/#{distribution}"
+ Dir.glob("#{distribution_dir}/**/*.dsc") do |path|
+ begin
+ sh({"LANG" => "C"},
+ "gpg",
+ "--verify",
+ path,
+ out: IO::NULL,
+ err: IO::NULL,
+ verbose: false)
+ rescue
+ sh("debsign",
+ "--no-re-sign",
+ "-k#{gpg_key_id}",
+ path,
+ out: default_output,
+ verbose: verbose?)
+ end
+ end
+ sign_dir(distribution, distribution_dir)
+ end
+ end
+
+ desc "Update RC APT repositories"
+ task :update do
+ apt_update(base_dir, incoming_dir, merged_dir)
+ apt_targets.each do |distribution, code_name, component|
+ dists_dir = "#{merged_dir}/#{distribution}/dists/#{code_name}"
+ next unless File.exist?(dists_dir)
+ sign_dir("#{distribution} #{code_name}",
+ dists_dir)
+ end
+ end
+
+ desc "Upload .deb packages and RC APT repositories"
+ task :upload do
+ apt_distributions.each do |distribution|
+ upload_distribution_dir = "#{upload_dir}/#{distribution}"
+ incoming_distribution_dir = "#{incoming_dir}/#{distribution}"
+ merged_dists_dir = "#{merged_dir}/#{distribution}/dists"
+
+ rm_rf(upload_distribution_dir, verbose: verbose?)
+ mkdir_p(upload_distribution_dir, verbose: verbose?)
+ Dir.glob("#{incoming_distribution_dir}/*") do |path|
+ next if File.basename(path) == "dists"
+ cp_r(path,
+ upload_distribution_dir,
+ preserve: true,
+ verbose: verbose?)
+ end
+ cp_r(merged_dists_dir,
+ upload_distribution_dir,
+ preserve: true,
+ verbose: verbose?)
+ write_uploaded_files(upload_distribution_dir)
+ uploader = ArtifactoryUploader.new(api_key: artifactory_api_key,
+ distribution: distribution,
+ rc: rc,
+ source: upload_distribution_dir,
+ staging: staging?)
+ uploader.upload
+ end
+ end
+ end
+
+ desc "Release RC APT repositories"
+ apt_rc_tasks = [
+ "apt:rc:copy",
+ "apt:rc:download",
+ "apt:rc:sign",
+ "apt:rc:update",
+ "apt:rc:upload",
+ ]
+ apt_rc_tasks.unshift("apt:staging:prepare") if staging?
+ task :rc => apt_rc_tasks
+ end
+ end
+
+ def define_apt_release_tasks
+ directory apt_release_repositories_dir
+
+ namespace :apt do
+ namespace :release do
+ desc "Download RC APT repositories"
+ task :download => apt_release_repositories_dir do
+ apt_distributions.each do |distribution|
+ distribution_dir = "#{apt_release_repositories_dir}/#{distribution}"
+ download_distribution(distribution,
+ distribution_dir,
+ :rc,
+ list: uploaded_files_name)
+ end
+ end
+
+ desc "Upload release APT repositories"
+ task :upload => apt_release_repositories_dir do
+ apt_distributions.each do |distribution|
+ distribution_dir = "#{apt_release_repositories_dir}/#{distribution}"
+ uploader = ArtifactoryUploader.new(api_key: artifactory_api_key,
+ distribution: distribution,
+ source: distribution_dir,
+ staging: staging?)
+ uploader.upload
+ end
+ end
+ end
+
+ desc "Release APT repositories"
+ apt_release_tasks = [
+ "apt:release:download",
+ "apt:release:upload",
+ ]
+ task :release => apt_release_tasks
+ end
+ end
+
+ def define_apt_tasks
+ define_apt_staging_tasks
+ define_apt_rc_tasks
+ define_apt_release_tasks
+ end
+
+ def yum_rc_repositories_dir
+ "#{rc_dir}/yum/repositories"
+ end
+
+ def yum_release_repositories_dir
+ "#{release_dir}/yum/repositories"
+ end
+
+ def available_yum_targets
+ [
+ ["almalinux", "8"],
+ ["amazon-linux", "2"],
+ ["centos", "7"],
+ ["centos", "8"],
+ ]
+ end
+
+ def yum_targets
+ env_yum_targets = (ENV["YUM_TARGETS"] || "").split(",")
+ if env_yum_targets.empty?
+ available_yum_targets
+ else
+ available_yum_targets.select do |distribution, distribution_version|
+ env_yum_targets.any? do |env_yum_target|
+ if /\d/.match?(env_yum_target)
+ env_yum_target.start_with?("#{distribution}-#{distribution_version}")
+ else
+ env_yum_target == distribution
+ end
+ end
+ end
+ end
+ end
+
+ def yum_distributions
+ yum_targets.collect(&:first).uniq
+ end
+
+ def yum_architectures
+ [
+ "aarch64",
+ "x86_64",
+ ]
+ end
+
+ def signed_rpm?(rpm)
+ IO.pipe do |input, output|
+ system("rpm", "--checksig", rpm, out: output)
+ output.close
+ signature = input.gets.sub(/\A#{Regexp.escape(rpm)}: /, "")
+ signature.split.include?("signatures")
+ end
+ end
+
+ def sign_rpms(directory)
+ thread_pool = ThreadPool.new(:gpg) do |rpm|
+ unless signed_rpm?(rpm)
+ sh("rpm",
+ "-D", "_gpg_name #{gpg_key_id}",
+ "-D", "__gpg /usr/bin/gpg",
+ "-D", "__gpg_check_password_cmd /bin/true true",
+ "--resign",
+ rpm,
+ out: default_output,
+ verbose: verbose?)
+ end
+ end
+ Dir.glob("#{directory}/**/*.rpm") do |rpm|
+ thread_pool << rpm
+ end
+ thread_pool.join
+ end
+
+ def rpm_sign(directory)
+ unless system("rpm", "-q",
+ rpm_gpg_key_package_name(gpg_key_id),
+ out: IO::NULL)
+ gpg_key = Tempfile.new(["apache-arrow-binary", ".asc"])
+ sh("gpg",
+ "--armor",
+ "--export", gpg_key_id,
+ out: gpg_key.path,
+ verbose: verbose?)
+ sh("rpm",
+ "--import", gpg_key.path,
+ out: default_output,
+ verbose: verbose?)
+ gpg_key.close!
+ end
+
+ yum_targets.each do |distribution, distribution_version|
+ source_dir = [
+ directory,
+ distribution,
+ distribution_version,
+ ].join("/")
+ sign_rpms(source_dir)
+ end
+ end
+
+ def yum_update(base_dir, incoming_dir)
+ yum_targets.each do |distribution, distribution_version|
+ target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}"
+ target_dir = Pathname(target_dir)
+ next unless target_dir.directory?
+ Dir.glob("#{target_dir}/**/repodata") do |repodata|
+ rm_rf(repodata, verbose: verbose?)
+ end
+ target_dir.glob("*") do |arch_dir|
+ next unless arch_dir.directory?
+ base_repodata_dir = [
+ base_dir,
+ distribution,
+ distribution_version,
+ File.basename(arch_dir),
+ "repodata",
+ ].join("/")
+ if File.exist?(base_repodata_dir)
+ cp_r(base_repodata_dir,
+ arch_dir.to_s,
+ preserve: true,
+ verbose: verbose?)
+ end
+ packages = Tempfile.new("createrepo-c-packages")
+ Pathname.glob("#{arch_dir}/*/*.rpm") do |rpm|
+ relative_rpm = rpm.relative_path_from(arch_dir)
+ packages.puts(relative_rpm.to_s)
+ end
+ packages.close
+ sh("createrepo_c",
+ "--pkglist", packages.path,
+ "--recycle-pkglist",
+ "--retain-old-md-by-age=0",
+ "--skip-stat",
+ "--update",
+ arch_dir.to_s,
+ out: default_output,
+ verbose: verbose?)
+ end
+ end
+ end
+
+ def define_yum_staging_tasks
+ namespace :yum do
+ namespace :staging do
+ desc "Prepare staging environment for Yum repositories"
+ task :prepare do
+ yum_distributions.each do |distribution|
+ prepare_staging(distribution)
+ end
+ end
+
+ desc "Delete staging environment for Yum repositories"
+ task :delete do
+ yum_distributions.each do |distribution|
+ delete_staging(distribution)
+ end
+ end
+ end
+ end
+ end
+
+ def define_yum_rc_tasks
+ namespace :yum do
+ namespace :rc do
+ base_dir = "#{yum_rc_repositories_dir}/base"
+ incoming_dir = "#{yum_rc_repositories_dir}/incoming"
+ upload_dir = "#{yum_rc_repositories_dir}/upload"
+
+ desc "Copy RPM packages"
+ task :copy do
+ yum_targets.each do |distribution, distribution_version|
+ progress_label = "Copying: #{distribution} #{distribution_version}"
+ progress_reporter = ProgressReporter.new(progress_label)
+
+ destination_prefix = [
+ incoming_dir,
+ distribution,
+ distribution_version,
+ ].join("/")
+ rm_rf(destination_prefix, verbose: verbose?)
+ source_dir_prefix =
+ "#{artifacts_dir}/#{distribution}-#{distribution_version}"
+ Dir.glob("#{source_dir_prefix}*/**/*") do |path|
+ next if File.directory?(path)
+ base_name = File.basename(path)
+ type = base_name.split(".")[-2]
+ destination_paths = []
+ case type
+ when "src"
+ destination_paths << [
+ destination_prefix,
+ "Source",
+ "SPackages",
+ base_name,
+ ].join("/")
+ when "noarch"
+ yum_architectures.each do |architecture|
+ destination_paths << [
+ destination_prefix,
+ architecture,
+ "Packages",
+ base_name,
+ ].join("/")
+ end
+ else
+ destination_paths << [
+ destination_prefix,
+ type,
+ "Packages",
+ base_name,
+ ].join("/")
+ end
+ destination_paths.each do |destination_path|
+ copy_artifact(path,
+ destination_path,
+ progress_reporter)
+ end
+ case base_name
+ when /\A(apache-arrow-release)-.*\.noarch\.rpm\z/
+ package_name = $1
+ latest_release_package_path = [
+ destination_prefix,
+ "#{package_name}-latest.rpm"
+ ].join("/")
+ copy_artifact(path,
+ latest_release_package_path,
+ progress_reporter)
+ end
+ end
+
+ progress_reporter.finish
+ end
+ end
+
+ desc "Download repodata for RC Yum repositories"
+ task :download do
+ yum_distributions.each do |distribution|
+ distribution_dir = "#{base_dir}/#{distribution}"
+ download_distribution(distribution,
+ distribution_dir,
+ :base,
+ pattern: /\/repodata\//)
+ end
+ end
+
+ desc "Sign RPM packages"
+ task :sign do
+ rpm_sign(incoming_dir)
+ yum_targets.each do |distribution, distribution_version|
+ source_dir = [
+ incoming_dir,
+ distribution,
+ distribution_version,
+ ].join("/")
+ sign_dir("#{distribution}-#{distribution_version}",
+ source_dir)
+ end
+ end
+
+ desc "Update RC Yum repositories"
+ task :update do
+ yum_update(base_dir, incoming_dir)
+ yum_targets.each do |distribution, distribution_version|
+ target_dir = [
+ incoming_dir,
+ distribution,
+ distribution_version,
+ ].join("/")
+ target_dir = Pathname(target_dir)
+ next unless target_dir.directory?
+ target_dir.glob("*") do |arch_dir|
+ next unless arch_dir.directory?
+ sign_label =
+ "#{distribution}-#{distribution_version} #{arch_dir.basename}"
+ sign_dir(sign_label,
+ arch_dir.to_s)
+ end
+ end
+ end
+
+ desc "Upload RC Yum repositories"
+ task :upload => yum_rc_repositories_dir do
+ yum_distributions.each do |distribution|
+ incoming_target_dir = "#{incoming_dir}/#{distribution}"
+ upload_target_dir = "#{upload_dir}/#{distribution}"
+
+ rm_rf(upload_target_dir, verbose: verbose?)
+ mkdir_p(upload_target_dir, verbose: verbose?)
+ cp_r(Dir.glob("#{incoming_target_dir}/*"),
+ upload_target_dir.to_s,
+ preserve: true,
+ verbose: verbose?)
+ write_uploaded_files(upload_target_dir)
+
+ uploader = ArtifactoryUploader.new(api_key: artifactory_api_key,
+ distribution: distribution,
+ rc: rc,
+ source: upload_target_dir,
+ staging: staging?,
+ sync: true,
+ sync_pattern: /\/repodata\//)
+ uploader.upload
+ end
+ end
+ end
+
+ desc "Release RC Yum packages"
+ yum_rc_tasks = [
+ "yum:rc:copy",
+ "yum:rc:download",
+ "yum:rc:sign",
+ "yum:rc:update",
+ "yum:rc:upload",
+ ]
+ yum_rc_tasks.unshift("yum:staging:prepare") if staging?
+ task :rc => yum_rc_tasks
+ end
+ end
+
+ def define_yum_release_tasks
+ directory yum_release_repositories_dir
+
+ namespace :yum do
+ namespace :release do
+ desc "Download RC Yum repositories"
+ task :download => yum_release_repositories_dir do
+ yum_distributions.each do |distribution|
+ distribution_dir = "#{yum_release_repositories_dir}/#{distribution}"
+ download_distribution(distribution,
+ distribution_dir,
+ :rc,
+ list: uploaded_files_name)
+ end
+ end
+
+ desc "Upload release Yum repositories"
+ task :upload => yum_release_repositories_dir do
+ yum_distributions.each do |distribution|
+ distribution_dir = "#{yum_release_repositories_dir}/#{distribution}"
+ uploader =
+ ArtifactoryUploader.new(api_key: artifactory_api_key,
+ distribution: distribution,
+ source: distribution_dir,
+ staging: staging?,
+ sync: true,
+ sync_pattern: /\/repodata\//)
+ uploader.upload
+ end
+ end
+ end
+
+ desc "Release Yum packages"
+ yum_release_tasks = [
+ "yum:release:download",
+ "yum:release:upload",
+ ]
+ task :release => yum_release_tasks
+ end
+ end
+
+ def define_yum_tasks
+ define_yum_staging_tasks
+ define_yum_rc_tasks
+ define_yum_release_tasks
+ end
+
+ def define_generic_data_rc_tasks(label,
+ id,
+ rc_dir,
+ target_files_glob)
+ directory rc_dir
+
+ namespace id do
+ namespace :rc do
+ desc "Copy #{label} packages"
+ task :copy => rc_dir do
+ progress_label = "Copying: #{label}"
+ progress_reporter = ProgressReporter.new(progress_label)
+
+ Pathname(artifacts_dir).glob(target_files_glob) do |path|
+ next if path.directory?
+ destination_path = [
+ rc_dir,
+ path.basename.to_s,
+ ].join("/")
+ copy_artifact(path, destination_path, progress_reporter)
+ end
+
+ progress_reporter.finish
+ end
+
+ desc "Sign #{label} packages"
+ task :sign => rc_dir do
+ sign_dir(label, rc_dir)
+ end
+
+ desc "Upload #{label} packages"
+ task :upload do
+ uploader =
+ ArtifactoryUploader.new(api_key: artifactory_api_key,
+ destination_prefix: full_version,
+ distribution: id.to_s,
+ rc: rc,
+ source: rc_dir,
+ staging: staging?)
+ uploader.upload
+ end
+ end
+
+ desc "Release RC #{label} packages"
+ rc_tasks = [
+ "#{id}:rc:copy",
+ "#{id}:rc:sign",
+ "#{id}:rc:upload",
+ ]
+ task :rc => rc_tasks
+ end
+ end
+
+ def define_generic_data_release_tasks(label, id, release_dir)
+ directory release_dir
+
+ namespace id do
+ namespace :release do
+ desc "Download RC #{label} packages"
+ task :download => release_dir do
+ download_distribution(id.to_s,
+ release_dir,
+ :rc,
+ prefix: "#{full_version}")
+ end
+
+ desc "Upload release #{label} packages"
+ task :upload => release_dir do
+ uploader = ArtifactoryUploader.new(api_key: artifactory_api_key,
+ destination_prefix: version,
+ distribution: id.to_s,
+ source: release_dir,
+ staging: staging?)
+ uploader.upload
+ end
+ end
+
+ desc "Release #{label} packages"
+ release_tasks = [
+ "#{id}:release:download",
+ "#{id}:release:upload",
+ ]
+ task :release => release_tasks
+ end
+ end
+
+ def define_generic_data_tasks(label,
+ id,
+ rc_dir,
+ release_dir,
+ target_files_glob)
+ define_generic_data_rc_tasks(label, id, rc_dir, target_files_glob)
+ define_generic_data_release_tasks(label, id, release_dir)
+ end
+
+ def define_python_tasks
+ define_generic_data_tasks("Python",
+ :python,
+ "#{rc_dir}/python/#{full_version}",
+ "#{release_dir}/python/#{full_version}",
+ "{python-sdist,wheel-*}/**/*")
+ end
+
+ def define_nuget_tasks
+ define_generic_data_tasks("NuGet",
+ :nuget,
+ "#{rc_dir}/nuget/#{full_version}",
+ "#{release_dir}/nuget/#{full_version}",
+ "nuget/**/*")
+ end
+
+ def define_summary_tasks
+ namespace :summary do
+ desc "Show RC summary"
+ task :rc do
+ suffix = ""
+ suffix << "-staging" if staging?
+ puts(<<-SUMMARY)
+Success! The release candidate binaries are available here:
+ https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}-rc/
+ https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}-rc/
+ https://apache.jfrog.io/artifactory/arrow/centos#{suffix}-rc/
+ https://apache.jfrog.io/artifactory/arrow/debian#{suffix}-rc/
+ https://apache.jfrog.io/artifactory/arrow/nuget#{suffix}-rc/#{full_version}
+ https://apache.jfrog.io/artifactory/arrow/python#{suffix}-rc/#{full_version}
+ https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}-rc/
+ SUMMARY
+ end
+
+ desc "Show release summary"
+ task :release do
+ suffix = ""
+ suffix << "-staging" if staging?
+ puts(<<-SUMMARY)
+Success! The release binaries are available here:
+ https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}/
+ https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}/
+ https://apache.jfrog.io/artifactory/arrow/centos#{suffix}/
+ https://apache.jfrog.io/artifactory/arrow/debian#{suffix}/
+ https://apache.jfrog.io/artifactory/arrow/nuget#{suffix}/#{version}
+ https://apache.jfrog.io/artifactory/arrow/python#{suffix}/#{version}
+ https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}/
+ SUMMARY
+ end
+ end
+ end
+end
diff --git a/src/arrow/dev/release/binary/.dockerignore b/src/arrow/dev/release/binary/.dockerignore
new file mode 100644
index 000000000..f2c46d8ce
--- /dev/null
+++ b/src/arrow/dev/release/binary/.dockerignore
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+/tmp/
diff --git a/src/arrow/dev/release/binary/Dockerfile b/src/arrow/dev/release/binary/Dockerfile
new file mode 100644
index 000000000..a21b32dd7
--- /dev/null
+++ b/src/arrow/dev/release/binary/Dockerfile
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM debian:bullseye
+
+ENV DEBIAN_FRONTEND noninteractive
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ apt-utils \
+ createrepo-c \
+ devscripts \
+ gpg \
+ locales \
+ openssh-server \
+ rake \
+ rpm \
+ ruby \
+ sudo && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN gem install apt-dists-merge -v ">= 1.0.2"
+
+RUN locale-gen en_US.UTF-8
+
+RUN mkdir -p /run/sshd
+RUN echo "StreamLocalBindUnlink yes" >> /etc/ssh/sshd_config
+
+ENV ARROW_USER arrow
+ENV ARROW_UID 10000
+
+RUN \
+ groupadd --gid ${ARROW_UID} ${ARROW_USER} && \
+ useradd --uid ${ARROW_UID} --gid ${ARROW_UID} --create-home ${ARROW_USER} && \
+ mkdir -p /home/arrow/.gnupg /home/arrow/.ssh && \
+ chown -R arrow: /home/arrow/.gnupg /home/arrow/.ssh && \
+ chmod -R og-rwx /home/arrow/.gnupg /home/arrow/.ssh && \
+ echo "${ARROW_USER} ALL=(ALL:ALL) NOPASSWD:ALL" | \
+ EDITOR=tee visudo -f /etc/sudoers.d/arrow
+
+COPY id_rsa.pub /home/arrow/.ssh/authorized_keys
+RUN \
+ chown -R arrow: /home/arrow/.ssh && \
+ chmod -R og-rwx /home/arrow/.ssh
+
+COPY runner.sh /home/arrow/runner.sh
+RUN \
+ chown -R arrow: /home/arrow/runner.sh && \
+ chmod +x /home/arrow/runner.sh
+
+EXPOSE 22
diff --git a/src/arrow/dev/release/binary/runner.sh b/src/arrow/dev/release/binary/runner.sh
new file mode 100755
index 000000000..465d60d62
--- /dev/null
+++ b/src/arrow/dev/release/binary/runner.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -u
+
+export LANG=C
+
+target_dir=/host/binary/tmp
+original_owner=$(stat --format=%u ${target_dir})
+original_group=$(stat --format=%g ${target_dir})
+
+sudo -H chown -R ${USER}: ${target_dir}
+restore_owner() {
+ sudo -H chown -R ${original_owner}:${original_group} ${target_dir}
+}
+trap restore_owner EXIT
+
+cd /host
+
+"$@"
diff --git a/src/arrow/dev/release/check-rat-report.py b/src/arrow/dev/release/check-rat-report.py
new file mode 100644
index 000000000..a5718103a
--- /dev/null
+++ b/src/arrow/dev/release/check-rat-report.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+##############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+##############################################################################
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+ sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+ sys.argv[0])
+ sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+globs = [line.strip() for line in open(exclude_globs_filename, "r")]
+
+tree = ET.parse(xml_filename)
+root = tree.getroot()
+resources = root.findall('resource')
+
+all_ok = True
+for r in resources:
+ approvals = r.findall('license-approval')
+ if not approvals or approvals[0].attrib['name'] == 'true':
+ continue
+ clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+ excluded = False
+ for g in globs:
+ if fnmatch.fnmatch(clean_name, g):
+ excluded = True
+ break
+ if not excluded:
+ sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+ clean_name, r.attrib['name'], approvals[0].attrib['name']))
+ all_ok = False
+
+if not all_ok:
+ sys.exit(1)
+
+print('OK')
+sys.exit(0)
diff --git a/src/arrow/dev/release/download_rc_binaries.py b/src/arrow/dev/release/download_rc_binaries.py
new file mode 100755
index 000000000..3e3d0f7d3
--- /dev/null
+++ b/src/arrow/dev/release/download_rc_binaries.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import argparse
+import concurrent.futures as cf
+import functools
+import os
+import subprocess
+import urllib.request
+
+
+ARTIFACTORY_ROOT = "https://apache.jfrog.io/artifactory/arrow"
+DEFAULT_PARALLEL_DOWNLOADS = 8
+
+
+class Artifactory:
+
+ def get_file_list(self, prefix):
+ def traverse(directory, files, directories):
+ url = f'{ARTIFACTORY_ROOT}/{directory}'
+ response = urllib.request.urlopen(url).read().decode()
+ paths = re.findall('<a href="(.+?)"', response)
+ for path in paths:
+ if path == '../':
+ continue
+ resolved_path = f'{directory}{path}'
+ if path.endswith('/'):
+ directories.append(resolved_path)
+ else:
+ files.append(resolved_path)
+ files = []
+ if not prefix.endswith('/'):
+ prefix += '/'
+ directories = [prefix]
+ while len(directories) > 0:
+ directory = directories.pop()
+ traverse(directory, files, directories)
+ return files
+
+ def download_files(self, files, dest=None, num_parallel=None,
+ re_match=None):
+ """
+ Download files from Bintray in parallel. If file already exists, will
+ overwrite if the checksum does not match what Bintray says it should be
+
+ Parameters
+ ----------
+ files : List[Dict]
+ File listing from Bintray
+ dest : str, default None
+ Defaults to current working directory
+ num_parallel : int, default 8
+ Number of files to download in parallel. If set to None, uses
+ default
+ """
+ if dest is None:
+ dest = os.getcwd()
+ if num_parallel is None:
+ num_parallel = DEFAULT_PARALLEL_DOWNLOADS
+
+ if re_match is not None:
+ regex = re.compile(re_match)
+ files = [x for x in files if regex.match(x)]
+
+ if num_parallel == 1:
+ for path in files:
+ self._download_file(dest, path)
+ else:
+ parallel_map_terminate_early(
+ functools.partial(self._download_file, dest),
+ files,
+ num_parallel
+ )
+
+ def _download_file(self, dest, path):
+ base, filename = os.path.split(path)
+
+ dest_dir = os.path.join(dest, base)
+ os.makedirs(dest_dir, exist_ok=True)
+
+ dest_path = os.path.join(dest_dir, filename)
+
+ print("Downloading {} to {}".format(path, dest_path))
+
+ url = f'{ARTIFACTORY_ROOT}/{path}'
+
+ cmd = [
+ 'curl', '--fail', '--location', '--retry', '5',
+ '--output', dest_path, url
+ ]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ stdout, stderr = proc.communicate()
+ if proc.returncode != 0:
+ raise Exception("Downloading {} failed\nstdout: {}\nstderr: {}"
+ .format(path, stdout, stderr))
+
+
+def parallel_map_terminate_early(f, iterable, num_parallel):
+ tasks = []
+ with cf.ProcessPoolExecutor(num_parallel) as pool:
+ for v in iterable:
+ tasks.append(pool.submit(functools.partial(f, v)))
+
+ for task in cf.as_completed(tasks):
+ if task.exception() is not None:
+ e = task.exception()
+ for task in tasks:
+ task.cancel()
+ raise e
+
+
+ARROW_REPOSITORY_PACKAGE_TYPES = ['centos', 'debian', 'ubuntu']
+ARROW_STANDALONE_PACKAGE_TYPES = ['nuget', 'python']
+ARROW_PACKAGE_TYPES = \
+ ARROW_REPOSITORY_PACKAGE_TYPES + \
+ ARROW_STANDALONE_PACKAGE_TYPES
+
+
+def download_rc_binaries(version, rc_number, re_match=None, dest=None,
+ num_parallel=None, target_package_type=None):
+ artifactory = Artifactory()
+
+ version_string = '{}-rc{}'.format(version, rc_number)
+ if target_package_type:
+ package_types = [target_package_type]
+ else:
+ package_types = ARROW_PACKAGE_TYPES
+ for package_type in package_types:
+ if package_type in ARROW_REPOSITORY_PACKAGE_TYPES:
+ prefix = f'{package_type}-rc'
+ else:
+ prefix = f'{package_type}-rc/{version_string}'
+ files = artifactory.get_file_list(prefix)
+ if package_type in ARROW_REPOSITORY_PACKAGE_TYPES:
+ version_pattern = re.compile(r'\d+\.\d+\.\d+')
+
+ def is_old_release(path):
+ match = version_pattern.search(path)
+ if not match:
+ return False
+ return match[0] != version
+ files = [x for x in files if not is_old_release(x)]
+ artifactory.download_files(files, re_match=re_match, dest=dest,
+ num_parallel=num_parallel)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description='Download release candidate binaries'
+ )
+ parser.add_argument('version', type=str, help='The version number')
+ parser.add_argument('rc_number', type=int,
+ help='The release candidate number, e.g. 0, 1, etc')
+ parser.add_argument('-e', '--regexp', type=str, default=None,
+ help=('Regular expression to match on file names '
+ 'to only download certain files'))
+ parser.add_argument('--dest', type=str, default=os.getcwd(),
+ help='The output folder for the downloaded files')
+ parser.add_argument('--num_parallel', type=int, default=8,
+ help='The number of concurrent downloads to do')
+ parser.add_argument('--package_type', type=str, default=None,
+ help='The package type to be downloaded')
+ args = parser.parse_args()
+
+ download_rc_binaries(args.version, args.rc_number, dest=args.dest,
+ re_match=args.regexp, num_parallel=args.num_parallel,
+ target_package_type=args.package_type)
diff --git a/src/arrow/dev/release/post-01-upload.sh b/src/arrow/dev/release/post-01-upload.sh
new file mode 100755
index 000000000..5671c3746
--- /dev/null
+++ b/src/arrow/dev/release/post-01-upload.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -e
+set -u
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc=$2
+
+tmp_dir=tmp-apache-arrow-dist
+
+echo "Recreate temporary directory: ${tmp_dir}"
+rm -rf ${tmp_dir}
+mkdir -p ${tmp_dir}
+
+echo "Clone dev dist repository"
+svn \
+ co \
+ https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-${version}-rc${rc} \
+ ${tmp_dir}/dev
+
+echo "Clone release dist repository"
+svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release
+
+echo "Copy ${version}-rc${rc} to release working copy"
+release_version=arrow-${version}
+mkdir -p ${tmp_dir}/release/${release_version}
+cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/
+svn add ${tmp_dir}/release/${release_version}
+
+echo "Keep only the three most recent versions"
+old_releases=$(
+ svn ls ${tmp_dir}/release/ | \
+ grep -E '^arrow-[0-9\.]+' | \
+ sort --version-sort --reverse | \
+ tail -n +4
+)
+for old_release_version in $old_releases; do
+ echo "Remove old release ${old_release_version}"
+ svn delete ${tmp_dir}/release/${old_release_version}
+done
+
+echo "Commit release"
+svn ci -m "Apache Arrow ${version}" ${tmp_dir}/release
+
+echo "Clean up"
+rm -rf ${tmp_dir}
+
+echo "Success! The release is available here:"
+echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}"
diff --git a/src/arrow/dev/release/post-02-binary.sh b/src/arrow/dev/release/post-02-binary.sh
new file mode 100755
index 000000000..b1b41f9fb
--- /dev/null
+++ b/src/arrow/dev/release/post-02-binary.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc=$2
+
+cd "${SOURCE_DIR}"
+
+if [ ! -f .env ]; then
+ echo "You must create $(pwd)/.env"
+ echo "You can use $(pwd)/.env.example as template"
+ exit 1
+fi
+. .env
+
+. utils-binary.sh
+
+# By default deploy all artifacts.
+# To deactivate one category, deactivate the category and all of its dependents.
+# To explicitly select one category, set DEPLOY_DEFAULT=0 DEPLOY_X=1.
+: ${DEPLOY_DEFAULT:=1}
+: ${DEPLOY_ALMALINUX:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_AMAZON_LINUX:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_CENTOS:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_DEBIAN:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_NUGET:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_PYTHON:=${DEPLOY_DEFAULT}}
+: ${DEPLOY_UBUNTU:=${DEPLOY_DEFAULT}}
+
+rake_tasks=()
+apt_targets=()
+yum_targets=()
+if [ ${DEPLOY_ALMALINUX} -gt 0 ]; then
+ rake_tasks+=(yum:release)
+ yum_targets+=(almalinux)
+fi
+if [ ${DEPLOY_AMAZON_LINUX} -gt 0 ]; then
+ rake_tasks+=(yum:release)
+ yum_targets+=(amazon-linux)
+fi
+if [ ${DEPLOY_CENTOS} -gt 0 ]; then
+ rake_tasks+=(yum:release)
+ yum_targets+=(centos)
+fi
+if [ ${DEPLOY_DEBIAN} -gt 0 ]; then
+ rake_tasks+=(apt:release)
+ apt_targets+=(debian)
+fi
+if [ ${DEPLOY_NUGET} -gt 0 ]; then
+ rake_tasks+=(nuget:release)
+fi
+if [ ${DEPLOY_PYTHON} -gt 0 ]; then
+ rake_tasks+=(python:release)
+fi
+if [ ${DEPLOY_UBUNTU} -gt 0 ]; then
+ rake_tasks+=(apt:release)
+ apt_targets+=(ubuntu)
+fi
+rake_tasks+=(summary:release)
+
+tmp_dir=binary/tmp
+mkdir -p "${tmp_dir}"
+
+docker_run \
+ ./runner.sh \
+ rake \
+ --trace \
+ "${rake_tasks[@]}" \
+ APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
+ ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \
+ ARTIFACTS_DIR="${tmp_dir}/artifacts" \
+ RC=${rc} \
+ STAGING=${STAGING:-no} \
+ VERSION=${version} \
+ YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}")
diff --git a/src/arrow/dev/release/post-03-website.sh b/src/arrow/dev/release/post-03-website.sh
new file mode 100755
index 000000000..7aceeaf59
--- /dev/null
+++ b/src/arrow/dev/release/post-03-website.sh
@@ -0,0 +1,266 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ARROW_DIR="${SOURCE_DIR}/../.."
+ARROW_SITE_DIR="${ARROW_DIR}/../arrow-site"
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <previous-version> <version>"
+ exit 1
+fi
+
+previous_version=$1
+version=$2
+
+branch_name=release-note-${version}
+release_dir="${ARROW_SITE_DIR}/_release"
+announce_file="${release_dir}/${version}.md"
+versions_yml="${ARROW_SITE_DIR}/_data/versions.yml"
+
+pushd "${ARROW_SITE_DIR}"
+git checkout master
+git checkout -b ${branch_name}
+popd
+
+pushd "${ARROW_DIR}"
+
+release_date=$(LANG=C date "+%-d %B %Y")
+previous_tag_date=$(git log -n 1 --pretty=%aI apache-arrow-${previous_version})
+rough_previous_release_date=$(date --date "${previous_tag_date}" +%s)
+rough_release_date=$(date +%s)
+rough_n_development_months=$((
+ (${rough_release_date} - ${rough_previous_release_date}) / (60 * 60 * 24 * 30)
+))
+
+git_tag=apache-arrow-${version}
+git_range=apache-arrow-${previous_version}..${git_tag}
+
+committers_command_line="git shortlog -csn ${git_range}"
+contributors_command_line="git shortlog -sn ${git_range}"
+
+committers=$(${committers_command_line})
+contributors=$(${contributors_command_line})
+
+n_commits=$(git log --pretty=oneline ${git_range} | wc -l)
+n_contributors=$(${contributors_command_line} | wc -l)
+
+git_tag_hash=$(git log -n 1 --pretty=%H ${git_tag})
+
+popd
+
+pushd "${ARROW_SITE_DIR}"
+
+# Add announce for the current version
+cat <<ANNOUNCE > "${announce_file}"
+---
+layout: default
+title: Apache Arrow ${version} Release
+permalink: /release/${version}.html
+---
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% endcomment %}
+-->
+
+# Apache Arrow ${version} (${release_date})
+
+This is a major release covering more than ${rough_n_development_months} months of development.
+
+## Download
+
+* [**Source Artifacts**][1]
+* **Binary Artifacts**
+ * [For CentOS][2]
+ * [For Debian][3]
+ * [For Python][4]
+ * [For Ubuntu][5]
+* [Git tag][6]
+
+## Contributors
+
+This release includes ${n_commits} commits from ${n_contributors} distinct contributors.
+
+\`\`\`console
+$ ${contributors_command_line}
+ANNOUNCE
+
+echo "${contributors}" >> "${announce_file}"
+
+cat <<ANNOUNCE >> "${announce_file}"
+\`\`\`
+
+## Patch Committers
+
+The following Apache committers merged contributed patches to the repository.
+
+\`\`\`console
+$ ${committers_command_line}
+ANNOUNCE
+
+echo "${committers}" >> "${announce_file}"
+
+cat <<ANNOUNCE >> "${announce_file}"
+\`\`\`
+
+## Changelog
+
+ANNOUNCE
+
+archery release changelog generate ${version} | \
+ sed -e 's/^#/##/g' >> "${announce_file}"
+
+cat <<ANNOUNCE >> "${announce_file}"
+[1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/
+[2]: https://apache.jfrog.io/artifactory/arrow/centos/
+[3]: https://apache.jfrog.io/artifactory/arrow/debian/
+[4]: https://apache.jfrog.io/artifactory/arrow/python/${version}/
+[5]: https://apache.jfrog.io/artifactory/arrow/ubuntu/
+[6]: https://github.com/apache/arrow/releases/tag/apache-arrow-${version}
+ANNOUNCE
+git add "${announce_file}"
+
+
+# Update index
+pushd "${release_dir}"
+
+index_file=index.md
+rm -f ${index_file}
+announce_files="$(ls | sort --version-sort --reverse)"
+cat <<INDEX > ${index_file}
+---
+layout: default
+title: Releases
+permalink: /release/index.html
+---
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% endcomment %}
+-->
+
+# Apache Arrow Releases
+
+Navigate to the release page for downloads and the changelog.
+
+INDEX
+
+i=0
+for md_file in ${announce_files}; do
+ i=$((i + 1))
+ title=$(grep '^# Apache Arrow' ${md_file} | sed -e 's/^# Apache Arrow //')
+ echo "* [${title}][${i}]" >> ${index_file}
+done
+echo >> ${index_file}
+
+i=0
+for md_file in ${announce_files}; do
+ i=$((i + 1))
+ html_file=$(echo ${md_file} | sed -e 's/md$/html/')
+ echo "[${i}]: {{ site.baseurl }}/release/${html_file}" >> ${index_file}
+done
+
+git add ${index_file}
+
+popd
+
+
+# Update versions.yml
+pinned_version=$(echo ${version} | sed -e 's/\.[^.]*$/.*/')
+
+apache_download_url=https://downloads.apache.org
+
+cat <<YAML > "${versions_yml}"
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Database of the current version
+#
+current:
+ number: '${version}'
+ pinned_number: '${pinned_version}'
+ date: '${release_date}'
+ git-tag: '${git_tag_hash}'
+ github-tag-link: 'https://github.com/apache/arrow/releases/tag/${git_tag}'
+ release-notes: 'https://arrow.apache.org/release/${version}.html'
+ mirrors: 'https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/'
+ tarball-name: 'apache-arrow-${version}.tar.gz'
+ tarball-url: 'https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz'
+ java-artifacts: 'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%22${version}%22'
+ asc: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.asc'
+ sha256: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256'
+ sha512: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha512'
+YAML
+git add "${versions_yml}"
+
+git commit -m "[Website] Add release note for ${version}"
+git push -u origin ${branch_name}
+
+github_url=$(git remote get-url origin | \
+ sed \
+ -e 's,^git@github.com:,https://github.com/,' \
+ -e 's,\.git$,,')
+
+echo "Success!"
+echo "Create a pull request:"
+echo " ${github_url}/pull/new/${branch_name}"
+
+popd
diff --git a/src/arrow/dev/release/post-04-ruby.sh b/src/arrow/dev/release/post-04-ruby.sh
new file mode 100755
index 000000000..edcb54c13
--- /dev/null
+++ b/src/arrow/dev/release/post-04-ruby.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -e
+set -o pipefail
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+echo "NOTE: We should release RubyGems after Homebrew and MSYS2 packages are updated!!!"
+
+echo "Checking Homebrew package..."
+homebrew_version=$(
+ curl \
+ --fail \
+ --no-progress-meter \
+ https://raw.githubusercontent.com/Homebrew/homebrew-core/master/Formula/apache-arrow-glib.rb | \
+ grep url | \
+ grep -o "[0-9]*\.[0-9]*\.[0-9]*" | \
+ head -n 1)
+echo "Homebrew package version: ${homebrew_version}"
+if [ "${version}" = "${homebrew_version}" ]; then
+ echo "OK!"
+else
+ echo "Different!"
+ exit 1
+fi
+
+
+echo "Checking MSYS2 package..."
+msys2_version=$(
+ curl \
+ --fail \
+ --no-progress-meter \
+ https://packages.msys2.org/base/mingw-w64-arrow | \
+ grep -A 1 ">Version:<" | \
+ grep -o "[0-9]*\.[0-9]*\.[0-9]*")
+echo "MSYS2 package version: ${msys2_version}"
+if [ "${version}" = "${msys2_version}" ]; then
+ echo "OK!"
+else
+ echo "Different!"
+ exit 1
+fi
+
+
+rm -f ${tar_gz}
+curl \
+ --remote-name \
+ --fail \
+ https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}
+rm -rf ${archive_name}
+tar xf ${tar_gz}
+modules=()
+for module in ${archive_name}/ruby/red-*; do
+ pushd ${module}
+ rake release
+ modules+=($(basename ${module}))
+ popd
+done
+rm -rf ${archive_name}
+rm -f ${tar_gz}
+
+echo "Success! The released RubyGems are available here:"
+for module in ${modules[@]}; do
+ echo " https://rubygems.org/gems/${module}/versions/${version}"
+done
diff --git a/src/arrow/dev/release/post-05-js.sh b/src/arrow/dev/release/post-05-js.sh
new file mode 100755
index 000000000..edc5fe20b
--- /dev/null
+++ b/src/arrow/dev/release/post-05-js.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -e
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+rm -f ${tar_gz}
+curl \
+ --remote-name \
+ --fail \
+ https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}
+rm -rf ${archive_name}
+tar xf ${tar_gz}
+pushd ${archive_name}/js
+./npm-release.sh
+popd
+rm -rf ${archive_name}
+rm -f ${tar_gz}
+
+echo "Success! The released npm packages are available here:"
+echo " https://www.npmjs.com/package/apache-arrow/v/${version}"
diff --git a/src/arrow/dev/release/post-06-csharp.sh b/src/arrow/dev/release/post-06-csharp.sh
new file mode 100755
index 000000000..d2968a5d5
--- /dev/null
+++ b/src/arrow/dev/release/post-06-csharp.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -eux
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+
+if [ -z "${NUGET_API_KEY}" ]; then
+ echo "NUGET_API_KEY is empty"
+ exit 1
+fi
+
+base_names=()
+base_names+=(Apache.Arrow.${version})
+base_names+=(Apache.Arrow.Flight.${version})
+base_names+=(Apache.Arrow.Flight.AspNetCore.${version})
+for base_name in ${base_names[@]}; do
+ for extension in nupkg snupkg; do
+ path=${base_name}.${extension}
+ rm -f ${path}
+ curl \
+ --fail \
+ --location \
+ --remote-name \
+ https://apache.jfrog.io/artifactory/arrow/nuget/${version}/${path}
+ done
+ dotnet nuget push \
+ ${base_name}.nupkg \
+ -k ${NUGET_API_KEY} \
+ -s https://api.nuget.org/v3/index.json
+ rm -f ${base_name}.{nupkg,snupkg}
+done
+
+echo "Success! The released NuGet package is available here:"
+echo " https://www.nuget.org/packages/Apache.Arrow/${version}"
diff --git a/src/arrow/dev/release/post-08-remove-rc.sh b/src/arrow/dev/release/post-08-remove-rc.sh
new file mode 100755
index 000000000..8e02b7e95
--- /dev/null
+++ b/src/arrow/dev/release/post-08-remove-rc.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+set -u
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+
+base_url=https://dist.apache.org/repos/dist/dev/arrow
+pattern="^apache-arrow-${version}-rc"
+paths=$()
+if svn ls ${base_url} | grep "${pattern}" > /dev/null 2>&1; then
+ rc_paths=$(svn ls ${base_url} | grep "${pattern}")
+ rc_urls=()
+ for rc_path in ${rc_paths}; do
+ rc_urls+=(${base_url}/${rc_path})
+ done
+ svn rm --message "Remove RC for ${version}" ${rc_urls[@]}
+ echo "Removed RC artifacts:"
+ for rc_url in ${rc_urls[@]}; do
+ echo " ${rc_url}"
+ done
+else
+ echo "No RC artifacts at ${base_url}"
+fi
diff --git a/src/arrow/dev/release/post-09-docs.sh b/src/arrow/dev/release/post-09-docs.sh
new file mode 100755
index 000000000..9c0b77bb5
--- /dev/null
+++ b/src/arrow/dev/release/post-09-docs.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ARROW_DIR="${SOURCE_DIR}/../.."
+ARROW_SITE_DIR="${ARROW_DIR}/../arrow-site"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit 1
+fi
+
+version=$1
+release_tag="apache-arrow-${version}"
+branch_name=release-docs-${version}
+
+pushd "${ARROW_SITE_DIR}"
+git checkout asf-site
+git checkout -b ${branch_name}
+rm -rf docs/*
+git checkout docs/c_glib/index.html
+popd
+
+pushd "${ARROW_DIR}"
+git checkout "${release_tag}"
+
+UBUNTU=20.10 archery docker run \
+ -v "${ARROW_SITE_DIR}/docs:/build/docs" \
+ -e ARROW_DOCS_VERSION="${version}" \
+ ubuntu-docs
+
+: ${PUSH:=1}
+
+if [ ${PUSH} -gt 0 ]; then
+ pushd "${ARROW_SITE_DIR}"
+ git add docs
+ git commit -m "[Website] Update documentations for ${version}"
+ git push -u origin ${branch_name}
+ github_url=$(git remote get-url origin | \
+ sed \
+ -e 's,^git@github.com:,https://github.com/,' \
+ -e 's,\.git$,,')
+ popd
+
+ echo "Success!"
+ echo "Create a pull request:"
+ echo " ${github_url}/pull/new/${branch_name}"
+fi
diff --git a/src/arrow/dev/release/post-10-python.sh b/src/arrow/dev/release/post-10-python.sh
new file mode 100755
index 000000000..a014239ea
--- /dev/null
+++ b/src/arrow/dev/release/post-10-python.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+: ${TEST_PYPI:=0}
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <rc-num>"
+ exit
+fi
+
+version=$1
+rc=$2
+
+tmp=$(mktemp -d -t "arrow-post-python.XXXXX")
+${PYTHON:-python} \
+ "${SOURCE_DIR}/download_rc_binaries.py" \
+ ${version} \
+ ${rc} \
+ --dest="${tmp}" \
+ --package_type=python \
+ --regex=".*\.(whl|tar\.gz)$"
+
+if [ ${TEST_PYPI} -gt 0 ]; then
+ TWINE_ARGS="--repository-url https://test.pypi.org/legacy/"
+fi
+
+twine upload ${TWINE_ARGS} ${tmp}/python-rc/${version}-rc${rc}/*.{whl,tar.gz}
+
+rm -rf "${tmp}"
+
+echo "Success! The released PyPI packages are available here:"
+echo " https://pypi.org/project/pyarrow/${version}"
diff --git a/src/arrow/dev/release/post-11-java.sh b/src/arrow/dev/release/post-11-java.sh
new file mode 100755
index 000000000..86e6e9b57
--- /dev/null
+++ b/src/arrow/dev/release/post-11-java.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+rm -f ${tar_gz}
+curl \
+ --remote-name \
+ --fail \
+ https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}
+rm -rf ${archive_name}
+tar xf ${tar_gz}
+
+pushd ${archive_name}
+
+# clone the testing data to the appropiate directories
+git clone https://github.com/apache/arrow-testing.git testing
+git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing
+
+# build the jni bindings similarly like the 01-perform.sh does
+mkdir -p cpp/java-build
+pushd cpp/java-build
+cmake \
+ -DARROW_DATASET=ON \
+ -DARROW_FILESYSTEM=ON \
+ -DARROW_GANDIVA_JAVA=ON \
+ -DARROW_GANDIVA=ON \
+ -DARROW_JNI=ON \
+ -DARROW_ORC=ON \
+ -DARROW_PARQUET=ON \
+ -DCMAKE_BUILD_TYPE=release \
+ -G Ninja \
+ ..
+ninja
+popd
+
+# go in the java subfolder
+pushd java
+# stage the artifacts using both the apache-release and arrow-jni profiles
+# Note: on ORC checkstyle failure use -Dcheckstyle.skip=true until https://issues.apache.org/jira/browse/ARROW-12552 gets resolved
+mvn -Papache-release,arrow-jni -Darrow.cpp.build.dir=$(realpath ../cpp/java-build/release) deploy
+popd
+
+popd
+
+echo "Success! The maven artifacts have been stated. Proceed with the following steps:"
+echo "1. Login to the apache repository: https://repository.apache.org/#stagingRepositories"
+echo "2. Select the arrow staging repository you just just created: orgapachearrow-100x"
+echo "3. Click the \"close\" button"
+echo "4. Once validation has passed, click the \"release\" button"
+echo ""
+echo "Note, that you must set up Maven to be able to publish to Apache's repositories."
+echo "Read more at https://www.apache.org/dev/publishing-maven-artifacts.html."
diff --git a/src/arrow/dev/release/post-12-bump-versions.sh b/src/arrow/dev/release/post-12-bump-versions.sh
new file mode 100755
index 000000000..8474f03d2
--- /dev/null
+++ b/src/arrow/dev/release/post-12-bump-versions.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -ue
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <version> <next_version>"
+ exit 1
+fi
+
+: ${BUMP_DEFAULT:=1}
+: ${BUMP_VERSION_POST_TAG:=${BUMP_DEFAULT}}
+: ${BUMP_DEB_PACKAGE_NAMES:=${BUMP_DEFAULT}}
+
+. $SOURCE_DIR/utils-prepare.sh
+
+version=$1
+next_version=$2
+next_version_snapshot="${next_version}-SNAPSHOT"
+
+if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then
+ echo "Updating versions for ${next_version_snapshot}"
+ update_versions "${version}" "${next_version}" "snapshot"
+ git commit -m "[Release] Update versions for ${next_version_snapshot}"
+fi
+
+if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ]; then
+ echo "Updating .deb package names for ${next_version}"
+ so_version() {
+ local version=$1
+ local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/')
+ local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/')
+ expr ${major_version} \* 100 + ${minor_version}
+ }
+ deb_lib_suffix=$(so_version $version)
+ next_deb_lib_suffix=$(so_version $next_version)
+ if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then
+ cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow
+ for target in debian*/lib*${deb_lib_suffix}.install; do
+ git mv \
+ ${target} \
+ $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/")
+ done
+ deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g"
+ sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control*
+ rm -f debian*/control*.bak
+ git add debian*/control*
+ cd -
+ cd $SOURCE_DIR/../tasks/
+ sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml
+ rm -f tasks.yml.bak
+ git add tasks.yml
+ cd -
+ cd $SOURCE_DIR
+ sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt
+ rm -f rat_exclude_files.txt.bak
+ git add rat_exclude_files.txt
+ git commit -m "[Release] Update .deb package names for $next_version"
+ cd -
+ fi
+fi
diff --git a/src/arrow/dev/release/post-13-go.sh b/src/arrow/dev/release/post-13-go.sh
new file mode 100644
index 000000000..7c6034837
--- /dev/null
+++ b/src/arrow/dev/release/post-13-go.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -ue
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <version>"
+ exit
+fi
+
+version=$1
+version_tag="apache-arrow-${version}"
+go_arrow_tag="go/v${version}"
+
+git tag "${go_arrow_tag}" "${version_tag}"
+git push apache "${go_arrow_tag}"
diff --git a/src/arrow/dev/release/rat_exclude_files.txt b/src/arrow/dev/release/rat_exclude_files.txt
new file mode 100644
index 000000000..47fcf618f
--- /dev/null
+++ b/src/arrow/dev/release/rat_exclude_files.txt
@@ -0,0 +1,208 @@
+*.npmrc
+*.gitignore
+.gitmodules
+*_generated.h
+*_generated.js
+*_generated.ts
+*.csv
+*.json
+*.snap
+.github/ISSUE_TEMPLATE/question.md
+ci/etc/rprofile
+ci/etc/*.patch
+ci/vcpkg/*.patch
+CHANGELOG.md
+cpp/CHANGELOG_PARQUET.md
+cpp/src/arrow/io/mman.h
+cpp/src/arrow/util/random.h
+cpp/src/arrow/status.cc
+cpp/src/arrow/status.h
+cpp/src/arrow/vendored/*
+cpp/build-support/asan_symbolize.py
+cpp/build-support/cpplint.py
+cpp/build-support/lint_exclusions.txt
+cpp/build-support/iwyu/*
+cpp/cmake_modules/FindPythonLibsNew.cmake
+cpp/cmake_modules/SnappyCMakeLists.txt
+cpp/cmake_modules/SnappyConfig.h
+cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake
+cpp/src/parquet/.parquetcppversion
+cpp/src/generated/parquet_constants.cpp
+cpp/src/generated/parquet_constants.h
+cpp/src/generated/parquet_types.cpp
+cpp/src/generated/parquet_types.h
+cpp/src/plasma/thirdparty/ae/ae.c
+cpp/src/plasma/thirdparty/ae/ae.h
+cpp/src/plasma/thirdparty/ae/ae_epoll.c
+cpp/src/plasma/thirdparty/ae/ae_evport.c
+cpp/src/plasma/thirdparty/ae/ae_kqueue.c
+cpp/src/plasma/thirdparty/ae/ae_select.c
+cpp/src/plasma/thirdparty/ae/config.h
+cpp/src/plasma/thirdparty/ae/zmalloc.h
+cpp/src/plasma/thirdparty/dlmalloc.c
+cpp/thirdparty/flatbuffers/include/flatbuffers/base.h
+cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h
+cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h
+dev/requirements*.txt
+dev/archery/MANIFEST.in
+dev/archery/requirements*.txt
+dev/archery/archery/tests/fixtures/*
+dev/archery/archery/crossbow/tests/fixtures/*
+dev/release/rat_exclude_files.txt
+dev/tasks/homebrew-formulae/apache-arrow.rb
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
+dev/tasks/linux-packages/apache-arrow/debian/compat
+dev/tasks/linux-packages/apache-arrow/debian/control.in
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-flight-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
+dev/tasks/linux-packages/apache-arrow/debian/patches/series
+dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
+dev/tasks/linux-packages/apache-arrow/debian/rules
+dev/tasks/linux-packages/apache-arrow/debian/source/format
+dev/tasks/linux-packages/apache-arrow/debian/watch
+dev/tasks/requirements*.txt
+dev/tasks/conda-recipes/*
+docs/requirements.txt
+go/arrow/flight/Flight_grpc.pb.go
+go/go.sum
+go/arrow/Gopkg.lock
+go/arrow/flight/Flight.pb.go
+go/arrow/flight/Flight_grpc.pb.go
+go/arrow/internal/cpu/*
+go/arrow/type_string.go
+go/arrow/cdata/test/go.sum
+go/*.tmpldata
+go/*.s
+go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
+go/parquet/internal/gen-go/parquet/parquet-consts.go
+go/parquet/internal/gen-go/parquet/parquet.go
+js/.npmignore
+js/closure-compiler-scripts/*
+js/src/fb/*.ts
+js/yarn.lock
+js/.eslintignore
+python/cmake_modules
+python/cmake_modules/FindPythonLibsNew.cmake
+python/cmake_modules/SnappyCMakeLists.txt
+python/cmake_modules/SnappyConfig.h
+python/MANIFEST.in
+python/manylinux1/.dockerignore
+python/pyarrow/includes/__init__.pxd
+python/pyarrow/tests/__init__.py
+python/pyarrow/vendored/*
+python/requirements*.txt
+pax_global_header
+MANIFEST.in
+__init__.pxd
+__init__.py
+requirements.txt
+csharp/.gitattributes
+csharp/dummy.git/*
+csharp/src/Apache.Arrow/Flatbuf/*
+csharp/Apache.Arrow.sln
+csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj
+csharp/examples/Examples.sln
+csharp/src/Apache.Arrow/Apache.Arrow.csproj
+csharp/src/Apache.Arrow/Properties/Resources.Designer.cs
+csharp/src/Apache.Arrow/Properties/Resources.resx
+csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
+csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj
+csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
+csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
+csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
+csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+csharp/test/Apache.Arrow.Tests/app.config
+*.html
+*.sgml
+*.css
+*.png
+*.ico
+*.svg
+*.devhelp2
+*.scss
+r/R/arrowExports.R
+r/src/arrowExports.cpp
+r/DESCRIPTION
+r/LICENSE.md
+r/NAMESPACE
+r/.Rbuildignore
+r/arrow.Rproj
+r/README.md
+r/README.Rmd
+r/man/*.Rd
+r/cran-comments.md
+r/vignettes/*.Rmd
+r/tests/testthat/test-*.txt
+r/inst/include/cpp11.hpp
+r/inst/include/cpp11/*.hpp
+.gitattributes
+ruby/red-arrow/.yardopts
+julia/Arrow/Project.toml
+julia/Arrow/README.md
+julia/Arrow/docs/Manifest.toml
+julia/Arrow/docs/Project.toml
+julia/Arrow/docs/make.jl
+julia/Arrow/docs/mkdocs.yml
+julia/Arrow/docs/src/index.md
+julia/Arrow/docs/src/manual.md
+julia/Arrow/docs/src/reference.md
diff --git a/src/arrow/dev/release/run-rat.sh b/src/arrow/dev/release/run-rat.sh
new file mode 100755
index 000000000..2596a284c
--- /dev/null
+++ b/src/arrow/dev/release/run-rat.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+RAT_VERSION=0.13
+
+# download apache rat
+if [ ! -f apache-rat-${RAT_VERSION}.jar ]; then
+ curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar
+fi
+
+RAT="java -jar apache-rat-${RAT_VERSION}.jar -x "
+
+RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
+
+# generate the rat report
+$RAT $1 > rat.txt
+python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt
+cat filtered_rat.txt
+UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l`
+
+if [ "0" -eq "${UNAPPROVED}" ]; then
+ echo "No unapproved licenses"
+else
+ echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt"
+ exit 1
+fi
diff --git a/src/arrow/dev/release/run-test.rb b/src/arrow/dev/release/run-test.rb
new file mode 100755
index 000000000..90df39b13
--- /dev/null
+++ b/src/arrow/dev/release/run-test.rb
@@ -0,0 +1,31 @@
+#!/usr/bin/env ruby
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+$VERBOSE = true
+
+require "pathname"
+
+test_dir = Pathname.new(__dir__)
+
+require "test-unit"
+require_relative "test-helper"
+
+ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] = "10000"
+
+exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/src/arrow/dev/release/setup-gpg-agent.sh b/src/arrow/dev/release/setup-gpg-agent.sh
new file mode 100644
index 000000000..9ff84f6f0
--- /dev/null
+++ b/src/arrow/dev/release/setup-gpg-agent.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# source me
+eval $(gpg-agent --daemon --allow-preset-passphrase)
+gpg --use-agent -s LICENSE.txt
+rm -rf LICENSE.txt.gpg
diff --git a/src/arrow/dev/release/test-helper.rb b/src/arrow/dev/release/test-helper.rb
new file mode 100644
index 000000000..8a272ddfe
--- /dev/null
+++ b/src/arrow/dev/release/test-helper.rb
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "English"
+require "cgi/util"
+require "fileutils"
+require "find"
+require "json"
+require "open-uri"
+require "rexml/document"
+require "tempfile"
+require "tmpdir"
+
+module CommandRunnable
+ class Error < StandardError
+ end
+
+ def sh(*command_line, check_result: true)
+ if command_line[0].is_a?(Hash)
+ env = command_line.shift
+ else
+ env = {}
+ end
+ stdout = Tempfile.new("command-stdout.log")
+ stderr = Tempfile.new("command-stderr.log")
+ success = system(env, *command_line, out: stdout.path, err: stderr.path)
+ if check_result
+ unless success
+ message = "Failed to run: #{command_line.join(" ")}\n"
+ message << "stdout:\n #{stdout.read}\n"
+ message << "stderr:\n #{stderr.read}"
+ raise Error, message
+ end
+ end
+ stdout.read
+ end
+end
+
+module GitRunnable
+ include CommandRunnable
+
+ def git(*args)
+ if args[0].is_a?(Hash)
+ env = args.shift
+ else
+ env = {}
+ end
+ sh(env, "git", *args)
+ end
+
+ def git_current_commit
+ git("rev-parse", "HEAD").chomp
+ end
+
+ def git_tags
+ git("tags").lines(chomp: true)
+ end
+end
+
+module VersionDetectable
+ def detect_versions
+ top_dir = Pathname(__dir__).parent.parent
+ cpp_cmake_lists = top_dir + "cpp" + "CMakeLists.txt"
+ @snapshot_version = cpp_cmake_lists.read[/ARROW_VERSION "(.+?)"/, 1]
+ @release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "")
+ @so_version = compute_so_version(@release_version)
+ @next_version = @release_version.gsub(/\A\d+/) {|major| major.succ}
+ @next_snapshot_version = "#{@next_version}-SNAPSHOT"
+ @next_so_version = compute_so_version(@next_version)
+ r_description = top_dir + "r" + "DESCRIPTION"
+ @previous_version = r_description.read[/^Version: (.+?)\.9000$/, 1]
+ end
+
+ def compute_so_version(version)
+ major, minor, _patch = version.split(".")
+ Integer(major, 10) * 100 + Integer(minor, 10)
+ end
+
+ def on_release_branch?
+ @snapshot_version == @release_version
+ end
+end
diff --git a/src/arrow/dev/release/utils-binary.sh b/src/arrow/dev/release/utils-binary.sh
new file mode 100644
index 000000000..31ebcd8e9
--- /dev/null
+++ b/src/arrow/dev/release/utils-binary.sh
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker_image_name=apache-arrow/release-binary
+gpg_agent_extra_socket="$(gpgconf --list-dirs agent-extra-socket)"
+if [ $(uname) = "Darwin" ]; then
+ docker_uid=10000
+ docker_gid=10000
+else
+ docker_uid=$(id -u)
+ docker_gid=$(id -g)
+fi
+docker_ssh_key="${SOURCE_DIR}/binary/id_rsa"
+
+if [ ! -f "${docker_ssh_key}" ]; then
+ ssh-keygen -N "" -f "${docker_ssh_key}"
+fi
+
+docker_gpg_ssh() {
+ local ssh_port=$1
+ shift
+ local known_hosts_file=$(mktemp -t "arrow-binary-gpg-ssh-known-hosts.XXXXX")
+ local exit_code=
+ if ssh \
+ -o StrictHostKeyChecking=no \
+ -o UserKnownHostsFile=${known_hosts_file} \
+ -i "${docker_ssh_key}" \
+ -p ${ssh_port} \
+ -R "/home/arrow/.gnupg/S.gpg-agent:${gpg_agent_extra_socket}" \
+ arrow@127.0.0.1 \
+ "$@"; then
+ exit_code=$?;
+ else
+ exit_code=$?;
+ fi
+ rm -f ${known_hosts_file}
+ return ${exit_code}
+}
+
+docker_run() {
+ local container_id_dir=$(mktemp -d -t "arrow-binary-gpg-container.XXXXX")
+ local container_id_file=${container_id_dir}/id
+ docker \
+ run \
+ --cidfile ${container_id_file} \
+ --detach \
+ --publish-all \
+ --rm \
+ --volume "$PWD":/host \
+ ${docker_image_name} \
+ bash -c "
+if [ \$(id -u) -ne ${docker_uid} ]; then
+ usermod --uid ${docker_uid} arrow
+ chown -R arrow: ~arrow
+fi
+/usr/sbin/sshd -D
+"
+ local container_id=$(cat ${container_id_file})
+ local ssh_port=$(docker port ${container_id} | grep -E -o '[0-9]+$' | head -n 1)
+ # Wait for sshd available
+ while ! docker_gpg_ssh ${ssh_port} : > /dev/null 2>&1; do
+ sleep 0.1
+ done
+ gpg --export ${GPG_KEY_ID} | docker_gpg_ssh ${ssh_port} gpg --import
+ docker_gpg_ssh ${ssh_port} "$@"
+ docker kill ${container_id}
+ rm -rf ${container_id_dir}
+}
+
+docker build -t ${docker_image_name} "${SOURCE_DIR}/binary"
+
+chmod go-rwx "${docker_ssh_key}"
diff --git a/src/arrow/dev/release/utils-prepare.sh b/src/arrow/dev/release/utils-prepare.sh
new file mode 100644
index 000000000..7ba786a75
--- /dev/null
+++ b/src/arrow/dev/release/utils-prepare.sh
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARROW_DIR="${SOURCE_DIR}/../.."
+
+update_versions() {
+ local base_version=$1
+ local next_version=$2
+ local type=$3
+
+ case ${type} in
+ release)
+ local version=${base_version}
+ local r_version=${base_version}
+ ;;
+ snapshot)
+ local version=${next_version}-SNAPSHOT
+ local r_version=${base_version}.9000
+ ;;
+ esac
+
+ pushd "${ARROW_DIR}/c_glib"
+ sed -i.bak -E -e \
+ "s/^version = '.+'/version = '${version}'/" \
+ meson.build
+ rm -f meson.build.bak
+ git add meson.build
+ popd
+
+ pushd "${ARROW_DIR}/ci/scripts"
+ sed -i.bak -E -e \
+ "s/^pkgver=.+/pkgver=${r_version}/" \
+ PKGBUILD
+ rm -f PKGBUILD.bak
+ git add PKGBUILD
+ popd
+
+ pushd "${ARROW_DIR}/cpp"
+ sed -i.bak -E -e \
+ "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \
+ CMakeLists.txt
+ rm -f CMakeLists.txt.bak
+ git add CMakeLists.txt
+
+ sed -i.bak -E -e \
+ "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \
+ vcpkg.json
+ rm -f vcpkg.json.bak
+ git add vcpkg.json
+ popd
+
+ pushd "${ARROW_DIR}/java"
+ mvn versions:set -DnewVersion=${version}
+ find . -type f -name pom.xml.versionsBackup -delete
+ git add "pom.xml"
+ git add "**/pom.xml"
+ popd
+
+ pushd "${ARROW_DIR}/csharp"
+ sed -i.bak -E -e \
+ "s/^ <Version>.+<\/Version>/ <Version>${version}<\/Version>/" \
+ Directory.Build.props
+ rm -f Directory.Build.props.bak
+ git add Directory.Build.props
+ popd
+
+ pushd "${ARROW_DIR}/dev/tasks/homebrew-formulae"
+ sed -i.bak -E -e \
+ "s/arrow-[0-9.]+[0-9]+/arrow-${r_version}/g" \
+ autobrew/apache-arrow.rb
+ rm -f autobrew/apache-arrow.rb.bak
+ git add autobrew/apache-arrow.rb
+ sed -i.bak -E -e \
+ "s/arrow-[0-9.\-]+[0-9SNAPHOT]+/arrow-${version}/g" \
+ apache-arrow.rb
+ rm -f apache-arrow.rb.bak
+ git add apache-arrow.rb
+ popd
+
+ pushd "${ARROW_DIR}/js"
+ sed -i.bak -E -e \
+ "s/^ \"version\": \".+\"/ \"version\": \"${version}\"/" \
+ package.json
+ rm -f package.json.bak
+ git add package.json
+ popd
+
+ pushd "${ARROW_DIR}/matlab"
+ sed -i.bak -E -e \
+ "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \
+ CMakeLists.txt
+ rm -f CMakeLists.txt.bak
+ git add CMakeLists.txt
+ popd
+
+ pushd "${ARROW_DIR}/python"
+ sed -i.bak -E -e \
+ "s/^default_version = '.+'/default_version = '${version}'/" \
+ setup.py
+ rm -f setup.py.bak
+ git add setup.py
+ popd
+
+ pushd "${ARROW_DIR}/r"
+ sed -i.bak -E -e \
+ "s/^Version: .+/Version: ${r_version}/" \
+ DESCRIPTION
+ rm -f DESCRIPTION.bak
+ git add DESCRIPTION
+ # Replace dev version with release version
+ sed -i.bak -E -e \
+ "0,/^# arrow /s/^# arrow .+/# arrow ${base_version}/" \
+ NEWS.md
+ if [ ${type} = "snapshot" ]; then
+ # Add a news entry for the new dev version
+ sed -i.bak -E -e \
+ "0,/^# arrow /s/^(# arrow .+)/# arrow ${r_version}\n\n\1/" \
+ NEWS.md
+ fi
+ rm -f NEWS.md.bak
+ git add NEWS.md
+ popd
+
+ pushd "${ARROW_DIR}/ruby"
+ sed -i.bak -E -e \
+ "s/^ VERSION = \".+\"/ VERSION = \"${version}\"/g" \
+ */*/*/version.rb
+ rm -f */*/*/version.rb.bak
+ git add */*/*/version.rb
+ popd
+}
diff --git a/src/arrow/dev/release/verify-apt.sh b/src/arrow/dev/release/verify-apt.sh
new file mode 100755
index 000000000..3773e27fa
--- /dev/null
+++ b/src/arrow/dev/release/verify-apt.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -exu
+
+if [ $# -lt 2 ]; then
+ echo "Usage: $0 VERSION rc"
+ echo " $0 VERSION staging-rc"
+ echo " $0 VERSION release"
+ echo " $0 VERSION staging-release"
+ echo " $0 VERSION local"
+ echo " e.g.: $0 0.13.0 rc # Verify 0.13.0 RC"
+ echo " e.g.: $0 0.13.0 staging-rc # Verify 0.13.0 RC on staging"
+ echo " e.g.: $0 0.13.0 release # Verify 0.13.0"
+ echo " e.g.: $0 0.13.0 staging-release # Verify 0.13.0 on staging"
+ echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
+ exit 1
+fi
+
+VERSION="$1"
+TYPE="$2"
+
+local_prefix="/arrow/dev/tasks/linux-packages"
+
+
+echo "::group::Prepare repository"
+
+export DEBIAN_FRONTEND=noninteractive
+
+APT_INSTALL="apt install -y -V --no-install-recommends"
+
+apt update
+${APT_INSTALL} \
+ ca-certificates \
+ curl \
+ lsb-release
+
+code_name="$(lsb_release --codename --short)"
+distribution="$(lsb_release --id --short | tr 'A-Z' 'a-z')"
+artifactory_base_url="https://apache.jfrog.io/artifactory/arrow/${distribution}"
+case "${TYPE}" in
+ rc|staging-rc|staging-release)
+ suffix=${TYPE%-release}
+ artifactory_base_url+="-${suffix}"
+ ;;
+esac
+
+have_flight=yes
+have_plasma=yes
+workaround_missing_packages=()
+case "${distribution}-${code_name}" in
+ debian-*)
+ sed \
+ -i"" \
+ -e "s/ main$/ main contrib non-free/g" \
+ /etc/apt/sources.list
+ ;;
+esac
+if [ "$(arch)" = "aarch64" ]; then
+ have_plasma=no
+fi
+
+if [ "${TYPE}" = "local" ]; then
+ case "${VERSION}" in
+ *-dev*)
+ package_version="$(echo "${VERSION}" | sed -e 's/-dev\(.*\)$/~dev\1/g')"
+ ;;
+ *-rc*)
+ package_version="$(echo "${VERSION}" | sed -e 's/-rc.*$//g')"
+ ;;
+ *)
+ package_version="${VERSION}"
+ ;;
+ esac
+ package_version+="-1"
+ apt_source_path="${local_prefix}/apt/repositories"
+ apt_source_path+="/${distribution}/pool/${code_name}/main"
+ apt_source_path+="/a/apache-arrow-apt-source"
+ apt_source_path+="/apache-arrow-apt-source_${package_version}_all.deb"
+ ${APT_INSTALL} "${apt_source_path}"
+else
+ package_version="${VERSION}-1"
+ apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb"
+ curl \
+ --output "${apt_source_base_name}" \
+ "${artifactory_base_url}/${apt_source_base_name}"
+ ${APT_INSTALL} "./${apt_source_base_name}"
+fi
+
+if [ "${TYPE}" = "local" ]; then
+ sed \
+ -i"" \
+ -e "s,^URIs: .*$,URIs: file://${local_prefix}/apt/repositories/${distribution},g" \
+ /etc/apt/sources.list.d/apache-arrow.sources
+ keys="${local_prefix}/KEYS"
+ if [ -f "${keys}" ]; then
+ gpg \
+ --no-default-keyring \
+ --keyring /usr/share/keyrings/apache-arrow-apt-source.gpg \
+ --import "${keys}"
+ fi
+else
+ case "${TYPE}" in
+ rc|staging-rc|staging-release)
+ suffix=${TYPE%-release}
+ sed \
+ -i"" \
+ -e "s,^URIs: \\(.*\\)/,URIs: \\1-${suffix}/,g" \
+ /etc/apt/sources.list.d/apache-arrow.sources
+ ;;
+ esac
+fi
+
+apt update
+
+echo "::endgroup::"
+
+
+echo "::group::Test Apache Arrow C++"
+${APT_INSTALL} libarrow-dev=${package_version}
+required_packages=()
+required_packages+=(cmake)
+required_packages+=(g++)
+required_packages+=(git)
+required_packages+=(make)
+required_packages+=(pkg-config)
+required_packages+=(${workaround_missing_packages[@]})
+${APT_INSTALL} ${required_packages[@]}
+mkdir -p build
+cp -a /arrow/cpp/examples/minimal_build build
+pushd build/minimal_build
+cmake .
+make -j$(nproc)
+./arrow_example
+c++ -std=c++11 -o arrow_example example.cc $(pkg-config --cflags --libs arrow)
+./arrow_example
+popd
+echo "::endgroup::"
+
+
+echo "::group::Test Apache Arrow GLib"
+${APT_INSTALL} libarrow-glib-dev=${package_version}
+${APT_INSTALL} libarrow-glib-doc=${package_version}
+echo "::endgroup::"
+
+
+if [ "${have_flight}" = "yes" ]; then
+ echo "::group::Test Apache Arrow Flight"
+ ${APT_INSTALL} libarrow-flight-glib-dev=${package_version}
+ ${APT_INSTALL} libarrow-flight-glib-doc=${package_version}
+ echo "::endgroup::"
+fi
+
+
+echo "::group::Test libarrow-python"
+${APT_INSTALL} libarrow-python-dev=${package_version}
+echo "::endgroup::"
+
+
+if [ "${have_plasma}" = "yes" ]; then
+ echo "::group::Test Plasma"
+ ${APT_INSTALL} libplasma-glib-dev=${package_version}
+ ${APT_INSTALL} libplasma-glib-doc=${package_version}
+ ${APT_INSTALL} plasma-store-server=${package_version}
+ echo "::endgroup::"
+fi
+
+
+echo "::group::Test Gandiva"
+${APT_INSTALL} libgandiva-glib-dev=${package_version}
+${APT_INSTALL} libgandiva-glib-doc=${package_version}
+echo "::endgroup::"
+
+
+echo "::group::Test Parquet"
+${APT_INSTALL} libparquet-glib-dev=${package_version}
+${APT_INSTALL} libparquet-glib-doc=${package_version}
+echo "::endgroup::"
diff --git a/src/arrow/dev/release/verify-release-candidate-wheels.bat b/src/arrow/dev/release/verify-release-candidate-wheels.bat
new file mode 100644
index 000000000..5bcefe80d
--- /dev/null
+++ b/src/arrow/dev/release/verify-release-candidate-wheels.bat
@@ -0,0 +1,107 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem This script downloads and installs all Windows wheels for a release
+@rem candidate into temporary conda environments and makes sure that imports
+@rem work
+
+@rem To run the script:
+@rem verify-release-candidate-wheels.bat VERSION RC_NUM
+
+@echo on
+
+set _CURRENT_DIR=%CD%
+set _VERIFICATION_DIR=C:\tmp\arrow-verify-release-wheels
+
+if not exist "C:\tmp\" mkdir C:\tmp
+if exist %_VERIFICATION_DIR% rd %_VERIFICATION_DIR% /s /q
+if not exist %_VERIFICATION_DIR% mkdir %_VERIFICATION_DIR%
+
+cd %_VERIFICATION_DIR%
+
+@rem clone Arrow repository to obtain test requirements
+set GIT_ENV_PATH=%_VERIFICATION_DIR%\_git
+call conda create -p %GIT_ENV_PATH% ^
+ --no-shortcuts -f -q -y git ^
+ || EXIT /B 1
+call activate %GIT_ENV_PATH%
+
+git clone https://github.com/apache/arrow.git || EXIT /B 1
+pushd arrow
+git submodule update --init
+popd
+
+set ARROW_VERSION=%1
+set RC_NUMBER=%2
+
+python arrow\dev\release\download_rc_binaries.py %ARROW_VERSION% %RC_NUMBER% ^
+ --package_type python ^
+ --regex=".*win_amd64.*" || EXIT /B 1
+
+call deactivate
+
+set ARROW_TEST_DATA=%cd%\arrow\testing\data
+
+CALL :verify_wheel 3.6 m
+if errorlevel 1 GOTO error
+
+CALL :verify_wheel 3.7 m
+if errorlevel 1 GOTO error
+
+CALL :verify_wheel 3.8
+if errorlevel 1 GOTO error
+
+:done
+cd %_CURRENT_DIR%
+
+EXIT /B %ERRORLEVEL%
+
+:error
+call deactivate
+cd %_CURRENT_DIR%
+
+EXIT /B 1
+
+@rem a batch function to verify a single wheel
+:verify_wheel
+
+set PY_VERSION=%1
+set ABI_TAG=%2
+set PY_VERSION_NO_PERIOD=%PY_VERSION:.=%
+
+set CONDA_ENV_PATH=%_VERIFICATION_DIR%\_verify-wheel-%PY_VERSION%
+call conda create -p %CONDA_ENV_PATH% ^
+ --no-shortcuts -f -q -y python=%PY_VERSION% ^
+ || EXIT /B 1
+call activate %CONDA_ENV_PATH%
+
+set WHEEL_FILENAME=pyarrow-%ARROW_VERSION%-cp%PY_VERSION_NO_PERIOD%-cp%PY_VERSION_NO_PERIOD%%ABI_TAG%-win_amd64.whl
+
+pip install python-rc\%ARROW_VERSION%-rc%RC_NUMBER%\%WHEEL_FILENAME% || EXIT /B 1
+python -c "import pyarrow" || EXIT /B 1
+python -c "import pyarrow.parquet" || EXIT /B 1
+python -c "import pyarrow.flight" || EXIT /B 1
+python -c "import pyarrow.dataset" || EXIT /B 1
+
+pip install -r arrow\python\requirements-test.txt || EXIT /B 1
+pytest %CONDA_ENV_PATH%\Lib\site-packages\pyarrow --pdb -v || EXIT /B 1
+
+:done
+
+call deactivate
+
+EXIT /B 0
diff --git a/src/arrow/dev/release/verify-release-candidate.bat b/src/arrow/dev/release/verify-release-candidate.bat
new file mode 100644
index 000000000..fee8c01bc
--- /dev/null
+++ b/src/arrow/dev/release/verify-release-candidate.bat
@@ -0,0 +1,130 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem To run the script:
+@rem verify-release-candidate.bat VERSION RC_NUM
+
+@echo on
+
+if not exist "C:\tmp\" mkdir C:\tmp
+if exist "C:\tmp\arrow-verify-release" rd C:\tmp\arrow-verify-release /s /q
+if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release
+
+set _VERIFICATION_DIR=C:\tmp\arrow-verify-release
+set _VERIFICATION_DIR_UNIX=C:/tmp/arrow-verify-release
+set _VERIFICATION_CONDA_ENV=%_VERIFICATION_DIR%\conda-env
+set _DIST_URL=https://dist.apache.org/repos/dist/dev/arrow
+set _TARBALL=apache-arrow-%1.tar.gz
+set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1
+set INSTALL_DIR=%_VERIFICATION_DIR%\install
+
+@rem Requires GNU Wget for Windows
+wget --no-check-certificate -O %_TARBALL% %_DIST_URL%/apache-arrow-%1-rc%2/%_TARBALL% || exit /B 1
+
+tar xf %_TARBALL% -C %_VERIFICATION_DIR_UNIX%
+
+set PYTHON=3.6
+
+@rem Using call with conda.bat seems necessary to avoid terminating the batch
+@rem script execution
+call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^
+ --file=ci\conda_env_cpp.txt ^
+ --file=ci\conda_env_python.txt ^
+ git ^
+ python=%PYTHON% ^
+ || exit /B 1
+
+call activate %_VERIFICATION_CONDA_ENV% || exit /B 1
+
+set GENERATOR=Visual Studio 15 2017 Win64
+set CONFIGURATION=release
+
+pushd %ARROW_SOURCE%
+
+set ARROW_HOME=%INSTALL_DIR%
+set PARQUET_HOME=%INSTALL_DIR%
+set PATH=%INSTALL_DIR%\bin;%PATH%
+
+@rem Build and test Arrow C++ libraries
+mkdir %ARROW_SOURCE%\cpp\build
+pushd %ARROW_SOURCE%\cpp\build
+
+@rem This is the path for Visual Studio Community 2017
+call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsDevCmd.bat" -arch=amd64
+
+@rem NOTE(wesm): not using Ninja for now to be able to more easily control the
+@rem generator used
+
+cmake -G "%GENERATOR%" ^
+ -DARROW_BOOST_USE_SHARED=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=ON ^
+ -DARROW_CXXFLAGS="/MP" ^
+ -DARROW_DATASET=ON ^
+ -DARROW_FLIGHT=ON ^
+ -DARROW_MIMALLOC=ON ^
+ -DARROW_PARQUET=ON ^
+ -DARROW_PYTHON=ON ^
+ -DARROW_WITH_BROTLI=ON ^
+ -DARROW_WITH_BZ2=ON ^
+ -DARROW_WITH_LZ4=ON ^
+ -DARROW_WITH_SNAPPY=ON ^
+ -DARROW_WITH_ZLIB=ON ^
+ -DARROW_WITH_ZSTD=ON ^
+ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
+ -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^
+ -DCMAKE_UNITY_BUILD=ON ^
+ -DGTest_SOURCE=BUNDLED ^
+ .. || exit /B
+
+cmake --build . --target INSTALL --config Release || exit /B 1
+
+@rem NOTE(wesm): Building googletest is flaky for me with ninja. Building it
+@rem first fixes the problem
+
+@rem ninja googletest_ep || exit /B 1
+@rem ninja install || exit /B 1
+
+@rem Get testing datasets for Parquet unit tests
+git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing
+set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data
+
+git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing
+set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data
+
+@rem Needed so python-test.exe works
+set PYTHONPATH_ORIGINAL=%PYTHONPATH%
+set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH%
+ctest -VV || exit /B 1
+set PYTHONPATH=%PYTHONPATH_ORIGINAL%
+popd
+
+@rem Build and import pyarrow
+pushd %ARROW_SOURCE%\python
+
+pip install -r requirements-test.txt || exit /B 1
+
+set PYARROW_CMAKE_GENERATOR=%GENERATOR%
+set PYARROW_WITH_FLIGHT=1
+set PYARROW_WITH_PARQUET=1
+set PYARROW_WITH_DATASET=1
+python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B 1
+pytest pyarrow -v -s --enable-parquet || exit /B 1
+
+popd
+
+call deactivate
diff --git a/src/arrow/dev/release/verify-release-candidate.sh b/src/arrow/dev/release/verify-release-candidate.sh
new file mode 100755
index 000000000..3da89360c
--- /dev/null
+++ b/src/arrow/dev/release/verify-release-candidate.sh
@@ -0,0 +1,817 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Requirements
+# - Ruby >= 2.3
+# - Maven >= 3.3.9
+# - JDK >=7
+# - gcc >= 4.8
+# - Node.js >= 11.12 (best way is to use nvm)
+# - Go >= 1.15
+#
+# If using a non-system Boost, set BOOST_ROOT and add Boost libraries to
+# LD_LIBRARY_PATH.
+#
+# To reuse build artifacts between runs set ARROW_TMPDIR environment variable to
+# a directory where the temporary files should be placed to, note that this
+# directory is not cleaned up automatically.
+
+case $# in
+ 3) ARTIFACT="$1"
+ VERSION="$2"
+ RC_NUMBER="$3"
+ case $ARTIFACT in
+ source|binaries|wheels) ;;
+ *) echo "Invalid argument: '${ARTIFACT}', valid options are \
+'source', 'binaries', or 'wheels'"
+ exit 1
+ ;;
+ esac
+ ;;
+ *) echo "Usage: $0 source|binaries X.Y.Z RC_NUMBER"
+ exit 1
+ ;;
+esac
+
+set -e
+set -x
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))"
+
+detect_cuda() {
+ if ! (which nvcc && which nvidia-smi) > /dev/null; then
+ return 1
+ fi
+
+ local n_gpus=$(nvidia-smi --list-gpus | wc -l)
+ return $((${n_gpus} < 1))
+}
+
+# Build options for the C++ library
+
+if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then
+ ARROW_CUDA=ON
+fi
+: ${ARROW_CUDA:=OFF}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=ON}
+
+ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow'
+
+download_dist_file() {
+ curl \
+ --silent \
+ --show-error \
+ --fail \
+ --location \
+ --remote-name $ARROW_DIST_URL/$1
+}
+
+download_rc_file() {
+ download_dist_file apache-arrow-${VERSION}-rc${RC_NUMBER}/$1
+}
+
+import_gpg_keys() {
+ download_dist_file KEYS
+ gpg --import KEYS
+}
+
+fetch_archive() {
+ local dist_name=$1
+ download_rc_file ${dist_name}.tar.gz
+ download_rc_file ${dist_name}.tar.gz.asc
+ download_rc_file ${dist_name}.tar.gz.sha256
+ download_rc_file ${dist_name}.tar.gz.sha512
+ gpg --verify ${dist_name}.tar.gz.asc ${dist_name}.tar.gz
+ shasum -a 256 -c ${dist_name}.tar.gz.sha256
+ shasum -a 512 -c ${dist_name}.tar.gz.sha512
+}
+
+verify_dir_artifact_signatures() {
+ # verify the signature and the checksums of each artifact
+ find $1 -name '*.asc' | while read sigfile; do
+ artifact=${sigfile/.asc/}
+ gpg --verify $sigfile $artifact || exit 1
+
+ # go into the directory because the checksum files contain only the
+ # basename of the artifact
+ pushd $(dirname $artifact)
+ base_artifact=$(basename $artifact)
+ if [ -f $base_artifact.sha256 ]; then
+ shasum -a 256 -c $base_artifact.sha256 || exit 1
+ fi
+ shasum -a 512 -c $base_artifact.sha512 || exit 1
+ popd
+ done
+}
+
+test_binary() {
+ local download_dir=binaries
+ mkdir -p ${download_dir}
+
+ ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \
+ --dest=${download_dir}
+
+ verify_dir_artifact_signatures ${download_dir}
+}
+
+test_apt() {
+ for target in "debian:buster" \
+ "arm64v8/debian:buster" \
+ "debian:bullseye" \
+ "arm64v8/debian:bullseye" \
+ "debian:bookworm" \
+ "arm64v8/debian:bookworm" \
+ "ubuntu:bionic" \
+ "arm64v8/ubuntu:bionic" \
+ "ubuntu:focal" \
+ "arm64v8/ubuntu:focal" \
+ "ubuntu:hirsute" \
+ "arm64v8/ubuntu:hirsute" \
+ "ubuntu:impish" \
+ "arm64v8/ubuntu:impish"; do \
+ case "${target}" in
+ arm64v8/*)
+ if [ "$(arch)" = "aarch64" -o -e /usr/bin/qemu-aarch64-static ]; then
+ case "${target}" in
+ arm64v8/debian:buster|arm64v8/ubuntu:bionic|arm64v8/ubuntu:focal)
+ ;; # OK
+ *)
+ # qemu-user-static in Ubuntu 20.04 has a crash bug:
+ # https://bugs.launchpad.net/qemu/+bug/1749393
+ continue
+ ;;
+ esac
+ else
+ continue
+ fi
+ ;;
+ esac
+ if ! docker run --rm -v "${SOURCE_DIR}"/../..:/arrow:delegated \
+ "${target}" \
+ /arrow/dev/release/verify-apt.sh \
+ "${VERSION}" \
+ "rc"; then
+ echo "Failed to verify the APT repository for ${target}"
+ exit 1
+ fi
+ done
+}
+
+test_yum() {
+ for target in "almalinux:8" \
+ "arm64v8/almalinux:8" \
+ "amazonlinux:2" \
+ "centos:7" \
+ "centos:8" \
+ "arm64v8/centos:8"; do
+ case "${target}" in
+ arm64v8/*)
+ if [ "$(arch)" = "aarch64" -o -e /usr/bin/qemu-aarch64-static ]; then
+ : # OK
+ else
+ continue
+ fi
+ ;;
+ esac
+ if ! docker run --rm -v "${SOURCE_DIR}"/../..:/arrow:delegated \
+ "${target}" \
+ /arrow/dev/release/verify-yum.sh \
+ "${VERSION}" \
+ "rc"; then
+ echo "Failed to verify the Yum repository for ${target}"
+ exit 1
+ fi
+ done
+}
+
+
+setup_tempdir() {
+ cleanup() {
+ if [ "${TEST_SUCCESS}" = "yes" ]; then
+ rm -fr "${ARROW_TMPDIR}"
+ else
+ echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details."
+ fi
+ }
+
+ if [ -z "${ARROW_TMPDIR}" ]; then
+ # clean up automatically if ARROW_TMPDIR is not defined
+ ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX")
+ trap cleanup EXIT
+ else
+ # don't clean up automatically
+ mkdir -p "${ARROW_TMPDIR}"
+ fi
+}
+
+setup_miniconda() {
+ # Setup short-lived miniconda for Python and integration tests
+ OS="$(uname)"
+ if [ "${OS}" == "Darwin" ]; then
+ OS=MacOSX
+ fi
+ ARCH="$(uname -m)"
+ MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${OS}-${ARCH}.sh"
+
+ MINICONDA=$PWD/test-miniconda
+
+ if [ ! -d "${MINICONDA}" ]; then
+ # Setup miniconda only if the directory doesn't exist yet
+ wget -O miniconda.sh $MINICONDA_URL
+ bash miniconda.sh -b -p $MINICONDA
+ rm -f miniconda.sh
+ fi
+ echo "Installed miniconda at ${MINICONDA}"
+
+ . $MINICONDA/etc/profile.d/conda.sh
+
+ conda create -n arrow-test -y -q -c conda-forge \
+ python=3.8 \
+ nomkl \
+ numpy \
+ pandas \
+ cython
+ conda activate arrow-test
+ echo "Using conda environment ${CONDA_PREFIX}"
+}
+
+# Build and test Java (Requires newer Maven -- I used 3.3.9)
+
+test_package_java() {
+ pushd java
+
+ mvn test
+ mvn package
+
+ popd
+}
+
+# Build and test C++
+
+test_and_install_cpp() {
+ mkdir -p cpp/build
+ pushd cpp/build
+
+ ARROW_CMAKE_OPTIONS="
+${ARROW_CMAKE_OPTIONS:-}
+-DCMAKE_INSTALL_PREFIX=$ARROW_HOME
+-DCMAKE_INSTALL_LIBDIR=lib
+-DARROW_FLIGHT=${ARROW_FLIGHT}
+-DARROW_PLASMA=ON
+-DARROW_ORC=ON
+-DARROW_PYTHON=ON
+-DARROW_GANDIVA=${ARROW_GANDIVA}
+-DARROW_PARQUET=ON
+-DARROW_DATASET=ON
+-DPARQUET_REQUIRE_ENCRYPTION=ON
+-DARROW_VERBOSE_THIRDPARTY_BUILD=ON
+-DARROW_WITH_BZ2=ON
+-DARROW_WITH_ZLIB=ON
+-DARROW_WITH_ZSTD=ON
+-DARROW_WITH_LZ4=ON
+-DARROW_WITH_SNAPPY=ON
+-DARROW_WITH_BROTLI=ON
+-DARROW_BOOST_USE_SHARED=ON
+-DCMAKE_BUILD_TYPE=release
+-DARROW_BUILD_TESTS=ON
+-DARROW_BUILD_INTEGRATION=ON
+-DARROW_CUDA=${ARROW_CUDA}
+-DARROW_DEPENDENCY_SOURCE=AUTO
+"
+ cmake $ARROW_CMAKE_OPTIONS ..
+
+ make -j$NPROC install
+
+ # TODO: ARROW-5036: plasma-serialization_tests broken
+ # TODO: ARROW-5054: libgtest.so link failure in flight-server-test
+ LD_LIBRARY_PATH=$PWD/release:$LD_LIBRARY_PATH ctest \
+ --exclude-regex "plasma-serialization_tests" \
+ -j$NPROC \
+ --output-on-failure \
+ -L unittest
+ popd
+}
+
+test_csharp() {
+ pushd csharp
+
+ local csharp_bin=${PWD}/bin
+ mkdir -p ${csharp_bin}
+
+ if which dotnet > /dev/null 2>&1; then
+ if ! which sourcelink > /dev/null 2>&1; then
+ local dotnet_tools_dir=$HOME/.dotnet/tools
+ if [ -d "${dotnet_tools_dir}" ]; then
+ PATH="${dotnet_tools_dir}:$PATH"
+ fi
+ fi
+ else
+ local dotnet_version=3.1.405
+ local dotnet_platform=
+ case "$(uname)" in
+ Linux)
+ dotnet_platform=linux
+ ;;
+ Darwin)
+ dotnet_platform=macos
+ ;;
+ esac
+ local dotnet_download_thank_you_url=https://dotnet.microsoft.com/download/thank-you/dotnet-sdk-${dotnet_version}-${dotnet_platform}-x64-binaries
+ local dotnet_download_url=$( \
+ curl --location ${dotnet_download_thank_you_url} | \
+ grep 'window\.open' | \
+ grep -E -o '[^"]+' | \
+ sed -n 2p)
+ curl ${dotnet_download_url} | \
+ tar xzf - -C ${csharp_bin}
+ PATH=${csharp_bin}:${PATH}
+ fi
+
+ dotnet test
+ mv dummy.git ../.git
+ dotnet pack -c Release
+ mv ../.git dummy.git
+
+ if ! which sourcelink > /dev/null 2>&1; then
+ dotnet tool install --tool-path ${csharp_bin} sourcelink
+ PATH=${csharp_bin}:${PATH}
+ if ! sourcelink --help > /dev/null 2>&1; then
+ export DOTNET_ROOT=${csharp_bin}
+ fi
+ fi
+
+ sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb
+ sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb
+
+ popd
+}
+
+# Build and test Python
+
+test_python() {
+ pushd python
+
+ pip install -r requirements-build.txt -r requirements-test.txt
+
+ export PYARROW_WITH_DATASET=1
+ export PYARROW_WITH_PARQUET=1
+ export PYARROW_WITH_PLASMA=1
+ if [ "${ARROW_CUDA}" = "ON" ]; then
+ export PYARROW_WITH_CUDA=1
+ fi
+ if [ "${ARROW_FLIGHT}" = "ON" ]; then
+ export PYARROW_WITH_FLIGHT=1
+ fi
+ if [ "${ARROW_GANDIVA}" = "ON" ]; then
+ export PYARROW_WITH_GANDIVA=1
+ fi
+
+ python setup.py build_ext --inplace
+ pytest pyarrow -v --pdb
+
+ popd
+}
+
+test_glib() {
+ pushd c_glib
+
+ pip install meson
+
+ meson build --prefix=$ARROW_HOME --libdir=lib
+ ninja -C build
+ ninja -C build install
+
+ export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH
+
+ if ! bundle --version; then
+ gem install --no-document bundler
+ fi
+
+ bundle install --path vendor/bundle
+ bundle exec ruby test/run-test.rb
+
+ popd
+}
+
+test_js() {
+ pushd js
+
+ if [ "${INSTALL_NODE}" -gt 0 ]; then
+ export NVM_DIR="`pwd`/.nvm"
+ mkdir -p $NVM_DIR
+ curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.35.3/install.sh | \
+ PROFILE=/dev/null bash
+ [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+
+ nvm install --lts
+ npm install -g yarn
+ fi
+
+ yarn --frozen-lockfile
+ yarn run-s clean:all lint build
+ yarn test
+ popd
+}
+
+test_ruby() {
+ pushd ruby
+
+ local modules="red-arrow red-arrow-dataset red-plasma red-parquet"
+ if [ "${ARROW_CUDA}" = "ON" ]; then
+ modules="${modules} red-arrow-cuda"
+ fi
+ if [ "${ARROW_FLIGHT}" = "ON" ]; then
+ modules="${modules} red-arrow-flight"
+ fi
+ if [ "${ARROW_GANDIVA}" = "ON" ]; then
+ modules="${modules} red-gandiva"
+ fi
+
+ for module in ${modules}; do
+ pushd ${module}
+ bundle install --path vendor/bundle
+ bundle exec ruby test/run-test.rb
+ popd
+ done
+
+ popd
+}
+
+test_go() {
+ local VERSION=1.15.14
+ local ARCH=amd64
+
+ if [ "$(uname)" == "Darwin" ]; then
+ local OS=darwin
+ else
+ local OS=linux
+ fi
+
+ local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz
+ wget https://dl.google.com/go/$GO_ARCHIVE
+
+ mkdir -p local-go
+ tar -xzf $GO_ARCHIVE -C local-go
+ rm -f $GO_ARCHIVE
+
+ export GOROOT=`pwd`/local-go/go
+ export GOPATH=`pwd`/local-go/gopath
+ export PATH=$GOROOT/bin:$GOPATH/bin:$PATH
+
+ pushd go/arrow
+
+ go get -v ./...
+ go test ./...
+ go clean -modcache
+
+ popd
+}
+
+# Run integration tests
+test_integration() {
+ JAVA_DIR=$PWD/java
+ CPP_BUILD_DIR=$PWD/cpp/build
+
+ export ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar
+ export ARROW_CPP_EXE_PATH=$CPP_BUILD_DIR/release
+
+ pip install -e dev/archery
+
+ INTEGRATION_TEST_ARGS=""
+
+ if [ "${ARROW_FLIGHT}" = "ON" ]; then
+ INTEGRATION_TEST_ARGS="${INTEGRATION_TEST_ARGS} --run-flight"
+ fi
+
+ # Flight integration test executable have runtime dependency on
+ # release/libgtest.so
+ LD_LIBRARY_PATH=$ARROW_CPP_EXE_PATH:$LD_LIBRARY_PATH \
+ archery integration \
+ --with-cpp=${TEST_INTEGRATION_CPP} \
+ --with-java=${TEST_INTEGRATION_JAVA} \
+ --with-js=${TEST_INTEGRATION_JS} \
+ --with-go=${TEST_INTEGRATION_GO} \
+ $INTEGRATION_TEST_ARGS
+}
+
+clone_testing_repositories() {
+ # Clone testing repositories if not cloned already
+ if [ ! -d "arrow-testing" ]; then
+ git clone https://github.com/apache/arrow-testing.git
+ fi
+ if [ ! -d "parquet-testing" ]; then
+ git clone https://github.com/apache/parquet-testing.git
+ fi
+ export ARROW_TEST_DATA=$PWD/arrow-testing/data
+ export PARQUET_TEST_DATA=$PWD/parquet-testing/data
+}
+
+test_source_distribution() {
+ export ARROW_HOME=$ARROW_TMPDIR/install
+ export PARQUET_HOME=$ARROW_TMPDIR/install
+ export LD_LIBRARY_PATH=$ARROW_HOME/lib:${LD_LIBRARY_PATH:-}
+ export PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig:${PKG_CONFIG_PATH:-}
+
+ if [ "$(uname)" == "Darwin" ]; then
+ NPROC=$(sysctl -n hw.ncpu)
+ else
+ NPROC=$(nproc)
+ fi
+
+ clone_testing_repositories
+
+ if [ ${TEST_JAVA} -gt 0 ]; then
+ test_package_java
+ fi
+ if [ ${TEST_CPP} -gt 0 ]; then
+ test_and_install_cpp
+ fi
+ if [ ${TEST_CSHARP} -gt 0 ]; then
+ test_csharp
+ fi
+ if [ ${TEST_PYTHON} -gt 0 ]; then
+ test_python
+ fi
+ if [ ${TEST_GLIB} -gt 0 ]; then
+ test_glib
+ fi
+ if [ ${TEST_RUBY} -gt 0 ]; then
+ test_ruby
+ fi
+ if [ ${TEST_JS} -gt 0 ]; then
+ test_js
+ fi
+ if [ ${TEST_GO} -gt 0 ]; then
+ test_go
+ fi
+ if [ ${TEST_INTEGRATION} -gt 0 ]; then
+ test_integration
+ fi
+}
+
+test_binary_distribution() {
+ if [ ${TEST_BINARY} -gt 0 ]; then
+ test_binary
+ fi
+ if [ ${TEST_APT} -gt 0 ]; then
+ test_apt
+ fi
+ if [ ${TEST_YUM} -gt 0 ]; then
+ test_yum
+ fi
+}
+
+test_linux_wheels() {
+ if [ "$(uname -m)" = "aarch64" ]; then
+ local arch="aarch64"
+ else
+ local arch="x86_64"
+ fi
+
+ local py_arches="3.6m 3.7m 3.8 3.9"
+ local platform_tags="manylinux_2_12_${arch}.manylinux2010_${arch} manylinux_2_17_${arch}.manylinux2014_${arch}"
+
+ for py_arch in ${py_arches}; do
+ local env=_verify_wheel-${py_arch}
+ conda create -yq -n ${env} python=${py_arch//[mu]/}
+ conda activate ${env}
+ pip install -U pip
+
+ for tag in ${platform_tags}; do
+ # check the mandatory and optional imports
+ pip install python-rc/${VERSION}-rc${RC_NUMBER}/pyarrow-${VERSION}-cp${py_arch//[mu.]/}-cp${py_arch//./}-${tag}.whl
+ INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
+ done
+
+ conda deactivate
+ done
+}
+
+test_macos_wheels() {
+ local py_arches="3.6m 3.7m 3.8 3.9"
+ local macos_version=$(sw_vers -productVersion)
+ local macos_short_version=${macos_version:0:5}
+
+ local check_s3=ON
+ local check_flight=ON
+
+ # macOS version <= 10.13
+ if [ $(echo "${macos_short_version}\n10.14" | sort -V | head -n1) == "${macos_short_version}" ]; then
+ local check_s3=OFF
+ fi
+ # apple silicon processor
+ if [ "$(uname -m)" = "arm64" ]; then
+ local py_arches="3.8 3.9"
+ local check_flight=OFF
+ fi
+
+ # verify arch-native wheels inside an arch-native conda environment
+ for py_arch in ${py_arches}; do
+ local env=_verify_wheel-${py_arch}
+ conda create -yq -n ${env} python=${py_arch//m/}
+ conda activate ${env}
+ pip install -U pip
+
+ # check the mandatory and optional imports
+ pip install --find-links python-rc/${VERSION}-rc${RC_NUMBER} pyarrow==${VERSION}
+ INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+ ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
+
+ conda deactivate
+ done
+
+ # verify arm64 and universal2 wheels using an universal2 python binary
+ # the interpreter should be installed from python.org:
+ # https://www.python.org/ftp/python/3.9.6/python-3.9.6-macosx10.9.pkg
+ if [ "$(uname -m)" = "arm64" ]; then
+ for py_arch in "3.9"; do
+ local pyver=${py_arch//m/}
+ local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}"
+
+ # create and activate a virtualenv for testing as arm64
+ for arch in "arm64" "x86_64"; do
+ local venv="${ARROW_TMPDIR}/test-${arch}-virtualenv"
+ $python -m virtualenv $venv
+ source $venv/bin/activate
+ pip install -U pip
+
+ # install pyarrow's universal2 wheel
+ pip install \
+ --find-links python-rc/${VERSION}-rc${RC_NUMBER} \
+ --target $(python -c 'import site; print(site.getsitepackages()[0])') \
+ --platform macosx_11_0_universal2 \
+ --only-binary=:all: \
+ pyarrow==${VERSION}
+ # check the imports and execute the unittests
+ INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+ arch -${arch} ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
+
+ deactivate
+ done
+ done
+ fi
+}
+
+test_wheels() {
+ clone_testing_repositories
+
+ local download_dir=binaries
+ mkdir -p ${download_dir}
+
+ if [ "$(uname)" == "Darwin" ]; then
+ local filter_regex=.*macosx.*
+ else
+ local filter_regex=.*manylinux.*
+ fi
+
+ python $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \
+ --package_type python \
+ --regex=${filter_regex} \
+ --dest=${download_dir}
+
+ verify_dir_artifact_signatures ${download_dir}
+
+ pushd ${download_dir}
+
+ if [ "$(uname)" == "Darwin" ]; then
+ test_macos_wheels
+ else
+ test_linux_wheels
+ fi
+
+ popd
+}
+
+# By default test all functionalities.
+# To deactivate one test, deactivate the test and all of its dependents
+# To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1
+
+# Install NodeJS locally for running the JavaScript tests rather than using the
+# system Node installation, which may be too old.
+: ${INSTALL_NODE:=1}
+
+if [ "${ARTIFACT}" == "source" ]; then
+ : ${TEST_SOURCE:=1}
+elif [ "${ARTIFACT}" == "wheels" ]; then
+ TEST_WHEELS=1
+else
+ TEST_BINARY_DISTRIBUTIONS=1
+fi
+: ${TEST_SOURCE:=0}
+: ${TEST_WHEELS:=0}
+: ${TEST_BINARY_DISTRIBUTIONS:=0}
+
+: ${TEST_DEFAULT:=1}
+: ${TEST_JAVA:=${TEST_DEFAULT}}
+: ${TEST_CPP:=${TEST_DEFAULT}}
+: ${TEST_CSHARP:=${TEST_DEFAULT}}
+: ${TEST_GLIB:=${TEST_DEFAULT}}
+: ${TEST_RUBY:=${TEST_DEFAULT}}
+: ${TEST_PYTHON:=${TEST_DEFAULT}}
+: ${TEST_JS:=${TEST_DEFAULT}}
+: ${TEST_GO:=${TEST_DEFAULT}}
+: ${TEST_INTEGRATION:=${TEST_DEFAULT}}
+if [ ${TEST_BINARY_DISTRIBUTIONS} -gt 0 ]; then
+ TEST_BINARY_DISTRIBUTIONS_DEFAULT=${TEST_DEFAULT}
+else
+ TEST_BINARY_DISTRIBUTIONS_DEFAULT=0
+fi
+: ${TEST_BINARY:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}
+: ${TEST_APT:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}
+: ${TEST_YUM:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}
+
+# For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1
+: ${TEST_INTEGRATION_CPP:=${TEST_INTEGRATION}}
+: ${TEST_INTEGRATION_JAVA:=${TEST_INTEGRATION}}
+: ${TEST_INTEGRATION_JS:=${TEST_INTEGRATION}}
+: ${TEST_INTEGRATION_GO:=${TEST_INTEGRATION}}
+
+# Automatically test if its activated by a dependent
+TEST_GLIB=$((${TEST_GLIB} + ${TEST_RUBY}))
+TEST_CPP=$((${TEST_CPP} + ${TEST_GLIB} + ${TEST_PYTHON} + ${TEST_INTEGRATION_CPP}))
+TEST_JAVA=$((${TEST_JAVA} + ${TEST_INTEGRATION_JAVA}))
+TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS}))
+TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO}))
+TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO}))
+
+if [ "${ARTIFACT}" == "source" ]; then
+ NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION}))
+elif [ "${ARTIFACT}" == "wheels" ]; then
+ NEED_MINICONDA=$((${TEST_WHEELS}))
+else
+ if [ -z "${PYTHON:-}" ]; then
+ NEED_MINICONDA=$((${TEST_BINARY}))
+ else
+ NEED_MINICONDA=0
+ fi
+fi
+
+: ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz}
+case "${TEST_ARCHIVE}" in
+ /*)
+ ;;
+ *)
+ TEST_ARCHIVE=${PWD}/${TEST_ARCHIVE}
+ ;;
+esac
+
+TEST_SUCCESS=no
+
+setup_tempdir "arrow-${VERSION}"
+echo "Working in sandbox ${ARROW_TMPDIR}"
+cd ${ARROW_TMPDIR}
+
+if [ ${NEED_MINICONDA} -gt 0 ]; then
+ setup_miniconda
+fi
+
+if [ "${ARTIFACT}" == "source" ]; then
+ dist_name="apache-arrow-${VERSION}"
+ if [ ${TEST_SOURCE} -gt 0 ]; then
+ import_gpg_keys
+ if [ ! -d "${dist_name}" ]; then
+ fetch_archive ${dist_name}
+ tar xf ${dist_name}.tar.gz
+ fi
+ else
+ mkdir -p ${dist_name}
+ if [ ! -f ${TEST_ARCHIVE} ]; then
+ echo "${TEST_ARCHIVE} not found"
+ exit 1
+ fi
+ tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1
+ fi
+ pushd ${dist_name}
+ test_source_distribution
+ popd
+elif [ "${ARTIFACT}" == "wheels" ]; then
+ import_gpg_keys
+ test_wheels
+else
+ import_gpg_keys
+ test_binary_distribution
+fi
+
+TEST_SUCCESS=yes
+echo 'Release candidate looks good!'
+exit 0
diff --git a/src/arrow/dev/release/verify-yum.sh b/src/arrow/dev/release/verify-yum.sh
new file mode 100755
index 000000000..a7f572a44
--- /dev/null
+++ b/src/arrow/dev/release/verify-yum.sh
@@ -0,0 +1,204 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -exu
+
+if [ $# -lt 2 ]; then
+ echo "Usage: $0 VERSION rc"
+ echo " $0 VERSION staging-rc"
+ echo " $0 VERSION release"
+ echo " $0 VERSION staging-release"
+ echo " $0 VERSION local"
+ echo " e.g.: $0 0.13.0 rc # Verify 0.13.0 RC"
+ echo " e.g.: $0 0.13.0 staging-rc # Verify 0.13.0 RC on staging"
+ echo " e.g.: $0 0.13.0 release # Verify 0.13.0"
+ echo " e.g.: $0 0.13.0 staging-release # Verify 0.13.0 on staging"
+ echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
+ exit 1
+fi
+
+VERSION="$1"
+TYPE="$2"
+
+local_prefix="/arrow/dev/tasks/linux-packages"
+
+artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
+
+distribution=$(. /etc/os-release && echo "${ID}")
+distribution_version=$(. /etc/os-release && echo "${VERSION_ID}" | grep -o "^[0-9]*")
+distribution_prefix="centos"
+
+cmake_package=cmake
+cmake_command=cmake
+have_flight=yes
+have_gandiva=yes
+have_glib=yes
+have_parquet=yes
+have_python=yes
+install_command="dnf install -y --enablerepo=powertools"
+
+case "${distribution}-${distribution_version}" in
+ almalinux-*)
+ distribution_prefix="almalinux"
+ ;;
+ amzn-2)
+ cmake_package=cmake3
+ cmake_command=cmake3
+ have_flight=no
+ have_gandiva=no
+ have_python=no
+ install_command="yum install -y"
+ distribution_prefix="amazon-linux"
+ amazon-linux-extras install epel -y
+ ;;
+ centos-7)
+ cmake_package=cmake3
+ cmake_command=cmake3
+ have_flight=no
+ have_gandiva=no
+ install_command="yum install -y"
+ ;;
+esac
+if [ "$(arch)" = "aarch64" ]; then
+ have_gandiva=no
+fi
+
+if [ "${TYPE}" = "local" ]; then
+ case "${VERSION}" in
+ *-dev*)
+ package_version="$(echo "${VERSION}" | sed -e 's/-dev\(.*\)$/-0.dev\1/g')"
+ ;;
+ *-rc*)
+ package_version="$(echo "${VERSION}" | sed -e 's/-rc.*$//g')"
+ package_version+="-1"
+ ;;
+ *)
+ package_version="${VERSION}-1"
+ ;;
+ esac
+ release_path="${local_prefix}/yum/repositories"
+ case "${distribution}" in
+ almalinux)
+ package_version+=".el${distribution_version}"
+ release_path+="/almalinux"
+ ;;
+ amzn)
+ package_version+=".${distribution}${distribution_version}"
+ release_path+="/amazon-linux"
+ amazon-linux-extras install -y epel
+ ;;
+ *)
+ package_version+=".el${distribution_version}"
+ release_path+="/centos"
+ ;;
+ esac
+ release_path+="/${distribution_version}/$(arch)/Packages"
+ release_path+="/apache-arrow-release-${package_version}.noarch.rpm"
+ ${install_command} "${release_path}"
+else
+ package_version="${VERSION}"
+ case "${TYPE}" in
+ rc|staging-rc|staging-release)
+ suffix=${TYPE%-release}
+ distribution_prefix+="-${suffix}"
+ ;;
+ esac
+ ${install_command} \
+ ${artifactory_base_url}/${distribution_prefix}/${distribution_version}/apache-arrow-release-latest.rpm
+fi
+
+if [ "${TYPE}" = "local" ]; then
+ sed \
+ -i"" \
+ -e "s,baseurl=https://apache\.jfrog\.io/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \
+ /etc/yum.repos.d/Apache-Arrow.repo
+ keys="${local_prefix}/KEYS"
+ if [ -f "${keys}" ]; then
+ cp "${keys}" /etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+ fi
+else
+ case "${TYPE}" in
+ rc|staging-rc|staging-release)
+ suffix=${TYPE%-release}
+ sed \
+ -i"" \
+ -e "s,/almalinux/,/almalinux-${suffix}/,g" \
+ -e "s,/centos/,/centos-${suffix}/,g" \
+ -e "s,/amazon-linux/,/amazon-linux-${suffix}/,g" \
+ /etc/yum.repos.d/Apache-Arrow.repo
+ ;;
+ esac
+fi
+
+${install_command} --enablerepo=epel arrow-devel-${package_version}
+${install_command} \
+ ${cmake_package} \
+ gcc-c++ \
+ git \
+ libarchive \
+ make \
+ pkg-config
+mkdir -p build
+cp -a /arrow/cpp/examples/minimal_build build
+pushd build/minimal_build
+${cmake_command} .
+make -j$(nproc)
+./arrow_example
+c++ -std=c++11 -o arrow_example example.cc $(pkg-config --cflags --libs arrow)
+./arrow_example
+popd
+
+if [ "${have_glib}" = "yes" ]; then
+ ${install_command} --enablerepo=epel arrow-glib-devel-${package_version}
+ ${install_command} --enablerepo=epel arrow-glib-doc-${package_version}
+fi
+
+if [ "${have_python}" = "yes" ]; then
+ ${install_command} --enablerepo=epel arrow-python-devel-${package_version}
+fi
+
+if [ "${have_glib}" = "yes" ]; then
+ ${install_command} --enablerepo=epel plasma-glib-devel-${package_version}
+ ${install_command} --enablerepo=epel plasma-glib-doc-${package_version}
+else
+ ${install_command} --enablerepo=epel plasma-devel-${package_version}
+fi
+
+if [ "${have_flight}" = "yes" ]; then
+ ${install_command} --enablerepo=epel arrow-flight-glib-devel-${package_version}
+ ${install_command} --enablerepo=epel arrow-flight-glib-doc-${package_version}
+fi
+
+if [ "${have_gandiva}" = "yes" ]; then
+ if [ "${have_glib}" = "yes" ]; then
+ ${install_command} --enablerepo=epel gandiva-glib-devel-${package_version}
+ ${install_command} --enablerepo=epel gandiva-glib-doc-${package_version}
+ else
+ ${install_command} --enablerepo=epel gandiva-devel-${package_version}
+ fi
+fi
+
+if [ "${have_parquet}" = "yes" ]; then
+ if [ "${have_glib}" = "yes" ]; then
+ ${install_command} --enablerepo=epel parquet-glib-devel-${package_version}
+ ${install_command} --enablerepo=epel parquet-glib-doc-${package_version}
+ else
+ ${install_command} --enablerepo=epel parquet-devel-${package_version}
+ fi
+fi
diff --git a/src/arrow/dev/requirements_merge_arrow_pr.txt b/src/arrow/dev/requirements_merge_arrow_pr.txt
new file mode 100644
index 000000000..7ac17dc1b
--- /dev/null
+++ b/src/arrow/dev/requirements_merge_arrow_pr.txt
@@ -0,0 +1,3 @@
+jira
+requests
+six
diff --git a/src/arrow/dev/tasks/README.md b/src/arrow/dev/tasks/README.md
new file mode 100644
index 000000000..1af9739db
--- /dev/null
+++ b/src/arrow/dev/tasks/README.md
@@ -0,0 +1,19 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+See the usage guide under the [documentation page](../../docs/source/developers/crossbow.rst)
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
new file mode 100644
index 000000000..dfc87c80b
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.6.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
new file mode 100644
index 000000000..3416b952c
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.7.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
new file mode 100644
index 000000000..f819ba722
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
new file mode 100644
index 000000000..3e2e0ef51
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '10.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:10.2
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
new file mode 100644
index 000000000..3aba0f129
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.6.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
new file mode 100644
index 000000000..ff26bc521
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.7.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
new file mode 100644
index 000000000..5703aba68
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
new file mode 100644
index 000000000..8ff58d717
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -0,0 +1,70 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
new file mode 100644
index 000000000..5bb4381fe
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
@@ -0,0 +1,69 @@
+BUILD:
+- aarch64-conda_cos7-linux-gnu
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_arch:
+- aarch64
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.6.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-aarch64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
new file mode 100644
index 000000000..2b1715d58
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
@@ -0,0 +1,69 @@
+BUILD:
+- aarch64-conda_cos7-linux-gnu
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_arch:
+- aarch64
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.7.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-aarch64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
new file mode 100644
index 000000000..5a0e7313e
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
@@ -0,0 +1,69 @@
+BUILD:
+- aarch64-conda_cos7-linux-gnu
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_arch:
+- aarch64
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-aarch64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
new file mode 100644
index 000000000..16ace00bd
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
@@ -0,0 +1,69 @@
+BUILD:
+- aarch64-conda_cos7-linux-gnu
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_arch:
+- aarch64
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- linux-aarch64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
new file mode 100644
index 000000000..0be59fe1a
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- x86_64-apple-darwin13.4.0
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.6.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
new file mode 100644
index 000000000..d2c046ab2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- x86_64-apple-darwin13.4.0
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.7.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
new file mode 100644
index 000000000..43f634454
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- x86_64-apple-darwin13.4.0
+numpy:
+- '1.17'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
new file mode 100644
index 000000000..7cc730f9b
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- x86_64-apple-darwin13.4.0
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
new file mode 100644
index 000000000..e5f8e2ba2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '11.0'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge/label/rust_dev,conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- arm64-apple-darwin20.0.0
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-arm64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
new file mode 100644
index 000000000..cd3eca6d2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '11.0'
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge/label/rust_dev,conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+macos_machine:
+- arm64-apple-darwin20.0.0
+numpy:
+- '1.19'
+orc:
+- 1.6.8
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- osx-arm64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml
new file mode 100644
index 000000000..dfdfae966
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml
@@ -0,0 +1,29 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.0'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cdt_name
+ - docker_image
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
new file mode 100644
index 000000000..c5f455c19
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
@@ -0,0 +1,29 @@
+c_compiler:
+- gcc
+c_compiler_version:
+- '9'
+cdt_name:
+- cos6
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '9'
+docker_image:
+- quay.io/condaforge/linux-anvil-comp7
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.1'
+target_platform:
+- linux-64
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+- - cdt_name
+ - docker_image
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
new file mode 100644
index 000000000..08bb81d08
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
@@ -0,0 +1,27 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+macos_machine:
+- x86_64-apple-darwin13.4.0
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.0'
+target_platform:
+- osx-64
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
new file mode 100644
index 000000000..9974c6638
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
@@ -0,0 +1,27 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '10.9'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+macos_machine:
+- x86_64-apple-darwin13.4.0
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.1'
+target_platform:
+- osx-64
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml
new file mode 100644
index 000000000..02c2a7075
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml
@@ -0,0 +1,12 @@
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.0'
+target_platform:
+- win-64
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml
new file mode 100644
index 000000000..2fe9ad314
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml
@@ -0,0 +1,12 @@
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+pin_run_as_build:
+ r-base:
+ min_pin: x.x
+ max_pin: x.x
+r_base:
+- '4.1'
+target_platform:
+- win-64
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
new file mode 100644
index 000000000..8d4e25167
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -0,0 +1,55 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- vs2017
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- vs2017
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.6.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- win-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - numpy
+ - python
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
new file mode 100644
index 000000000..8da4a8380
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -0,0 +1,55 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- vs2017
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- vs2017
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.7.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- win-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - numpy
+ - python
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
new file mode 100644
index 000000000..1980e1be3
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -0,0 +1,55 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- vs2017
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- vs2017
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.17'
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- win-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - numpy
+ - python
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
new file mode 100644
index 000000000..1106037d3
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -0,0 +1,55 @@
+aws_sdk_cpp:
+- 1.8.186
+bzip2:
+- '1'
+c_compiler:
+- vs2017
+channel_sources:
+- conda-forge,defaults
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- None
+cxx_compiler:
+- vs2017
+gflags:
+- '2.2'
+glog:
+- '0.5'
+grpc_cpp:
+- '1.38'
+libprotobuf:
+- '3.16'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.19'
+pin_run_as_build:
+ bzip2:
+ max_pin: x
+ lz4-c:
+ max_pin: x.x.x
+ python:
+ min_pin: x.x
+ max_pin: x.x
+ zlib:
+ max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.06.01
+snappy:
+- '1'
+target_platform:
+- win-64
+thrift_cpp:
+- 0.14.2
+zip_keys:
+- - numpy
+ - python
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh b/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh
new file mode 100644
index 000000000..a53ef3f2c
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Provide a unified interface for the different logging
+# utilities CI providers offer. If unavailable, provide
+# a compatible fallback (e.g. bare `echo xxxxxx`).
+
+function startgroup {
+ # Start a foldable group of log lines
+ # Pass a single argument, quoted
+ case ${CI:-} in
+ azure )
+ echo "##[group]$1";;
+ travis )
+ echo "$1"
+ echo -en 'travis_fold:start:'"${1// /}"'\\r';;
+ * )
+ echo "$1";;
+ esac
+}
+
+function endgroup {
+ # End a foldable group of log lines
+ # Pass a single argument, quoted
+ case ${CI:-} in
+ azure )
+ echo "##[endgroup]";;
+ travis )
+ echo -en 'travis_fold:end:'"${1// /}"'\\r';;
+ esac
+}
diff --git a/src/arrow/dev/tasks/conda-recipes/README.md b/src/arrow/dev/tasks/conda-recipes/README.md
new file mode 100644
index 000000000..39f82f1b0
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/README.md
@@ -0,0 +1,67 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Conda Forge recipes
+
+This directory must be migrated periodically with the upstrem updates of
+[arrow-cpp-feedstock][arrow-cpp-feedstock],
+[parquet-cpp-feedstock][parquet-cpp-feedstock].
+conda-forge repositories because of multiple vendored files.
+
+## Keeping the recipes synchronized
+
+The recipes here are tested on nightly basis, so they follow the development
+versions of arrow instead of the upstream recipes, which are suitable for the
+latest releases.
+
+### Backporting from the upstream feedstocks
+
+In most of the cases these recipes are more accurate, then the upstream
+feedstocks. Although the upstream feedstocks regularly receive automatic updates
+by the conda-forge team so we need to backport those changes to the crossbow
+recipes. Most of these updates are touching the version pinning files
+(under `.ci_support`) and other CI related configuration files.
+
+Because all three recipes must be built in the same continuous integration
+job prefer porting from the [arrpw-cpp feedstock][arrow-cpp-feedstock].
+
+#### Updating the variants:
+
+Copy the configuration files from `arrow-cpp-feedstock/.ci_support` to the
+`.ci_support` folder.
+
+#### Updating the CI configurations:
+
+The `.azure-pipelines/azure-pipelines-[linux|osx|win].yml` should be ported
+to the local counterparts under `.azure-pipelines` with keeping the crossbow
+related parts (the cloning of arrow and the jinja templated variables) and
+moving the matrix definitions like [this][matrix-definition] to the crossbow
+[tasks.yml][../tasks.yml] config file.
+
+
+### Porting recipes from crossbow to the upstream feedstocks
+
+Theoretically these recipes should be up to date with the actual version of
+Arrow, so during the release procedure the content of these recipes should be
+copied to the upstream feedstocks.
+
+
+[arrow-cpp-feedstock]: https://github.com/conda-forge/arrow-cpp-feedstock
+[parquet-cpp-feedstock]: https://github.com/conda-forge/parquet-cpp-feedstock
+[matrix-definition]: https://github.com/conda-forge/arrow-cpp-feedstock/blob/master/.azure-pipelines/azure-pipelines-linux.yml#L12
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt
new file mode 100644
index 000000000..461398bab
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt
@@ -0,0 +1,68 @@
+==============================================================================
+LLVM Release License
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+ LLVM Team
+
+ University of Illinois at Urbana-Champaign
+
+ http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+Copyrights and Licenses for Third Party Software Distributed with LLVM:
+==============================================================================
+The LLVM software contains code written by third parties. Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
+
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+
+Program Directory
+------- ---------
+Google Test llvm/utils/unittest/googletest
+OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
+pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
+ARM contributions llvm/lib/Target/ARM/LICENSE.TXT
+md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
new file mode 100644
index 000000000..0527356f7
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
@@ -0,0 +1,55 @@
+@echo on
+
+mkdir "%SRC_DIR%"\cpp\build
+pushd "%SRC_DIR%"\cpp\build
+
+:: Enable CUDA support
+if "%cuda_compiler_version%"=="None" (
+ set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=OFF"
+) else (
+ REM this should move to nvcc-feedstock
+ set "CUDA_PATH=%CUDA_PATH:\=/%"
+ set "CUDA_HOME=%CUDA_HOME:\=/%"
+
+ set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=ON"
+)
+
+cmake -G "Ninja" ^
+ -DBUILD_SHARED_LIBS=ON ^
+ -DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^
+ -DARROW_DEPENDENCY_SOURCE=SYSTEM ^
+ -DARROW_PACKAGE_PREFIX="%LIBRARY_PREFIX%" ^
+ -DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^
+ -DPython3_EXECUTABLE="%PYTHON%" ^
+ -DARROW_WITH_BZ2:BOOL=ON ^
+ -DARROW_WITH_ZLIB:BOOL=ON ^
+ -DARROW_WITH_ZSTD:BOOL=ON ^
+ -DARROW_WITH_LZ4:BOOL=ON ^
+ -DARROW_WITH_SNAPPY:BOOL=ON ^
+ -DARROW_WITH_BROTLI:BOOL=ON ^
+ -DARROW_BOOST_USE_SHARED:BOOL=ON ^
+ -DARROW_BUILD_TESTS:BOOL=OFF ^
+ -DARROW_BUILD_UTILITIES:BOOL=OFF ^
+ -DARROW_BUILD_STATIC:BOOL=OFF ^
+ -DCMAKE_BUILD_TYPE=release ^
+ -DARROW_SSE42:BOOL=OFF ^
+ -DARROW_PYTHON:BOOL=ON ^
+ -DARROW_MIMALLOC:BOOL=ON ^
+ -DARROW_DATASET:BOOL=ON ^
+ -DARROW_FLIGHT:BOOL=ON ^
+ -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS:BOOL=ON ^
+ -DARROW_HDFS:BOOL=ON ^
+ -DARROW_PARQUET:BOOL=ON ^
+ -DARROW_GANDIVA:BOOL=ON ^
+ -DARROW_ORC:BOOL=ON ^
+ -DARROW_S3:BOOL=ON ^
+ -DBoost_NO_BOOST_CMAKE=ON ^
+ -DCMAKE_UNITY_BUILD=ON ^
+ %EXTRA_CMAKE_ARGS% ^
+ ..
+if errorlevel 1 exit 1
+
+cmake --build . --target install --config Release
+if errorlevel 1 exit 1
+
+popd
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
new file mode 100644
index 000000000..89cec3710
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
@@ -0,0 +1,44 @@
+@echo on
+pushd "%SRC_DIR%"\python
+
+@rem the symlinks for cmake modules don't work here
+@rem NOTE: In contrast to conda-forge, they work here as we clone from git.
+@rem del cmake_modules\BuildUtils.cmake
+@rem del cmake_modules\SetupCxxFlags.cmake
+@rem del cmake_modules\CompilerInfo.cmake
+@rem del cmake_modules\FindNumPy.cmake
+@rem del cmake_modules\FindPythonLibsNew.cmake
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\BuildUtils.cmake" cmake_modules\
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\SetupCxxFlags.cmake" cmake_modules\
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\CompilerInfo.cmake" cmake_modules\
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindNumPy.cmake" cmake_modules\
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindPythonLibsNew.cmake" cmake_modules\
+
+SET ARROW_HOME=%LIBRARY_PREFIX%
+SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION%
+SET PYARROW_BUILD_TYPE=release
+SET PYARROW_WITH_S3=1
+SET PYARROW_WITH_HDFS=1
+SET PYARROW_WITH_DATASET=1
+SET PYARROW_WITH_FLIGHT=1
+SET PYARROW_WITH_GANDIVA=1
+SET PYARROW_WITH_PARQUET=1
+SET PYARROW_CMAKE_GENERATOR=Ninja
+
+:: Enable CUDA support
+if "%cuda_compiler_version%"=="None" (
+ set "PYARROW_WITH_CUDA=0"
+) else (
+ set "PYARROW_WITH_CUDA=1"
+)
+
+%PYTHON% setup.py ^
+ build_ext ^
+ install --single-version-externally-managed ^
+ --record=record.txt
+if errorlevel 1 exit 1
+popd
+
+if [%PKG_NAME%] == [pyarrow] (
+ rd /s /q %SP_DIR%\pyarrow\tests
+)
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
new file mode 100644
index 000000000..9e4c02c5c
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+set -e
+set -x
+
+mkdir cpp/build
+pushd cpp/build
+
+EXTRA_CMAKE_ARGS=""
+
+# Include g++'s system headers
+if [ "$(uname)" == "Linux" ]; then
+ SYSTEM_INCLUDES=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';')
+ EXTRA_CMAKE_ARGS=" -DARROW_GANDIVA_PC_CXX_FLAGS=${SYSTEM_INCLUDES}"
+fi
+
+# Enable CUDA support
+if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]
+then
+ if [[ -z "${CUDA_HOME+x}" ]]
+ then
+ echo "cuda_compiler_version=${cuda_compiler_version} CUDA_HOME=$CUDA_HOME"
+ CUDA_GDB_EXECUTABLE=$(which cuda-gdb || exit 0)
+ if [[ -n "$CUDA_GDB_EXECUTABLE" ]]
+ then
+ CUDA_HOME=$(dirname $(dirname $CUDA_GDB_EXECUTABLE))
+ else
+ echo "Cannot determine CUDA_HOME: cuda-gdb not in PATH"
+ return 1
+ fi
+ fi
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CUDA_HOME}/lib64/stubs"
+else
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=OFF"
+fi
+
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+ # We need llvm 11+ support in Arrow for this
+ # Tell jemalloc to support 16K page size on apple arm64 silicon
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=OFF -DARROW_JEMALLOC_LG_PAGE=14"
+ sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt
+elif [[ "${target_platform}" == "linux-aarch64" ]]; then
+ # Tell jemalloc to support both 4k and 64k page arm64 systems
+ # See https://github.com/apache/arrow/pull/10940
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON -DARROW_JEMALLOC_LG_PAGE=16"
+else
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON"
+fi
+
+cmake \
+ -DARROW_BOOST_USE_SHARED=ON \
+ -DARROW_BUILD_BENCHMARKS=OFF \
+ -DARROW_BUILD_STATIC=OFF \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DBUILD_SHARED_LIBS=ON \
+ -DARROW_DATASET=ON \
+ -DARROW_DEPENDENCY_SOURCE=SYSTEM \
+ -DARROW_FLIGHT=ON \
+ -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=ON \
+ -DARROW_HDFS=ON \
+ -DARROW_JEMALLOC=ON \
+ -DARROW_MIMALLOC=ON \
+ -DARROW_ORC=ON \
+ -DARROW_PACKAGE_PREFIX=$PREFIX \
+ -DARROW_PARQUET=ON \
+ -DARROW_PLASMA=ON \
+ -DARROW_PYTHON=ON \
+ -DARROW_S3=ON \
+ -DARROW_SIMD_LEVEL=NONE \
+ -DARROW_USE_LD_GOLD=ON \
+ -DARROW_WITH_BROTLI=ON \
+ -DARROW_WITH_BZ2=ON \
+ -DARROW_WITH_LZ4=ON \
+ -DARROW_WITH_SNAPPY=ON \
+ -DARROW_WITH_ZLIB=ON \
+ -DARROW_WITH_ZSTD=ON \
+ -DCMAKE_BUILD_TYPE=release \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=$PREFIX \
+ -DLLVM_TOOLS_BINARY_DIR=$PREFIX/bin \
+ -DPython3_EXECUTABLE=${PYTHON} \
+ -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc \
+ -GNinja \
+ ${EXTRA_CMAKE_ARGS} \
+ ..
+
+# Commented out until jemalloc and mimalloc are fixed upstream
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+ ninja jemalloc_ep-prefix/src/jemalloc_ep-stamp/jemalloc_ep-patch mimalloc_ep-prefix/src/mimalloc_ep-stamp/mimalloc_ep-patch
+ cp $BUILD_PREFIX/share/gnuconfig/config.* jemalloc_ep-prefix/src/jemalloc_ep/build-aux/
+ sed -ie 's/list(APPEND mi_cflags -march=native)//g' mimalloc_ep-prefix/src/mimalloc_ep/CMakeLists.txt
+ # Use the correct register for thread-local storage
+ sed -ie 's/tpidr_el0/tpidrro_el0/g' mimalloc_ep-prefix/src/mimalloc_ep/include/mimalloc-internal.h
+fi
+
+ninja install
+
+popd
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
new file mode 100644
index 000000000..f0cf9ceb4
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env sh
+
+set -e
+set -x
+
+# Build dependencies
+export ARROW_HOME=$PREFIX
+export PARQUET_HOME=$PREFIX
+export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION
+export PYARROW_BUILD_TYPE=release
+export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
+export PYARROW_WITH_DATASET=1
+export PYARROW_WITH_FLIGHT=1
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+ # We need llvm 11+ support in Arrow for this
+ export PYARROW_WITH_GANDIVA=0
+else
+ export PYARROW_WITH_GANDIVA=1
+fi
+export PYARROW_WITH_HDFS=1
+export PYARROW_WITH_ORC=1
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PLASMA=1
+export PYARROW_WITH_S3=1
+export PYARROW_CMAKE_GENERATOR=Ninja
+BUILD_EXT_FLAGS=""
+
+# Enable CUDA support
+if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]; then
+ export PYARROW_WITH_CUDA=1
+else
+ export PYARROW_WITH_CUDA=0
+fi
+
+# Resolve: Make Error at cmake_modules/SetupCxxFlags.cmake:338 (message): Unsupported arch flag: -march=.
+if [[ "${target_platform}" == "linux-aarch64" ]]; then
+ export PYARROW_CMAKE_OPTIONS="-DARROW_ARMV8_ARCH=armv8-a"
+fi
+
+cd python
+
+$PYTHON setup.py \
+ build_ext \
+ install --single-version-externally-managed \
+ --record=record.txt
+
+if [[ "$PKG_NAME" == "pyarrow" ]]; then
+ rm -r ${SP_DIR}/pyarrow/tests
+fi
diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
new file mode 100644
index 000000000..48a862986
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
@@ -0,0 +1,302 @@
+# NOTE: In constrast to the conda-forge recipe, ARROW_VERSION is a templated variable here.
+{% set version = ARROW_VERSION %}
+{% set cuda_enabled = cuda_compiler_version != "None" %}
+{% set build_ext_version = ARROW_VERSION %}
+{% set build_ext = "cuda" if cuda_enabled else "cpu" %}
+{% set proc_build_number = "0" %}
+
+package:
+ name: arrow-cpp-ext
+ version: {{ version }}
+
+source:
+ path: ../../../../
+
+build:
+ number: 0
+ # for cuda on win/linux, building with 9.2 is enough to be compatible with all later versions,
+ # since arrow is only using libcuda, and not libcudart.
+ skip: true # [(win or linux) and cuda_compiler_version not in ("None", "10.2")]
+ skip: true # [osx and cuda_compiler_version != "None"]
+ run_exports:
+ - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}
+
+outputs:
+ - name: arrow-cpp-proc
+ version: {{ build_ext_version }}
+ build:
+ number: {{ proc_build_number }}
+ string: "{{ build_ext }}"
+ test:
+ commands:
+ - exit 0
+ about:
+ home: http://github.com/apache/arrow
+ license: Apache-2.0
+ license_file:
+ - LICENSE.txt
+ summary: 'A meta-package to select Arrow build variant'
+
+ - name: arrow-cpp
+ script: build-arrow.sh # [not win]
+ script: bld-arrow.bat # [win]
+ version: {{ version }}
+ build:
+ string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
+ run_exports:
+ - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}
+ ignore_run_exports:
+ - cudatoolkit
+ track_features:
+ {{ "- arrow-cuda" if cuda_enabled else "" }}
+ requirements:
+ build:
+ - python # [build_platform != target_platform]
+ - cross-python_{{ target_platform }} # [build_platform != target_platform]
+ - cython # [build_platform != target_platform]
+ - numpy # [build_platform != target_platform]
+ - gnuconfig # [osx and arm64]
+ - libprotobuf
+ - grpc-cpp
+ - cmake
+ - autoconf # [unix]
+ - ninja
+ - make # [unix]
+ - {{ compiler('c') }}
+ - {{ compiler('cxx') }}
+ - {{ compiler("cuda") }} # [cuda_compiler_version != "None"]
+ host:
+ - aws-sdk-cpp
+ - boost-cpp >=1.70
+ - brotli
+ - bzip2
+ - c-ares
+ - gflags
+ - glog
+ - grpc-cpp
+ - libprotobuf
+ - clangdev 10 # [not (osx and arm64)]
+ - llvmdev 10 # [not (osx and arm64)]
+ - libutf8proc
+ - lz4-c
+ - numpy
+ - orc # [unix]
+ - python
+ - rapidjson
+ - re2
+ - snappy
+ - thrift-cpp
+ - zlib
+ - zstd
+ run:
+ - {{ pin_compatible('numpy', lower_bound='1.16') }}
+ - python
+ run_constrained:
+ - arrow-cpp-proc * {{ build_ext }}
+ - cudatoolkit >=9.2 # [cuda_compiler_version != "None"]
+
+ about:
+ home: http://github.com/apache/arrow
+ license: Apache-2.0
+ license_file:
+ - LICENSE.txt
+ summary: C++ libraries for Apache Arrow
+
+ test:
+ commands:
+ # headers
+ - test -f $PREFIX/include/arrow/api.h # [unix]
+ - test -f $PREFIX/include/arrow/flight/types.h # [unix]
+ - test -f $PREFIX/include/plasma/client.h # [unix]
+ - test -f $PREFIX/include/gandiva/engine.h # [unix and not (osx and arm64)]
+ - test -f $PREFIX/include/parquet/api/reader.h # [unix]
+ - if not exist %LIBRARY_INC%\\arrow\\api.h exit 1 # [win]
+ - if not exist %LIBRARY_INC%\\gandiva\\engine.h exit 1 # [win]
+ - if not exist %LIBRARY_INC%\\parquet\\api\\reader.h exit 1 # [win]
+
+ # shared
+ - test -f $PREFIX/lib/libarrow.so # [linux]
+ - test -f $PREFIX/lib/libarrow_dataset.so # [linux]
+ - test -f $PREFIX/lib/libarrow_flight.so # [linux]
+ - test -f $PREFIX/lib/libarrow_python.so # [linux]
+ - test -f $PREFIX/lib/libparquet.so # [linux]
+ - test -f $PREFIX/lib/libgandiva.so # [linux]
+ - test -f $PREFIX/lib/libplasma.so # [linux]
+ - test -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version != "None") and unix]
+ - test ! -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version == "None") and unix]
+ - if not exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version != "None") and win]
+ - if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win]
+ - test -f $PREFIX/lib/libarrow.dylib # [osx]
+ - test -f $PREFIX/lib/libarrow_dataset.dylib # [osx]
+ - test -f $PREFIX/lib/libarrow_python.dylib # [osx]
+ - test -f $PREFIX/lib/libgandiva.dylib # [osx and not arm64]
+ - test -f $PREFIX/lib/libparquet.dylib # [osx]
+ - test -f $PREFIX/lib/libplasma.dylib # [osx]
+ - if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1 # [win]
+ - if not exist %PREFIX%\\Library\\bin\\arrow_dataset.dll exit 1 # [win]
+ - if not exist %PREFIX%\\Library\\bin\\arrow_flight.dll exit 1 # [win]
+ - if not exist %PREFIX%\\Library\\bin\\arrow_python.dll exit 1 # [win]
+ - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win]
+ - if not exist %PREFIX%\\Library\\bin\\gandiva.dll exit 1 # [win]
+
+ # absence of static libraries
+ - test ! -f $PREFIX/lib/libarrow.a # [unix]
+ - test ! -f $PREFIX/lib/libarrow_dataset.a # [unix]
+ - test ! -f $PREFIX/lib/libarrow_flight.a # [unix]
+ - test ! -f $PREFIX/lib/libarrow_python.a # [unix]
+ - test ! -f $PREFIX/lib/libplasma.a # [unix]
+ - test ! -f $PREFIX/lib/libparquet.a # [unix]
+ - test ! -f $PREFIX/lib/libgandiva.a # [unix]
+ - if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win]
+ - if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win]
+ - if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win]
+ - if exist %PREFIX%\\Library\\lib\\arrow_python_static.lib exit 1 # [win]
+ - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win]
+ - if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win]
+
+ - name: pyarrow
+ script: build-pyarrow.sh # [not win]
+ script: bld-pyarrow.bat # [win]
+ version: {{ version }}
+ build:
+ string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
+ ignore_run_exports:
+ - cudatoolkit
+ track_features:
+ {{ "- arrow-cuda" if cuda_enabled else "" }}
+ requirements:
+ build:
+ - python # [build_platform != target_platform]
+ - cross-python_{{ target_platform }} # [build_platform != target_platform]
+ - cython # [build_platform != target_platform]
+ - numpy # [build_platform != target_platform]
+ - cmake
+ - ninja
+ - make # [unix]
+ - {{ compiler('c') }}
+ - {{ compiler('cxx') }}
+ # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda
+ - {{ compiler("cuda") }} # [cuda_compiler_version != "None"]
+ host:
+ - {{ pin_subpackage('arrow-cpp', exact=True) }}
+ - cython
+ - numpy
+ - python
+ - setuptools
+ - setuptools_scm
+ - six
+ run:
+ - {{ pin_subpackage('arrow-cpp', exact=True) }}
+ - {{ pin_compatible('numpy', lower_bound='1.16') }}
+ # empty parquet-cpp metapackage, force old versions to be uninstalled
+ - parquet-cpp 1.5.1.*
+ - python
+ run_constrained:
+ - arrow-cpp-proc * {{ build_ext }}
+ - cudatoolkit >=9.2 # [cuda_compiler_version != "None"]
+
+ about:
+ home: http://github.com/apache/arrow
+ license: Apache-2.0
+ license_file:
+ - LICENSE.txt
+ summary: Python libraries for Apache Arrow
+
+ test:
+ imports:
+ - pyarrow
+ - pyarrow.dataset
+ - pyarrow.flight
+ - pyarrow.gandiva # [not (osx and arm64)]
+ - pyarrow.orc # [unix]
+ - pyarrow.parquet
+ - pyarrow.plasma # [unix]
+ - pyarrow.fs
+ - pyarrow._s3fs
+ - pyarrow._hdfs
+ # We can only test importing cuda package but cannot run when a
+ # CUDA device is not available, for instance, when building from CI.
+ # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see
+ # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows
+ # However, we check below for (at least) the presence of a correctly-compiled module
+ - pyarrow.cuda # [cuda_compiler_version != "None" and not win]
+ commands:
+ - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix]
+ - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win]
+ # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y"
+ - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"]
+
+ - name: pyarrow-tests
+ script: build-pyarrow.sh # [not win]
+ script: bld-pyarrow.bat # [win]
+ version: {{ version }}
+ build:
+ string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
+ ignore_run_exports:
+ - cudatoolkit
+ track_features:
+ {{ "- arrow-cuda" if cuda_enabled else "" }}
+ requirements:
+ build:
+ - python # [build_platform != target_platform]
+ - cross-python_{{ target_platform }} # [build_platform != target_platform]
+ - cython # [build_platform != target_platform]
+ - numpy # [build_platform != target_platform]
+ - cmake
+ - ninja
+ - make # [unix]
+ - {{ compiler('c') }}
+ - {{ compiler('cxx') }}
+ # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda
+ - {{ compiler("cuda") }} # [cuda_compiler_version != "None"]
+ host:
+ - {{ pin_subpackage('arrow-cpp', exact=True) }}
+ - {{ pin_subpackage('pyarrow', exact=True) }}
+ - cython
+ - numpy
+ - python
+ - setuptools
+ - setuptools_scm
+ - six
+ run:
+ - {{ pin_subpackage('pyarrow', exact=True) }}
+ - python
+ run_constrained:
+ - arrow-cpp-proc * {{ build_ext }}
+ - cudatoolkit >=9.2 # [cuda_compiler_version != "None"]
+
+ about:
+ home: http://github.com/apache/arrow
+ license: Apache-2.0
+ license_file:
+ - LICENSE.txt
+ summary: Python test files for Apache Arrow
+
+ test:
+ commands:
+ - test -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix]
+ - if not exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win]
+
+about:
+ home: http://github.com/apache/arrow
+ license: Apache-2.0
+ license_file:
+ - LICENSE.txt
+ summary: C++ and Python libraries for Apache Arrow
+
+extra:
+ recipe-maintainers:
+ - wesm
+ - xhochy
+ - leifwalsh
+ - jreback
+ - cpcloud
+ - pcmoritz
+ - robertnishihara
+ - siddharthteotia
+ - kou
+ - kszucs
+ - pitrou
+ - pearu
+ - nealrichardson
+ - jakirkham
diff --git a/src/arrow/dev/tasks/conda-recipes/azure.clean.yml b/src/arrow/dev/tasks/conda-recipes/azure.clean.yml
new file mode 100644
index 000000000..84f167812
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/azure.clean.yml
@@ -0,0 +1,28 @@
+jobs:
+- job: linux
+ pool:
+ vmImage: ubuntu-latest
+ timeoutInMinutes: 360
+
+ steps:
+ - script: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ displayName: Clone arrow
+
+ - script: |
+ conda install -y -c conda-forge pandas anaconda-client packaging
+ displayName: Install requirements
+
+ - script: |
+ {% if arrow.branch == 'master' %}
+ mkdir -p $HOME/.continuum/anaconda-client/tokens/
+ echo $(CROSSBOW_ANACONDA_TOKEN) > $HOME/.continuum/anaconda-client/tokens/https%3A%2F%2Fapi.anaconda.org.token
+ {% endif %}
+ eval "$(conda shell.bash hook)"
+ conda activate base
+ python3 arrow/dev/tasks/conda-recipes/clean.py {% if arrow.branch == 'master' %}FORCE{% endif %}
+ displayName: Delete outdated packages
+
diff --git a/src/arrow/dev/tasks/conda-recipes/azure.linux.yml b/src/arrow/dev/tasks/conda-recipes/azure.linux.yml
new file mode 100755
index 000000000..c05d284d2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/azure.linux.yml
@@ -0,0 +1,38 @@
+{% import 'macros.jinja' as macros with context %}
+
+jobs:
+- job: linux
+ pool:
+ vmImage: ubuntu-latest
+ timeoutInMinutes: 360
+
+ variables:
+ CONFIG: {{ config }}
+ R_CONFIG: {{ r_config|default("") }}
+ ARROW_VERSION: {{ arrow.no_rc_version }}
+ UPLOAD_PACKAGES: False
+
+ steps:
+ # configure qemu binfmt-misc running. This allows us to run docker containers
+ # embedded qemu-static
+ - script: |
+ docker run --rm --privileged multiarch/qemu-user-static:register --reset --credential yes
+ ls /proc/sys/fs/binfmt_misc/
+ displayName: Configure binfmt_misc
+ condition: not(startsWith(variables['CONFIG'], 'linux_64'))
+
+ {{ macros.azure_checkout_arrow() }}
+
+ - task: CondaEnvironment@1
+ inputs:
+ packageSpecs: 'anaconda-client shyaml'
+ installOptions: '-c conda-forge'
+ updateConda: false
+
+ - script: |
+ mkdir build_artifacts
+ CI=azure arrow/dev/tasks/conda-recipes/run_docker_build.sh $(pwd)/build_artifacts
+ displayName: Run docker build
+
+ {{ macros.azure_upload_releases("build_artifacts/*/*.tar.bz2") }}
+ {{ macros.azure_upload_anaconda("build_artifacts/*/*.tar.bz2") }}
diff --git a/src/arrow/dev/tasks/conda-recipes/azure.osx.yml b/src/arrow/dev/tasks/conda-recipes/azure.osx.yml
new file mode 100755
index 000000000..99bb76ba5
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/azure.osx.yml
@@ -0,0 +1,83 @@
+{% import 'macros.jinja' as macros with context %}
+
+jobs:
+- job: osx
+ pool:
+ vmImage: macOS-10.14
+ timeoutInMinutes: 360
+ variables:
+ CONFIG: {{ config }}
+ R_CONFIG: {{ r_config|default("") }}
+ ARROW_VERSION: {{ arrow.no_rc_version }}
+ UPLOAD_PACKAGES: False
+ steps:
+ - bash: |
+ echo "##vso[task.prependpath]$CONDA/bin"
+ sudo chown -R $USER $CONDA
+ displayName: Add conda to PATH
+
+ - script: |
+ source activate base
+ conda config --set channel_priority strict
+ conda install -n base -c conda-forge --quiet --yes conda-forge-ci-setup=3 conda-build
+ displayName: 'Add conda-forge-ci-setup=3'
+
+ - script: |
+ echo "Removing homebrew from Azure to avoid conflicts."
+ /usr/bin/sudo mangle_homebrew
+ /usr/bin/sudo -k
+ displayName: Mangle homebrew
+
+ {{ macros.azure_checkout_arrow() }}
+
+ - script: |
+ source activate base
+ echo "Configuring conda."
+
+ setup_conda_rc ./ ./ ./.ci_support/${CONFIG}.yaml
+ export CI=azure
+ source run_conda_forge_build_setup
+ conda update --yes --quiet --override-channels -c conda-forge --all
+ displayName: Configure conda and conda-build
+ workingDirectory: arrow/dev/tasks/conda-recipes
+ env:
+ OSX_FORCE_SDK_DOWNLOAD: "1"
+
+ - script: |
+ source activate base
+ mangle_compiler ./ ./ ./.ci_support/${CONFIG}.yaml
+ workingDirectory: arrow/dev/tasks/conda-recipes
+ displayName: Mangle compiler
+
+ - script: |
+ source activate base
+ make_build_number ./ ./ ./.ci_support/${CONFIG}.yaml
+ workingDirectory: arrow/dev/tasks/conda-recipes
+ displayName: Generate build number clobber file
+
+ - script: |
+ source activate base
+ set +x
+ if [[ "${CONFIG}" == osx_arm* ]]; then
+ EXTRA_CB_OPTIONS="${EXTRA_CB_OPTIONS:-} --no-test"
+ fi
+ conda build arrow-cpp \
+ -m ./.ci_support/${CONFIG}.yaml \
+ --clobber-file ./.ci_support/clobber_${CONFIG}.yaml \
+ ${EXTRA_CB_OPTIONS:-} \
+ --output-folder ./build_artifacts
+
+ if [ ! -z "${R_CONFIG}" ]; then
+ conda build r-arrow \
+ -m ./.ci_support/r/${R_CONFIG}.yaml \
+ --output-folder ./build_artifacts
+ fi
+ workingDirectory: arrow/dev/tasks/conda-recipes
+ displayName: Build recipes
+
+ - script: |
+ sudo mv /usr/local/conda_mangled/* /usr/local/
+ displayName: Unmangle homebrew
+
+ {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}
+ {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}
diff --git a/src/arrow/dev/tasks/conda-recipes/azure.win.yml b/src/arrow/dev/tasks/conda-recipes/azure.win.yml
new file mode 100755
index 000000000..422e2f0e9
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/azure.win.yml
@@ -0,0 +1,77 @@
+{% import 'macros.jinja' as macros with context %}
+
+jobs:
+- job: win
+ pool:
+ vmImage: vs2017-win2016
+ timeoutInMinutes: 360
+ variables:
+ CONFIG: {{ config }}
+ R_CONFIG: {{ r_config|default("") }}
+ ARROW_VERSION: {{ arrow.no_rc_version }}
+ CONDA_BLD_PATH: D:\\bld\\
+ UPLOAD_PACKAGES: False
+
+ steps:
+ - script: |
+ choco install vcpython27 -fdv -y --debug
+ condition: contains(variables['CONFIG'], 'vs2008')
+ displayName: Install vcpython27.msi (if needed)
+
+ - powershell: |
+ Set-PSDebug -Trace 1
+ $batchcontent = @"
+ ECHO ON
+ SET vcpython=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0
+ DIR "%vcpython%"
+ CALL "%vcpython%\vcvarsall.bat" %*
+ "@
+ $batchDir = "C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC"
+ $batchPath = "$batchDir" + "\vcvarsall.bat"
+ New-Item -Path $batchPath -ItemType "file" -Force
+ Set-Content -Value $batchcontent -Path $batchPath
+ Get-ChildItem -Path $batchDir
+ Get-ChildItem -Path ($batchDir + '\..')
+ condition: contains(variables['CONFIG'], 'vs2008')
+ displayName: Patch vs2008 (if needed)
+
+ - task: CondaEnvironment@1
+ inputs:
+ packageSpecs: 'python=3.6 conda-build conda conda-forge::conda-forge-ci-setup=3 pip' # Optional
+ installOptions: "-c conda-forge"
+ updateConda: true
+ displayName: Install conda-build and activate environment
+ - script: set PYTHONUNBUFFERED=1
+
+ {{ macros.azure_checkout_arrow()|indent(2) }}
+
+ # Configure the VM
+ - script: setup_conda_rc .\ .\ .\.ci_support\%CONFIG%.yaml
+ workingDirectory: arrow\dev\tasks\conda-recipes
+
+ # Configure the VM.
+ - script: |
+ set "CI=azure"
+ call activate base
+ run_conda_forge_build_setup
+ displayName: conda-forge build setup
+ workingDirectory: arrow\dev\tasks\conda-recipes
+
+ - script: |
+ conda.exe build arrow-cpp parquet-cpp -m .ci_support\%CONFIG%.yaml
+ displayName: Build recipe
+ workingDirectory: arrow\dev\tasks\conda-recipes
+ env:
+ PYTHONUNBUFFERED: 1
+ condition: not(contains(variables['CONFIG'], 'vs2008'))
+
+ - script: |
+ conda.exe build r-arrow -m .ci_support\r\%R_CONFIG%.yaml
+ displayName: Build recipe
+ workingDirectory: arrow\dev\tasks\conda-recipes
+ env:
+ PYTHONUNBUFFERED: 1
+ condition: contains(variables['R_CONFIG'], 'win')
+
+ {{ macros.azure_upload_releases("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }}
+ {{ macros.azure_upload_anaconda("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }}
diff --git a/src/arrow/dev/tasks/conda-recipes/azure.yml b/src/arrow/dev/tasks/conda-recipes/azure.yml
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/azure.yml
diff --git a/src/arrow/dev/tasks/conda-recipes/build_steps.sh b/src/arrow/dev/tasks/conda-recipes/build_steps.sh
new file mode 100755
index 000000000..25864c08a
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/build_steps.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# NOTE: This script has been slightly adopted to suite the Apache Arrow / crossbow CI
+# setup. The next time this is updated to the current version on conda-forge,
+# you will also make this additions afterwards.
+
+# PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here
+# will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent
+# changes to this script, consider a proposal to conda-smithy so that other feedstocks can also
+# benefit from the improvement.
+
+set -xeuo pipefail
+
+output_dir=${1}
+
+export PYTHONUNBUFFERED=1
+export FEEDSTOCK_ROOT="${FEEDSTOCK_ROOT:-/home/conda/feedstock_root}"
+export CI_SUPPORT="${FEEDSTOCK_ROOT}/.ci_support"
+export CONFIG_FILE="${CI_SUPPORT}/${CONFIG}.yaml"
+
+cat >~/.condarc <<CONDARC
+
+conda-build:
+ root-dir: ${output_dir}
+
+CONDARC
+
+conda install --yes --quiet conda-forge-ci-setup=3 conda-build pip -c conda-forge
+
+# set up the condarc
+setup_conda_rc "${FEEDSTOCK_ROOT}" "${FEEDSTOCK_ROOT}" "${CONFIG_FILE}"
+
+source run_conda_forge_build_setup
+
+# make the build number clobber
+make_build_number "${FEEDSTOCK_ROOT}" "${FEEDSTOCK_ROOT}" "${CONFIG_FILE}"
+
+export CONDA_BLD_PATH="${output_dir}"
+
+conda build \
+ "${FEEDSTOCK_ROOT}/arrow-cpp" \
+ "${FEEDSTOCK_ROOT}/parquet-cpp" \
+ -m "${CI_SUPPORT}/${CONFIG}.yaml" \
+ --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml" \
+ --output-folder "${output_dir}"
+
+if [ ! -z "${R_CONFIG:-}" ]; then
+ conda build \
+ "${FEEDSTOCK_ROOT}/r-arrow" \
+ -m "${CI_SUPPORT}/r/${R_CONFIG}.yaml" \
+ --output-folder "${output_dir}"
+fi
+
+
+touch "${output_dir}/conda-forge-build-done-${CONFIG}"
diff --git a/src/arrow/dev/tasks/conda-recipes/clean.py b/src/arrow/dev/tasks/conda-recipes/clean.py
new file mode 100644
index 000000000..bd31c875d
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/clean.py
@@ -0,0 +1,80 @@
+from subprocess import check_output, check_call
+from typing import List
+
+import json
+import os
+import pandas as pd
+import sys
+
+from packaging.version import Version
+
+
+VERSIONS_TO_KEEP = 5
+PACKAGES = [
+ "arrow-cpp",
+ "arrow-cpp-proc",
+ "parquet-cpp",
+ "pyarrow",
+ "pyarrow-tests",
+ "r-arrow",
+]
+PLATFORMS = [
+ "linux-64",
+ "linux-aarch64",
+ "osx-64",
+ "win-64",
+]
+EXCLUDED_PATTERNS = [
+ ["r-arrow", "linux-aarch64"],
+]
+
+
+def packages_to_delete(package_name: str, platform: str) -> List[str]:
+ env = os.environ.copy()
+ env["CONDA_SUBDIR"] = platform
+ pkgs_json = check_output(
+ [
+ "conda",
+ "search",
+ "--json",
+ "-c",
+ "arrow-nightlies",
+ "--override-channels",
+ package_name,
+ ],
+ env=env,
+ )
+ pkgs = pd.DataFrame(json.loads(pkgs_json)[package_name])
+ pkgs["version"] = pkgs["version"].map(Version)
+ pkgs["py_version"] = pkgs["build"].str.slice(0, 4)
+
+ to_delete = []
+
+ for (subdir, python), group in pkgs.groupby(["subdir", "py_version"]):
+ group = group.sort_values(by="version", ascending=False)
+
+ if len(group) > VERSIONS_TO_KEEP:
+ del_candidates = group[VERSIONS_TO_KEEP:]
+ to_delete += (
+ f"arrow-nightlies/{package_name}/"
+ + del_candidates["version"].astype(str)
+ + del_candidates["url"].str.replace(
+ "https://conda.anaconda.org/arrow-nightlies", ""
+ )
+ ).to_list()
+
+ return to_delete
+
+
+if __name__ == "__main__":
+ to_delete = []
+ for package in PACKAGES:
+ for platform in PLATFORMS:
+ if [package, platform] in EXCLUDED_PATTERNS:
+ continue
+ to_delete += packages_to_delete(package, platform)
+
+ for name in to_delete:
+ print(f"Deleting {name} …")
+ if "FORCE" in sys.argv:
+ check_call(["anaconda", "remove", "-f", name])
diff --git a/src/arrow/dev/tasks/conda-recipes/conda-forge.yml b/src/arrow/dev/tasks/conda-recipes/conda-forge.yml
new file mode 100644
index 000000000..4c07b5dd3
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/conda-forge.yml
@@ -0,0 +1 @@
+channel_priority: strict
diff --git a/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml b/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml
new file mode 100644
index 000000000..5de06c32b
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml
@@ -0,0 +1,51 @@
+# ARROW-3229: this is a meta-package to prevent conflicts in the future
+
+{% set parquet_version = "1.5.1" %}
+
+package:
+ name: parquet-cpp
+ version: {{ parquet_version }}
+
+build:
+ number: 0
+ skip: true # [win32]
+ skip: true # [win and py<35]
+
+requirements:
+ host:
+ # NOTE: in the upstream feedstock use >= instead of =
+ - arrow-cpp ={{ ARROW_VERSION }}
+ run:
+ - arrow-cpp ={{ ARROW_VERSION }}
+
+test:
+ commands:
+ # headers
+ - test -f $PREFIX/include/parquet/api/reader.h # [unix]
+ - if not exist %LIBRARY_INC%\\parquet\\api\\reader.h exit 1 # [win]
+
+ # shared
+ - test -f $PREFIX/lib/libparquet.so # [linux]
+ - test -f $PREFIX/lib/libparquet.dylib # [osx]
+ - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win]
+
+ # absence of static libraries
+ - test ! -f $PREFIX/lib/libparquet.a # [unix]
+ - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win]
+
+about:
+ home: http://github.com/apache/arrow
+ license: Apache 2.0
+ summary: 'C++ libraries for the Apache Parquet file format'
+
+extra:
+ recipe-maintainers:
+ - wesm
+ - xhochy
+ - leifwalsh
+ - jreback
+ - cpcloud
+ - siddharthteotia
+ - kou
+ - kszucs
+ - pitrou
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat b/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat
new file mode 100644
index 000000000..a193ddc0a
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat
@@ -0,0 +1,9 @@
+bash %RECIPE_DIR%/build_win.sh
+IF %ERRORLEVEL% NEQ 0 exit 1
+cp %RECIPE_DIR%/configure.win r
+IF %ERRORLEVEL% NEQ 0 exit 1
+cp %RECIPE_DIR%/install.libs.R r/src
+IF %ERRORLEVEL% NEQ 0 exit 1
+set "MAKEFLAGS=-j%CPU_COUNT%"
+"%R%" CMD INSTALL --build r
+IF %ERRORLEVEL% NEQ 0 exit 1
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh b/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh
new file mode 100644
index 000000000..e868189a2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+export DISABLE_AUTOBREW=1
+$R CMD INSTALL --build r/.
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh b/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh
new file mode 100755
index 000000000..22c07d6e0
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+set -exuo pipefail
+
+
+# Rename arrow.dll to lib_arrow.dll to avoid conflicts with the arrow-cpp arrow.dll
+sed -i -e 's/void R_init_arrow/__declspec(dllexport) void R_init_lib_arrow/g' r/src/arrowExports.cpp
+sed -i -e 's/useDynLib(arrow/useDynLib(lib_arrow/g' r/NAMESPACE
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win b/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win
new file mode 100755
index 000000000..0b11d1335
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+set -euxo pipefail
+
+# Remove the -I../inst/include/ when unvendoring cpp11 in ARROW-13610
+echo "PKG_CPPFLAGS=-DNDEBUG -I\"${LIBRARY_PREFIX}/include\" -I\"${PREFIX}/include\" -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_S3 -DARROW_R_WITH_JSON -I../inst/include/" > src/Makevars.win
+echo "PKG_CXXFLAGS=\$(CXX_VISIBILITY)" >> src/Makevars.win
+echo 'CXX_STD=CXX11' >> src/Makevars.win
+echo "PKG_LIBS=-L\"${LIBRARY_PREFIX}/lib\" -larrow_dataset -lparquet -larrow" >> src/Makevars.win
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R b/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R
new file mode 100644
index 000000000..005bbe16b
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R
@@ -0,0 +1,5 @@
+src_dir <- file.path(R_PACKAGE_SOURCE, "src", fsep = "/")
+dest_dir <- file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/")
+
+dir.create(file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/"), recursive = TRUE, showWarnings = FALSE)
+file.copy(file.path(src_dir, "arrow.dll", fsep = "/"), file.path(dest_dir, "lib_arrow.dll", fsep = "/"))
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml b/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml
new file mode 100644
index 000000000..5f0643bef
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml
@@ -0,0 +1,66 @@
+{% set version = ARROW_VERSION %}
+{% set posix = 'm2-' if win else '' %}
+{% set native = 'm2w64-' if win else '' %}
+
+package:
+ name: r-arrow
+ version: {{ version|replace("-", "_") }}
+
+source:
+ path: ../../../../
+
+build:
+ merge_build_host: true # [win]
+ number: 0
+ rpaths:
+ - lib/R/lib/
+ - lib/
+
+requirements:
+ build:
+ - {{ compiler('c') }} # [not win]
+ - {{ compiler('cxx') }} # [not win]
+ - {{ compiler('r_clang') }} # [win]
+ - pkg-config # [not win]
+ - {{ posix }}make
+ - {{ posix }}sed # [win]
+ - {{ posix }}coreutils # [win]
+ - {{ posix }}filesystem # [win]
+ - {{ posix }}zip # [win]
+ host:
+ # Needs to be here, otherwise merge_build_host runs into issues
+ - pkg-config # [win]
+ - r-base
+ - arrow-cpp {{ version }}
+ - r-cpp11
+ - r-r6
+ - r-assertthat
+ - r-bit64
+ - r-purrr
+ - r-rlang
+ - r-tidyselect
+ run:
+ - r-base
+ - r-r6
+ - r-assertthat
+ - r-bit64
+ - r-purrr
+ - r-rlang
+ - r-tidyselect
+
+test:
+ commands:
+ - $R -e "library('arrow')" # [not win]
+ - "\"%R%\" -e \"library('arrow'); data(mtcars); write_parquet(mtcars, 'test.parquet')\"" # [win]
+
+about:
+ home: https://github.com/apache/arrow
+ license: Apache-2.0
+ license_file: LICENSE.txt
+ summary: R Integration to 'Apache' 'Arrow'.
+ license_family: APACHE
+
+extra:
+ recipe-maintainers:
+ - conda-forge/r
+ - conda-forge/arrow-cpp
diff --git a/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh b/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh
new file mode 100755
index 000000000..7645c43e2
--- /dev/null
+++ b/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+# NOTE: This script has been slightly adopted to suite the Apache Arrow / crossbow CI
+# setup. The next time this is updated to the current version on conda-forge,
+# you will also make this additions afterwards.
+
+# PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here
+# will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent
+# changes to this script, consider a proposal to conda-smithy so that other feedstocks can also
+# benefit from the improvement.
+
+set -xeo pipefail
+
+build_dir=${1}
+
+THISDIR="$( cd "$( dirname "$0" )" >/dev/null && pwd )"
+ARROW_ROOT=$(cd "$THISDIR/../../.."; pwd;)
+FEEDSTOCK_ROOT=$THISDIR
+
+docker info
+
+# In order for the conda-build process in the container to write to the mounted
+# volumes, we need to run with the same id as the host machine, which is
+# normally the owner of the mounted volumes, or at least has write permission
+export HOST_USER_ID=$(id -u)
+# Check if docker-machine is being used (normally on OSX) and get the uid from
+# the VM
+if hash docker-machine 2> /dev/null && docker-machine active > /dev/null; then
+ export HOST_USER_ID=$(docker-machine ssh $(docker-machine active) id -u)
+fi
+
+if [ -z "$CONFIG" ]; then
+ set +x
+ FILES=`ls .ci_support/linux_*`
+ CONFIGS=""
+ for file in $FILES; do
+ CONFIGS="${CONFIGS}'${file:12:-5}' or ";
+ done
+ echo "Need to set CONFIG env variable. Value can be one of ${CONFIGS:0:-4}"
+ exit 1
+fi
+
+if [ -z "${DOCKER_IMAGE}" ]; then
+ SHYAML_INSTALLED="$(shyaml -h || echo NO)"
+ if [ "${SHYAML_INSTALLED}" == "NO" ]; then
+ echo "WARNING: DOCKER_IMAGE variable not set and shyaml not installed. Falling back to condaforge/linux-anvil-comp7"
+ DOCKER_IMAGE="condaforge/linux-anvil-comp7"
+ else
+ DOCKER_IMAGE="$(cat "${FEEDSTOCK_ROOT}/.ci_support/${CONFIG}.yaml" | shyaml get-value docker_image.0 condaforge/linux-anvil-comp7 )"
+ fi
+fi
+
+mkdir -p "${build_dir}"
+DONE_CANARY="${build_dir}/conda-forge-build-done-${CONFIG}"
+rm -f "$DONE_CANARY"
+
+if [ -z "${CI}" ]; then
+ DOCKER_RUN_ARGS="-it "
+fi
+
+export UPLOAD_PACKAGES="${UPLOAD_PACKAGES:-True}"
+docker run ${DOCKER_RUN_ARGS} \
+ --shm-size=2G \
+ -v "${ARROW_ROOT}":/arrow:rw,z \
+ -v "${build_dir}":/build:rw \
+ -e FEEDSTOCK_ROOT="/arrow/dev/tasks/conda-recipes" \
+ -e CONFIG \
+ -e R_CONFIG \
+ -e HOST_USER_ID \
+ -e UPLOAD_PACKAGES \
+ -e ARROW_VERSION \
+ -e CI \
+ $DOCKER_IMAGE \
+ bash /arrow/dev/tasks/conda-recipes/build_steps.sh /build
+
+# verify that the end of the script was reached
+test -f "$DONE_CANARY"
diff --git a/src/arrow/dev/tasks/cpp-examples/github.linux.yml b/src/arrow/dev/tasks/cpp-examples/github.linux.yml
new file mode 100644
index 000000000..717d3c443
--- /dev/null
+++ b/src/arrow/dev/tasks/cpp-examples/github.linux.yml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ test:
+ name: C++ Example
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Arrow
+ shell: bash
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Run
+ shell: bash
+ run: |
+ cd arrow/cpp/examples/{{ type }}
+ docker-compose run --rm {{ run }}
diff --git a/src/arrow/dev/tasks/docker-tests/azure.linux.yml b/src/arrow/dev/tasks/docker-tests/azure.linux.yml
new file mode 100644
index 000000000..b8f1151f7
--- /dev/null
+++ b/src/arrow/dev/tasks/docker-tests/azure.linux.yml
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+jobs:
+- job: linux
+ pool:
+ vmImage: ubuntu-latest
+ timeoutInMinutes: 360
+ {% if env is defined %}
+ variables:
+ {% for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {% endfor %}
+ {% endif %}
+
+ steps:
+ - task: DockerInstaller@0
+ displayName: Docker Installer
+ inputs:
+ dockerVersion: 17.09.0-ce
+ releaseType: stable
+
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.6'
+
+ - script: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ displayName: Clone arrow
+
+ - script: pip install -e arrow/dev/archery[docker]
+ displayName: Setup Archery
+
+ - script: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }}
+ displayName: Execute Docker Build
diff --git a/src/arrow/dev/tasks/docker-tests/circle.linux.yml b/src/arrow/dev/tasks/docker-tests/circle.linux.yml
new file mode 100644
index 000000000..3ddb93dc9
--- /dev/null
+++ b/src/arrow/dev/tasks/docker-tests/circle.linux.yml
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+version: 2
+jobs:
+ build:
+ machine:
+ image: ubuntu-1604:202004-01
+ {%- if env is defined %}
+ environment:
+ {%- for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {%- endfor %}
+ {%- endif %}
+ steps:
+ - run: |
+ docker -v
+ docker-compose -v
+ - run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - run:
+ name: Execute Docker Build
+ command: |
+ pyenv versions
+ pyenv global 3.6.10
+ pip install -e arrow/dev/archery[docker]
+ archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }}
+ no_output_timeout: "1h"
+
+workflows:
+ version: 2
+ build:
+ jobs:
+ - build
diff --git a/src/arrow/dev/tasks/docker-tests/github.linux.yml b/src/arrow/dev/tasks/docker-tests/github.linux.yml
new file mode 100644
index 000000000..1faf482cb
--- /dev/null
+++ b/src/arrow/dev/tasks/docker-tests/github.linux.yml
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ test:
+ name: Docker Test
+ runs-on: ubuntu-latest
+ {% if env is defined %}
+ env:
+ {% for key, value in env.items() %}
+ {{ key }}: "{{ value }}"
+ {% endfor %}
+ {% endif %}
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+
+ - name: Execute Docker Build
+ shell: bash
+ run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ flags|default("") }} {{ image }} {{ command|default("") }}
+
+ {% if '-r' in image %}
+ - name: Dump R install logs
+ run: cat arrow/r/check/arrow.Rcheck/00install.out
+ continue-on-error: true
+ if: always()
+ {% endif %}
+
+ {% if arrow.branch == 'master' %}
+ {{ macros.github_login_dockerhub()|indent }}
+ - name: Push Docker Image
+ shell: bash
+ run: archery docker push {{ image }}
+ {% endif %}
diff --git a/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb b/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb
new file mode 100644
index 000000000..5f39666bd
--- /dev/null
+++ b/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb
@@ -0,0 +1,69 @@
+class ApacheArrow < Formula
+ desc "Columnar in-memory analytics layer designed to accelerate big data"
+ homepage "https://arrow.apache.org/"
+ url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.1/apache-arrow-6.0.1.tar.gz"
+ sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
+ license "Apache-2.0"
+ head "https://github.com/apache/arrow.git"
+
+ depends_on "boost" => :build
+ depends_on "cmake" => :build
+ depends_on "llvm" => :build
+ depends_on "brotli"
+ depends_on "glog"
+ depends_on "grpc"
+ depends_on "lz4"
+ depends_on "numpy"
+ depends_on "openssl@1.1"
+ depends_on "protobuf"
+ depends_on "python@3.9"
+ depends_on "rapidjson"
+ depends_on "snappy"
+ depends_on "thrift"
+ depends_on "zstd"
+
+ def install
+ ENV.cxx11
+ # link against system libc++ instead of llvm provided libc++
+ ENV.remove "HOMEBREW_LIBRARY_PATHS", Formula["llvm"].opt_lib
+ args = %W[
+ -DARROW_FLIGHT=ON
+ -DARROW_GANDIVA=ON
+ -DARROW_JEMALLOC=ON
+ -DARROW_MIMALLOC=ON
+ -DARROW_ORC=ON
+ -DARROW_PARQUET=ON
+ -DARROW_PLASMA=ON
+ -DARROW_PROTOBUF_USE_SHARED=ON
+ -DARROW_PYTHON=ON
+ -DARROW_WITH_BZ2=ON
+ -DARROW_WITH_ZLIB=ON
+ -DARROW_WITH_ZSTD=ON
+ -DARROW_WITH_LZ4=ON
+ -DARROW_WITH_SNAPPY=ON
+ -DARROW_WITH_BROTLI=ON
+ -DARROW_INSTALL_NAME_RPATH=OFF
+ -DPython3_EXECUTABLE=#{Formula["python@3.9"].bin/"python3"}
+ ]
+ # Re-enable -DARROW_S3=ON and add back aws-sdk-cpp to depends_on in ARROW-6437
+
+ mkdir "build"
+ cd "build" do
+ system "cmake", "../cpp", *std_cmake_args, *args
+ system "make"
+ system "make", "install"
+ end
+ end
+
+ test do
+ (testpath/"test.cpp").write <<~EOS
+ #include "arrow/api.h"
+ int main(void) {
+ arrow::int64();
+ return 0;
+ }
+ EOS
+ system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test"
+ system "./test"
+ end
+end
diff --git a/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
new file mode 100644
index 000000000..2a77b01fc
--- /dev/null
+++ b/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# https://github.com/autobrew/homebrew-core/blob/master/Formula/apache-arrow.rb
+class ApacheArrow < Formula
+ desc "Columnar in-memory analytics layer designed to accelerate big data"
+ homepage "https://arrow.apache.org/"
+ url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.1/apache-arrow-6.0.1.tar.gz"
+ sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
+ head "https://github.com/apache/arrow.git"
+
+ bottle do
+ cellar :any
+ sha256 "a55211ba6f464681b7ca1b48defdad9cfbe1cf6fad8ff9ec875dc5a3c8f3c5ed" => :el_capitan_or_later
+ root_url "https://autobrew.github.io/bottles"
+ end
+
+ # NOTE: if you add something here, be sure to add to PKG_LIBS in r/tools/autobrew
+ depends_on "boost" => :build
+ depends_on "cmake" => :build
+ depends_on "aws-sdk-cpp"
+ depends_on "lz4"
+ depends_on "snappy"
+ depends_on "thrift"
+ depends_on "zstd"
+
+ def install
+ ENV.cxx11
+ args = %W[
+ -DARROW_BUILD_SHARED=OFF
+ -DARROW_BUILD_UTILITIES=ON
+ -DARROW_COMPUTE=ON
+ -DARROW_CSV=ON
+ -DARROW_DATASET=ON
+ -DARROW_FILESYSTEM=ON
+ -DARROW_HDFS=OFF
+ -DARROW_JEMALLOC=ON
+ -DARROW_JSON=ON
+ -DARROW_MIMALLOC=ON
+ -DARROW_PARQUET=ON
+ -DARROW_PYTHON=OFF
+ -DARROW_S3=ON
+ -DARROW_USE_GLOG=OFF
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=ON
+ -DARROW_WITH_LZ4=ON
+ -DARROW_WITH_SNAPPY=ON
+ -DARROW_WITH_ZLIB=ON
+ -DARROW_WITH_ZSTD=ON
+ -DCMAKE_UNITY_BUILD=OFF
+ -DPARQUET_BUILD_EXECUTABLES=ON
+ -DLZ4_HOME=#{Formula["lz4"].prefix}
+ -DTHRIFT_HOME=#{Formula["thrift"].prefix}
+ ]
+
+ mkdir "build"
+ cd "build" do
+ system "cmake", "../cpp", *std_cmake_args, *args
+ system "make"
+ system "make", "install"
+ end
+ end
+
+ test do
+ (testpath/"test.cpp").write <<~EOS
+ #include "arrow/api.h"
+ int main(void) {
+ arrow::int64();
+ return 0;
+ }
+ EOS
+ system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test"
+ system "./test"
+ end
+end
diff --git a/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml b/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml
new file mode 100644
index 000000000..232cc38a9
--- /dev/null
+++ b/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ autobrew:
+ name: "Autobrew"
+ runs-on: macOS-latest
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Configure homebrew formula for testing
+ env:
+ ARROW_FORMULA: ./arrow/dev/tasks/homebrew-formulae/{{ formula }}
+ run: |
+ # Pin the current commit in the formula to test so that we're not always pulling from master
+ sed -i.bak -E -e 's@https://github.com/apache/arrow.git"$@{{ arrow.remote }}.git", revision: "{{ arrow.head }}"@' $ARROW_FORMULA && rm -f $ARROW_FORMULA.bak
+ # Sometimes crossbow gives a remote URL with .git and sometimes not. Make sure there's only one
+ sed -i.bak -E -e 's@.git.git@.git@' $ARROW_FORMULA && rm -f $ARROW_FORMULA.bak
+ brew update
+ brew --version
+ brew unlink python@2 || true
+ brew config
+ brew doctor || true
+ cp $ARROW_FORMULA $(brew --repository homebrew/core)/Formula/apache-arrow.rb
+ - name: Test formula
+ run: |
+ brew install -v --HEAD apache-arrow
+ brew test apache-arrow
+ brew audit --strict apache-arrow
diff --git a/src/arrow/dev/tasks/java-jars/README.md b/src/arrow/dev/tasks/java-jars/README.md
new file mode 100644
index 000000000..1d61662d4
--- /dev/null
+++ b/src/arrow/dev/tasks/java-jars/README.md
@@ -0,0 +1,29 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Jars.
+
+This directory is responsible to generate the jar files for the Arrow components that depend on C++ shared libraries to execute.
+
+The Arrow C++ libraries are compiled both on MacOS and Linux distributions, with their dependencies linked statically, and they are added
+in the jars at the end, so the file can be used on both systems.
+
+## Linux Docker Image
+To compile the C++ libraries in Linux, a docker image is used.
+It is created used the **ci/docker/java-bundled-jars.dockerfile** file.
+If it is necessary to add any new dependency, you need to change that file. \ No newline at end of file
diff --git a/src/arrow/dev/tasks/java-jars/github.yml b/src/arrow/dev/tasks/java-jars/github.yml
new file mode 100644
index 000000000..81d31dd4c
--- /dev/null
+++ b/src/arrow/dev/tasks/java-jars/github.yml
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+
+ build-cpp-ubuntu:
+ name: Build C++ Libs Ubuntu
+ runs-on: ubuntu-18.04
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+ - name: Build C++ Libs
+ run: archery docker run java-jni-manylinux-2014
+ - name: Compress into single artifact
+ run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/
+ - name: Upload Artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: ubuntu-shared-lib
+ path: arrow-shared-libs-linux.tar.gz
+ {% if arrow.branch == 'master' %}
+ {{ macros.github_login_dockerhub()|indent }}
+ - name: Push Docker Image
+ shell: bash
+ run: archery docker push java-jni-manylinux-2014
+ {% endif %}
+
+ build-cpp-macos:
+ name: Build C++ Libs MacOS
+ runs-on: macos-latest
+ env:
+ MACOSX_DEPLOYMENT_TARGET: "10.11"
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+ - name: Install Dependencies
+ run: brew bundle --file=arrow/cpp/Brewfile
+ - name: Build C Data Interface lib
+ run: |
+ set -e
+ arrow/ci/scripts/java_cdata_build.sh \
+ $GITHUB_WORKSPACE/arrow \
+ $GITHUB_WORKSPACE/arrow/java-native-build \
+ $GITHUB_WORKSPACE/arrow/java-dist
+ - name: Build C++ Libs
+ run: |
+ set -e
+ arrow/ci/scripts/java_jni_macos_build.sh \
+ $GITHUB_WORKSPACE/arrow \
+ $GITHUB_WORKSPACE/arrow/cpp-build \
+ $GITHUB_WORKSPACE/arrow/java-dist
+ - name: Compress into single artifact
+ run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/java-dist/
+ - name: Upload Artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: macos-shared-lib
+ path: arrow-shared-libs-macos.tar.gz
+
+ package-jars:
+ name: Build Jar Files
+ runs-on: macos-latest
+ needs: [build-cpp-macos, build-cpp-ubuntu]
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ - name: Download Linux C++ Libraries
+ uses: actions/download-artifact@v2
+ with:
+ name: ubuntu-shared-lib
+ - name: Download MacOS C++ Library
+ uses: actions/download-artifact@v2
+ with:
+ name: macos-shared-lib
+ - name: Descompress artifacts
+ run: |
+ tar -xvzf arrow-shared-libs-macos.tar.gz
+ tar -xvzf arrow-shared-libs-linux.tar.gz
+ - name: Test that Shared Libraries Exist
+ run: |
+ test -f arrow/java-dist/libarrow_cdata_jni.dylib
+ test -f arrow/java-dist/libarrow_dataset_jni.dylib
+ test -f arrow/java-dist/libgandiva_jni.dylib
+ test -f arrow/java-dist/libarrow_orc_jni.dylib
+ test -f arrow/java-dist/libarrow_cdata_jni.so
+ test -f arrow/java-dist/libarrow_dataset_jni.so
+ test -f arrow/java-dist/libarrow_orc_jni.so
+ test -f arrow/java-dist/libgandiva_jni.so
+ - name: Build Bundled Jar
+ run: |
+ set -e
+ pushd arrow/java
+ mvn versions:set -DnewVersion={{ arrow.no_rc_version }}
+ popd
+ arrow/ci/scripts/java_full_build.sh \
+ $GITHUB_WORKSPACE/arrow \
+ $GITHUB_WORKSPACE/arrow/java-dist
+ {{ macros.github_upload_releases(["arrow/java-dist/*.jar", "arrow/java-dist/*.pom"])|indent }}
diff --git a/src/arrow/dev/tasks/linux-packages/.gitignore b/src/arrow/dev/tasks/linux-packages/.gitignore
new file mode 100644
index 000000000..0e49a90c1
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/.gitignore
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+/*/*.tar.gz
+/*/apt/repositories/
+/*/apt/tmp/
+/*/apt/build.sh
+/*/apt/env.sh
+/*/yum/repositories/
+/*/yum/tmp/
+/*/yum/build.sh
+/*/yum/env.sh
+/apt/repositories/
+/yum/repositories/
diff --git a/src/arrow/dev/tasks/linux-packages/README.md b/src/arrow/dev/tasks/linux-packages/README.md
new file mode 100644
index 000000000..cafcc04ed
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/README.md
@@ -0,0 +1,40 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Linux packages for Apache Arrow C++ and GLib
+
+## Requirements
+
+ * Ruby
+ * Docker
+ * Tools to build tar.gz for Apache Arrow C++ and GLib
+
+## How to build .deb packages
+
+```console
+% rake version:update
+% rake apt
+```
+
+## How to build .rpm packages
+
+```console
+% rake version:update
+% rake yum
+```
diff --git a/src/arrow/dev/tasks/linux-packages/Rakefile b/src/arrow/dev/tasks/linux-packages/Rakefile
new file mode 100644
index 000000000..58cd981fe
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/Rakefile
@@ -0,0 +1,249 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "English"
+
+require_relative "../../release/binary-task"
+require_relative "helper"
+
+packages = [
+ "apache-arrow",
+ "apache-arrow-apt-source",
+ "apache-arrow-release",
+]
+
+
+namespace :apt do
+ desc "Build deb packages"
+ task :build do
+ packages.each do |package|
+ cd(package) do
+ ruby("-S", "rake", "apt:build")
+ end
+ end
+ end
+end
+
+namespace :yum do
+ desc "Build RPM packages"
+ task :build do
+ packages.each do |package|
+ cd(package) do
+ ruby("-S", "rake", "yum:build")
+ end
+ end
+ end
+end
+
+namespace :version do
+ desc "Update versions"
+ task :update do
+ packages.each do |package|
+ cd(package) do
+ ruby("-S", "rake", "version:update")
+ end
+ end
+ end
+end
+
+namespace :docker do
+ desc "Pull built images"
+ task :pull do
+ packages.each do |package|
+ cd(package) do
+ ruby("-S", "rake", "docker:pull")
+ end
+ end
+ end
+
+ desc "Push built images"
+ task :push do
+ packages.each do |package|
+ cd(package) do
+ ruby("-S", "rake", "docker:push")
+ end
+ end
+ end
+end
+
+
+class LocalBinaryTask < BinaryTask
+ include Helper::ApacheArrow
+
+ def initialize(packages)
+ @packages = packages
+ super()
+ end
+
+ def define
+ define_apt_test_task
+ define_yum_test_task
+ end
+
+ private
+ def latest_commit_time(git_directory)
+ cd(git_directory) do
+ return Time.iso8601(`git log -n 1 --format=%aI`.chomp).utc
+ end
+ end
+
+ def version
+ @version ||= detect_version(detect_release_time)
+ end
+
+ def resolve_docker_image(target)
+ image = ""
+ target = target.gsub(/\Aamazon-linux/, "amazonlinux")
+ case target
+ when /-(?:arm64|aarch64)\z/
+ target = $PREMATCH
+ image << "arm64v8/"
+ end
+ image << target.gsub(/-/, ":")
+ end
+
+ def verify(target)
+ verify_command_line = [
+ "docker",
+ "run",
+ "--rm",
+ "--log-driver", "none",
+ "--volume", "#{File.expand_path(arrow_source_dir)}:/arrow:delegated",
+ ]
+ if $stdin.tty?
+ verify_command_line << "--interactive"
+ verify_command_line << "--tty"
+ else
+ verify_command_line.concat(["--attach", "STDOUT"])
+ verify_command_line.concat(["--attach", "STDERR"])
+ end
+ verify_command_line << resolve_docker_image(target)
+ case target
+ when /\Adebian-/, /\Aubuntu-/
+ verify_command_line << "/arrow/dev/release/verify-apt.sh"
+ else
+ verify_command_line << "/arrow/dev/release/verify-yum.sh"
+ end
+ verify_command_line << version
+ verify_command_line << "local"
+ sh(*verify_command_line)
+ end
+
+ def apt_test_targets
+ targets = (ENV["APT_TARGETS"] || "").split(",")
+ targets = apt_test_targets_default if targets.empty?
+ targets
+ end
+
+ def apt_test_targets_default
+ # Disable arm64 targets by default for now
+ # because they require some setups on host.
+ [
+ "debian-buster",
+ # "debian-buster-arm64",
+ "debian-bullseye",
+ # "debian-bullseye-arm64",
+ "ubuntu-xenial",
+ # "ubuntu-xenial-arm64",
+ "ubuntu-bionic",
+ # "ubuntu-bionic-arm64",
+ "ubuntu-focal",
+ # "ubuntu-focal-arm64",
+ "ubuntu-hirsute",
+ # "ubuntu-hirsute-arm64",
+ "ubuntu-impish",
+ # "ubuntu-impish-arm64",
+ ]
+ end
+
+ def define_apt_test_task
+ namespace :apt do
+ desc "Test deb packages"
+ task :test do
+ repositories_dir = "apt/repositories"
+ rm_rf(repositories_dir)
+ @packages.each do |package|
+ package_repositories = "#{package}/apt/repositories"
+ next unless File.exist?(package_repositories)
+ sh("rsync", "-a", "#{package_repositories}/", repositories_dir)
+ end
+ Dir.glob("#{repositories_dir}/ubuntu/pool/*") do |code_name_dir|
+ universe_dir = "#{code_name_dir}/universe"
+ next unless File.exist?(universe_dir)
+ mv(universe_dir, "#{code_name_dir}/main")
+ end
+ base_dir = "nonexistent"
+ merged_dir = "apt/merged"
+ apt_update(base_dir, repositories_dir, merged_dir)
+ Dir.glob("#{merged_dir}/*/dists/*") do |dists_code_name_dir|
+ prefix = dists_code_name_dir.split("/")[-3..-1].join("/")
+ mv(Dir.glob("#{dists_code_name_dir}/*Release*"),
+ "#{repositories_dir}/#{prefix}")
+ end
+ apt_test_targets.each do |target|
+ verify(target)
+ end
+ end
+ end
+ end
+
+ def yum_test_targets
+ targets = (ENV["YUM_TARGETS"] || "").split(",")
+ targets = yum_test_targets_default if targets.empty?
+ targets
+ end
+
+ def yum_test_targets_default
+ # Disable aarch64 targets by default for now
+ # because they require some setups on host.
+ [
+ "almalinux-8",
+ # "almalinux-8-aarch64",
+ "amazon-linux-2",
+ # "amazon-linux-2-aarch64",
+ "centos-7",
+ "centos-8",
+ # "centos-8-aarch64",
+ ]
+ end
+
+ def define_yum_test_task
+ namespace :yum do
+ desc "Test RPM packages"
+ task :test do
+ repositories_dir = "yum/repositories"
+ rm_rf(repositories_dir)
+ @packages.each do |package|
+ package_repositories = "#{package}/yum/repositories"
+ next unless File.exist?(package_repositories)
+ sh("rsync", "-a", "#{package_repositories}/", repositories_dir)
+ end
+ rpm_sign(repositories_dir)
+ base_dir = "nonexistent"
+ yum_update(base_dir, repositories_dir)
+ yum_test_targets.each do |target|
+ verify(target)
+ end
+ end
+ end
+ end
+end
+
+local_binary_task = LocalBinaryTask.new(packages)
+local_binary_task.define
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile
new file mode 100644
index 000000000..210fa951e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile
@@ -0,0 +1,64 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "../helper"
+require_relative "../package-task"
+
+class ApacheArrowAptSourcePackageTask < PackageTask
+ include Helper::ApacheArrow
+
+ def initialize
+ release_time = detect_release_time
+ super("apache-arrow-apt-source",
+ detect_version(release_time),
+ release_time,
+ :rc_build_type => :release)
+ end
+
+ private
+ def define_archive_task
+ file @archive_name do
+ rm_rf(@archive_base_name)
+ mkdir(@archive_base_name)
+ download("https://downloads.apache.org/arrow/KEYS",
+ "#{@archive_base_name}/KEYS")
+ sh("tar", "czf", @archive_name, @archive_base_name)
+ rm_rf(@archive_base_name)
+ end
+
+ if deb_archive_name != @archive_name
+ file deb_archive_name => @archive_name do
+ if @archive_base_name == deb_archive_base_name
+ cp(@archive_name, deb_archive_name)
+ else
+ sh("tar", "xf", @archive_name)
+ mv(@archive_base_name, deb_archive_base_name)
+ sh("tar", "czf", deb_archive_name, deb_archive_base_name)
+ end
+ end
+ end
+ end
+
+ def enable_yum?
+ false
+ end
+end
+
+task = ApacheArrowAptSourcePackageTask.new
+task.define
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile
new file mode 100644
index 000000000..f9541efde
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM debian:bookworm
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile
new file mode 100644
index 000000000..b0842a0c0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM debian:bullseye
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile
new file mode 100644
index 000000000..0d37f5dee
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM debian:buster
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile
new file mode 100644
index 000000000..53e11fb7e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:bionic
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile
new file mode 100644
index 000000000..dc902d14d
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:focal
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile
new file mode 100644
index 000000000..8b6fd7f0e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:hirsute
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile
new file mode 100644
index 000000000..640cd715d
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:impish
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
new file mode 100644
index 000000000..7bcb2ecc9
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
@@ -0,0 +1,2 @@
+etc/apt/sources.list.d/*
+usr/share/keyrings/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
new file mode 100644
index 000000000..27e14c802
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
@@ -0,0 +1,11 @@
+apache-arrow-apt-source (6.0.1-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Sutou Kouhei <kou@clear-code.com> Wed, 10 Nov 2021 02:04:45 -0000
+
+apache-arrow-apt-source (6.0.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 21 Oct 2021 19:01:54 -0000
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
new file mode 100644
index 000000000..ec635144f
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
new file mode 100644
index 000000000..f54d52f98
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
@@ -0,0 +1,23 @@
+Source: apache-arrow-apt-source
+Section: misc
+Priority: important
+Maintainer: Apache Arrow Developers <dev@arrow.apache.org>
+Build-Depends:
+ debhelper (>= 9),
+ gnupg,
+ lsb-release
+Standards-Version: 3.9.7
+Homepage: https://arrow.apache.org/
+
+Package: apache-arrow-apt-source
+Section: misc
+Architecture: all
+Replaces: apache-arrow-archive-keyring
+Breaks: apache-arrow-archive-keyring
+Depends:
+ ${misc:Depends},
+ apt-transport-https,
+ gnupg
+Description: GnuPG archive key of the Apache Arrow archive
+ The Apache Arrow project digitally signs its Release files. This
+ package contains the archive key used for that.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright
new file mode 100644
index 000000000..274d64ca0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright
@@ -0,0 +1,26 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: Apache Arrow
+Upstream-Contact: Apache Arrow Developers <dev@arrow.apache.org>
+
+Files: *
+Copyright: 2016 The Apache Software Foundation
+License: Apache-2.0
+
+License: Apache-2.0
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ .
+ On Debian systems, the full text of the Apache Software License version 2 can
+ be found in the file `/usr/share/common-licenses/Apache-2.0'.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
new file mode 100755
index 000000000..bf7a85c8c
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
@@ -0,0 +1,37 @@
+#!/usr/bin/make -f
+# -*- makefile-gmake -*-
+#
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+# This has to be exported to make some magic below work.
+export DH_OPTIONS
+
+%:
+ dh $@
+
+override_dh_auto_build:
+ gpg \
+ --no-default-keyring \
+ --keyring ./apache-arrow-apt-source.gpg \
+ --import KEYS
+
+ ( \
+ distribution=$$(lsb_release --id --short | tr 'A-Z' 'a-z'); \
+ code_name=$$(lsb_release --codename --short); \
+ echo "Types: deb deb-src"; \
+ echo "URIs: https://apache.jfrog.io/artifactory/arrow/$${distribution}/"; \
+ echo "Suites: $${code_name}"; \
+ echo "Components: main"; \
+ echo "Signed-By: /usr/share/keyrings/apache-arrow-apt-source.gpg"; \
+ ) > apache-arrow.sources
+
+override_dh_install:
+ install -d debian/tmp/usr/share/keyrings/
+ install -m 0644 apache-arrow-apt-source.gpg \
+ debian/tmp/usr/share/keyrings/
+
+ install -d debian/tmp/etc/apt/sources.list.d/
+ install -m 0644 apache-arrow.sources \
+ debian/tmp/etc/apt/sources.list.d/
+
+ dh_install
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
new file mode 100644
index 000000000..163aaf8d8
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile
new file mode 100644
index 000000000..4a341c6f1
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile
@@ -0,0 +1,66 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "../helper"
+require_relative "../package-task"
+
+class ApacheArrowReleasePackageTask < PackageTask
+ include Helper::ApacheArrow
+
+ def initialize
+ release_time = detect_release_time
+ super("apache-arrow-release",
+ detect_version(release_time),
+ release_time,
+ :rc_build_type => :release)
+ end
+
+ private
+ def repo_path
+ "#{yum_dir}/Apache-Arrow.repo"
+ end
+
+ def define_archive_task
+ file @archive_name => [repo_path] do
+ rm_rf(@archive_base_name)
+ mkdir(@archive_base_name)
+ download("https://downloads.apache.org/arrow/KEYS",
+ "#{@archive_base_name}/KEYS")
+ cp(repo_path, @archive_base_name)
+ sh("tar", "czf", @archive_name, @archive_base_name)
+ rm_rf(@archive_base_name)
+ end
+
+ if rpm_archive_name != @archive_name
+ file rpm_archive_name => @archive_name do
+ sh("tar", "xf", @archive_name)
+ rpm_archive_base_name = File.basename(rpm_archive_name, ".tar.gz")
+ mv(@archive_base_name, rpm_archive_base_name)
+ sh("tar", "czf", rpm_archive_name, rpm_archive_base_name)
+ end
+ end
+ end
+
+ def enable_apt?
+ false
+ end
+end
+
+task = ApacheArrowReleasePackageTask.new
+task.define
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
new file mode 100644
index 000000000..0d1666520
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[apache-arrow-almalinux]
+name=Apache Arrow for AlmaLinux $releasever - $basearch
+baseurl=https://apache.jfrog.io/artifactory/arrow/almalinux/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+
+[apache-arrow-amazon-linux]
+name=Apache Arrow for Amazon Linux $releasever - $basearch
+baseurl=https://apache.jfrog.io/artifactory/arrow/amazon-linux/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+
+[apache-arrow-centos]
+name=Apache Arrow for CentOS $releasever - $basearch
+baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+
+[apache-arrow-rhel]
+name=Apache Arrow for RHEL $releasever - $basearch
+baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile
new file mode 100644
index 000000000..5be1edc3d
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM almalinux:8
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ dnf install --enablerepo=powertools -y ${quiet} \
+ rpmdevtools && \
+ dnf clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile
new file mode 100644
index 000000000..800df6c68
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM amazonlinux:2
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ yum install -y ${quiet} \
+ rpmdevtools && \
+ yum clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
new file mode 100644
index 000000000..594cda373
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
@@ -0,0 +1,113 @@
+# -*- sh-shell: rpm -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+%define use_dnf (%{rhel} >= 8)
+%if %{use_dnf}
+%define yum_repository_enable() (dnf config-manager --set-enabled %1)
+%define yum_repository_disable() (dnf config-manager --set-disabled %1)
+%else
+%define yum_repository_enable() (yum-config-manager --enable %1)
+%define yum_repository_disable() (yum-config-manager --disable %1)
+%endif
+
+Name: @PACKAGE@
+Version: @VERSION@
+Release: @RELEASE@%{?dist}
+Summary: Apache Arrow release files
+
+License: Apache-2.0
+URL: https://arrow.apache.org/
+Source0: @PACKAGE@-%{version}.tar.gz
+
+BuildArch: noarch
+
+Requires: epel-release
+%if %{use_dnf}
+Requires: dnf-command(config-manager)
+%else
+Requires: yum-utils
+%endif
+
+%description
+Apache Arrow release files.
+
+%prep
+%setup -q
+
+%build
+distribution=$(. /etc/os-release && echo "${ID}")
+if [ "${distribution}" = "rhel" ]; then
+ # We use distribution version explicitly for RHEL because we can't
+ # use symbolic link on Artifactory. CentOS uses 7 and 8 but RHEL uses
+ # 7Server and 8Server for $releasever. If we can use symbolic link
+ # on Artifactory we can use $releasever directly.
+ distribution_version=$(. /etc/os-release && echo "${VERSION_ID}")
+ sed -i'' -e "s/\\\$releasever/${distribution_version}/g" Apache-Arrow.repo
+fi
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+%{__install} -Dp -m0644 KEYS \
+ $RPM_BUILD_ROOT%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+
+%{__install} -d $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d/
+%{__install} -Dp -m0644 Apache-Arrow.repo \
+ $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d/Apache-Arrow.repo
+
+%files
+%defattr(-, root, root, 0755)
+%doc
+%dir %{_sysconfdir}/yum.repos.d/
+%dir %{_sysconfdir}/pki/rpm-gpg/
+%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+%config(noreplace) %{_sysconfdir}/yum.repos.d/Apache-Arrow.repo
+
+%post
+if grep -q 'Amazon Linux release 2' /etc/system-release 2>/dev/null; then
+ %{yum_repository_enable apache-arrow-amazon-linux}
+elif grep -q 'Red Hat Enterprise Linux' /etc/system-release 2>/dev/null; then
+ %{yum_repository_enable apache-arrow-rhel}
+elif grep -q 'AlmaLinux' /etc/system-release 2>/dev/null; then
+ %{yum_repository_enable apache-arrow-almalinux}
+else
+ %{yum_repository_enable apache-arrow-centos}
+fi
+
+%changelog
+* Wed Nov 10 2021 Sutou Kouhei <kou@clear-code.com> - 6.0.1-1
+- New upstream release.
+
+* Thu Oct 21 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 6.0.0-1
+- New upstream release.
+
+* Mon Jan 18 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 3.0.0-1
+- New upstream release.
+
+* Mon Oct 12 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 2.0.0-1
+- New upstream release.
+
+* Mon Jul 20 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 1.0.0-1
+- New upstream release.
+
+* Thu Apr 16 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.17.0-1
+- New upstream release.
+
+* Thu Jan 30 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.16.0-1
+- New upstream release.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
new file mode 100644
index 000000000..236b0e297
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM centos:7
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ yum install -y ${quiet} \
+ rpmdevtools && \
+ yum clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
new file mode 100644
index 000000000..e36850630
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM centos:8
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ dnf install --enablerepo=powertools -y ${quiet} \
+ rpmdevtools && \
+ dnf clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile
new file mode 100644
index 000000000..9dfae955e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -0,0 +1,162 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "../helper"
+require_relative "../package-task"
+
+class ApacheArrowPackageTask < PackageTask
+ include Helper::ApacheArrow
+
+ def initialize
+ release_time = detect_release_time
+ super("apache-arrow",
+ detect_version(release_time),
+ release_time,
+ :rc_build_type => :release)
+ @rpm_package = "arrow"
+ end
+
+ private
+ def define_archive_task
+ file @archive_name do
+ case @version
+ when /\A\d+\.\d+\.\d+-rc\d+\z/
+ download_rc_archive
+ when /\A\d+\.\d+\.\d+\z/
+ download_released_archive
+ else
+ build_archive
+ end
+ end
+
+ if deb_archive_name != @archive_name
+ file deb_archive_name => @archive_name do
+ cp(@archive_name, deb_archive_name)
+ end
+ end
+
+ if rpm_archive_name != @archive_name
+ file rpm_archive_name => @archive_name do
+ cp(@archive_name, rpm_archive_name)
+ end
+ end
+ end
+
+ def download_rc_archive
+ base_url = "https://dist.apache.org/repos/dist/dev/arrow"
+ archive_name_no_rc = @archive_name.gsub(/-rc\d+(\.tar\.gz)\z/, "\\1")
+ url = "#{base_url}/#{@package}-#{@version}/#{archive_name_no_rc}"
+ download(url, @archive_name)
+ end
+
+ def download_released_archive
+ mirror_base_url = "https://www.apache.org/dyn/closer.lua/arrow"
+ mirror_list_url = "#{mirror_base_url}/arrow-#{@version}/#{@archive_name}"
+ open(mirror_list_url) do |response|
+ if /href="(.+?\/#{Regexp.escape(@archive_name)})"/ =~ response.read
+ download($1, ".")
+ end
+ end
+ end
+
+ def build_archive
+ cd(arrow_source_dir) do
+ sh("git", "archive", "HEAD",
+ "--prefix", "#{@archive_base_name}/",
+ "--output", @full_archive_name)
+ end
+ end
+
+ def apt_arm64_cuda_available_target?(target)
+ false
+ end
+
+ def apt_prepare_debian_control_cuda_architecture(control, target)
+ if apt_arm64_cuda_available_target?(target)
+ cuda_architecture = "any"
+ else
+ cuda_architecture = "i386 amd64"
+ end
+ control.gsub(/@CUDA_ARCHITECTURE@/, cuda_architecture)
+ end
+
+ def apt_prepare_debian_control_c_ares(control, target)
+ case target
+ when /\Aubuntu-bionic/
+ use_system_c_ares = "#"
+ else
+ use_system_c_ares = ""
+ end
+ control.gsub(/@USE_SYSTEM_C_ARES@/, use_system_c_ares)
+ end
+
+ def apt_prepare_debian_control_grpc(control, target)
+ case target
+ when /\Adebian-buster/, /\Aubuntu-(?:bionic|focal)/
+ use_system_grpc = "#"
+ else
+ use_system_grpc = ""
+ end
+ control.gsub(/@USE_SYSTEM_GRPC@/, use_system_grpc)
+ end
+
+ def apt_prepare_debian_control_thrift(control, target)
+ case target
+ when /\Aubuntu-bionic/
+ use_system_thrift = "#"
+ else
+ use_system_thrift = ""
+ end
+ control.gsub(/@USE_SYSTEM_THRIFT@/, use_system_thrift)
+ end
+
+ def apt_prepare_debian_control_utf8proc(control, target)
+ case target
+ when /\Aubuntu-bionic/
+ use_system_utf8proc = "#"
+ else
+ use_system_utf8proc = ""
+ end
+ control.gsub(/@USE_SYSTEM_UTF8PROC@/, use_system_utf8proc)
+ end
+
+ def apt_prepare_debian_control_zstd(control, target)
+ case target
+ when /\Adebian-buster/, /\Aubuntu-bionic/
+ use_system_zstd = "#"
+ else
+ use_system_zstd = ""
+ end
+ control.gsub(/@USE_SYSTEM_ZSTD@/, use_system_zstd)
+ end
+
+ def apt_prepare_debian_control(control_in, target)
+ control = control_in.dup
+ control = apt_prepare_debian_control_cuda_architecture(control, target)
+ control = apt_prepare_debian_control_c_ares(control, target)
+ control = apt_prepare_debian_control_grpc(control, target)
+ control = apt_prepare_debian_control_thrift(control, target)
+ control = apt_prepare_debian_control_utf8proc(control, target)
+ control = apt_prepare_debian_control_zstd(control, target)
+ control
+ end
+end
+
+task = ApacheArrowPackageTask.new
+task.define
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from
new file mode 100644
index 000000000..022eaf290
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/debian:bookworm
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
new file mode 100644
index 000000000..e88ed53ff
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=debian:bookworm
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libprotoc-dev \
+ libprotobuf-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ if apt list | grep '^nvidia-cuda-toolkit/'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ fi && \
+ pip3 install --upgrade meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ apt clean
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from
new file mode 100644
index 000000000..34187b2af
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/debian:bullseye
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
new file mode 100644
index 000000000..2a5b18f5b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=debian:bullseye
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libprotoc-dev \
+ libprotobuf-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ if apt list | grep '^nvidia-cuda-toolkit/'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ fi && \
+ pip3 install --upgrade meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from
new file mode 100644
index 000000000..8da222b86
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/debian:buster
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
new file mode 100644
index 000000000..11a33a130
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=debian:buster
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+RUN \
+ echo "deb http://deb.debian.org/debian buster-backports main" > \
+ /etc/apt/sources.list.d/backports.list
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ apt install -y -V -t buster-backports ${quiet} \
+ clang-11 \
+ llvm-11-dev && \
+ if apt list | grep '^nvidia-cuda-toolkit/'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ fi && \
+ pip3 install --upgrade meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from
new file mode 100644
index 000000000..c3ba00cf0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:bionic
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
new file mode 100644
index 000000000..ac0c6a58d
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:bionic
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang-10 \
+ cmake \
+ devscripts \
+ fakeroot \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgoogle-glog-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ llvm-10-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ python3-setuptools \
+ python3-wheel \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ (echo "includedir=/usr/include" && \
+ echo "libdir=/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)" && \
+ echo "" && \
+ echo "Name: re2" && \
+ echo "Description: RE2 is a fast, safe, thread-friendly regular expression engine." && \
+ echo "Version: 0.0.0" && \
+ echo "Cflags: -std=c++11 -pthread -I\${includedir}" && \
+ echo "Libs: -pthread -L\${libdir} -lre2") | \
+ tee "/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/pkgconfig/re2.pc" && \
+ if apt list | grep '^nvidia-cuda-toolkit/'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ fi && \
+ apt install -y -V -t bionic-backports ${quiet} \
+ debhelper && \
+ pip3 install --upgrade meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from
new file mode 100644
index 000000000..52ab48b66
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:focal
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
new file mode 100644
index 000000000..112cc1846
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:focal
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ python3-setuptools \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ if apt list | grep '^nvidia-cuda-toolkit/'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ fi && \
+ apt clean && \
+ python3 -m pip install --no-use-pep517 meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
new file mode 100644
index 000000000..f19ea9022
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:hirsute
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
new file mode 100644
index 000000000..7e26d3eb2
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:hirsute
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libprotoc-dev \
+ libprotobuf-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ python3-setuptools \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ if apt list | grep -q '^libcuda1'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ else \
+ :; \
+ fi && \
+ apt clean && \
+ python3 -m pip install --no-use-pep517 meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from
new file mode 100644
index 000000000..0c8a1c7a1
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:impish
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
new file mode 100644
index 000000000..2e94dd9b6
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:impish
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ ccache \
+ clang \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libprotoc-dev \
+ libprotobuf-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-dev \
+ python3-numpy \
+ python3-pip \
+ python3-setuptools \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ if apt list | grep -q '^libcuda1'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ else \
+ :; \
+ fi && \
+ apt clean && \
+ python3 -m pip install --no-use-pep517 meson && \
+ ln -s /usr/local/bin/meson /usr/bin/ && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog
new file mode 100644
index 000000000..65948629e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog
@@ -0,0 +1,123 @@
+apache-arrow (6.0.1-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Sutou Kouhei <kou@clear-code.com> Wed, 10 Nov 2021 02:04:45 -0000
+
+apache-arrow (6.0.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 21 Oct 2021 19:01:54 -0000
+
+apache-arrow (3.0.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 18 Jan 2021 21:33:18 -0000
+
+apache-arrow (2.0.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 12 Oct 2020 23:38:01 -0000
+
+apache-arrow (1.0.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 20 Jul 2020 20:41:07 -0000
+
+apache-arrow (0.17.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 16 Apr 2020 12:05:43 -0000
+
+apache-arrow (0.16.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 30 Jan 2020 20:21:44 -0000
+
+apache-arrow (0.15.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 30 Sep 2019 17:19:02 -0000
+
+apache-arrow (0.14.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Sutou Kouhei <kou@clear-code.com> Fri, 28 Jun 2019 22:22:35 -0000
+
+apache-arrow (0.13.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Thu, 28 Mar 2019 02:24:58 -0000
+
+apache-arrow (0.12.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Krisztián Szűcs <szucs.krisztian@gmail.com> Wed, 16 Jan 2019 03:29:25 -0000
+
+apache-arrow (0.11.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Thu, 04 Oct 2018 00:33:42 -0000
+
+apache-arrow (0.10.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Phillip Cloud <cpcloud@gmail.com> Thu, 02 Aug 2018 23:58:23 -0000
+
+apache-arrow (0.9.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Fri, 16 Mar 2018 16:56:31 -0000
+
+apache-arrow (0.8.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ * Add libarrow-gpu-glib0, libarrow-gpu-glib-dev and gir1.2-arrow-gpu-1.0.
+
+ -- Uwe L. Korn <uwelk@xhochy.com> Sun, 17 Dec 2017 20:24:44 -0000
+
+apache-arrow (0.7.1-2) unstable; urgency=low
+
+ * Add libarrow-gpu0 and libarrow-gpu-dev.
+
+ * Add libarrow-python-dev.
+
+ -- Kouhei Sutou <kou@clear-code.com> Sun, 29 Oct 2017 21:59:13 +0900
+
+apache-arrow (0.7.1-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Wed, 27 Sep 2017 13:19:05 -0000
+
+apache-arrow (0.7.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Wes McKinney <wes.mckinney@twosigma.com> Tue, 12 Sep 2017 22:01:14 -0000
+
+apache-arrow (0.6.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Fri, 11 Aug 2017 21:27:51 -0000
+
+apache-arrow (0.6.0.20170802-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Kouhei Sutou <kou@clear-code.com> Wed, 02 Aug 2017 22:28:18 -0000
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat
new file mode 100644
index 000000000..48082f72f
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat
@@ -0,0 +1 @@
+12
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in
new file mode 100644
index 000000000..6b0332ed2
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -0,0 +1,640 @@
+Source: apache-arrow
+Section: devel
+Priority: optional
+Maintainer: Apache Arrow Developers <dev@arrow.apache.org>
+Build-Depends:
+ cmake,
+ debhelper (>= 12),
+ git,
+ gobject-introspection,
+ gtk-doc-tools,
+ libboost-filesystem-dev,
+ libboost-system-dev,
+ libbrotli-dev,
+ libbz2-dev,
+@USE_SYSTEM_C_ARES@ libc-ares-dev,
+ libcurl4-openssl-dev,
+ libgirepository1.0-dev,
+ libgoogle-glog-dev,
+@USE_SYSTEM_GRPC@ libgrpc++-dev,
+ libgtest-dev,
+ liblz4-dev,
+@USE_SYSTEM_GRPC@ libprotoc-dev,
+@USE_SYSTEM_GRPC@ libprotobuf-dev,
+ libre2-dev,
+ libsnappy-dev,
+ libssl-dev,
+@USE_SYSTEM_THRIFT@ libthrift-dev,
+@USE_SYSTEM_UTF8PROC@ libutf8proc-dev,
+@USE_SYSTEM_ZSTD@ libzstd-dev,
+ ninja-build,
+ nvidia-cuda-toolkit [!arm64],
+ pkg-config,
+@USE_SYSTEM_GRPC@ protobuf-compiler-grpc,
+ python3-dev,
+ python3-numpy,
+ tzdata,
+ zlib1g-dev
+Build-Depends-Indep: libglib2.0-doc
+Standards-Version: 3.9.8
+Homepage: https://arrow.apache.org/
+
+Package: libarrow600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files.
+
+Package: libarrow-cuda600
+Section: libs
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files for CUDA support.
+
+Package: libarrow-dataset600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version}),
+ libparquet600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files for Dataset module.
+
+Package: libarrow-flight600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files for Flight RPC system.
+
+Package: libarrow-python600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version}),
+ python3,
+ python3-numpy
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files for Python support.
+
+Package: libarrow-python-flight600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-flight600 (= ${binary:Version}),
+ libarrow-python600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ library files for Flight and Python support.
+
+Package: libarrow-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow600 (= ${binary:Version}),
+ libbrotli-dev,
+ libbz2-dev,
+@USE_SYSTEM_C_ARES@ libc-ares-dev,
+@USE_SYSTEM_GRPC@ libgrpc++-dev,
+ liblz4-dev,
+ libre2-dev,
+ libsnappy-dev,
+ libssl-dev,
+@USE_SYSTEM_THRIFT@ libthrift-dev,
+@USE_SYSTEM_UTF8PROC@ libutf8proc-dev,
+@USE_SYSTEM_ZSTD@ libzstd-dev,
+@USE_SYSTEM_GRPC@ protobuf-compiler-grpc,
+ zlib1g-dev
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files.
+
+Package: libarrow-cuda-dev
+Section: libdevel
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libarrow-cuda600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files for CUDA support.
+
+Package: libarrow-dataset-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libarrow-dataset600 (= ${binary:Version}),
+ libparquet-dev (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files for dataset module.
+
+Package: libarrow-flight-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libarrow-flight600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files for Flight RPC system.
+
+Package: libarrow-python-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libarrow-python600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files for Python support.
+
+Package: libarrow-python-flight-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-flight-dev (= ${binary:Version}),
+ libarrow-python-dev (= ${binary:Version}),
+ libarrow-python-flight600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides C++ header files for Flight and Python support.
+
+Package: libgandiva600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version})
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides C++ library files.
+
+Package: libgandiva-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libgandiva600 (= ${binary:Version})
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides C++ header files.
+
+Package: libplasma600
+Section: libs
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-cuda600 (= ${binary:Version})
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides C++ library files to connect plasma-store-server.
+
+Package: plasma-store-server
+Section: utils
+Architecture: @CUDA_ARCHITECTURE@
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libplasma600 (= ${binary:Version})
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides plasma-store-server.
+
+Package: libplasma-dev
+Section: libdevel
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-cuda-dev (= ${binary:Version}),
+ libplasma600 (= ${binary:Version})
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides C++ header files.
+
+Package: libparquet600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends}
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides C++ library files to process Apache Parquet format.
+
+Package: libparquet-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dev (= ${binary:Version}),
+ libparquet600 (= ${binary:Version})
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides C++ header files.
+
+Package: libarrow-glib600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based library files.
+
+Package: gir1.2-arrow-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GObject Introspection typelib files.
+
+Package: libarrow-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libglib2.0-dev,
+ libarrow-dev (= ${binary:Version}),
+ libarrow-glib600 (= ${binary:Version}),
+ gir1.2-arrow-1.0 (= ${binary:Version})
+Suggests: libarrow-glib-doc
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based header files.
+
+Package: libarrow-glib-doc
+Section: doc
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libglib2.0-doc
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides documentations.
+
+Package: libarrow-cuda-glib600
+Section: libs
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-glib600 (= ${binary:Version}),
+ libarrow-cuda600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based library files for CUDA support.
+
+Package: gir1.2-arrow-cuda-1.0
+Section: introspection
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GObject Introspection typelib files for CUDA support.
+
+Package: libarrow-cuda-glib-dev
+Section: libdevel
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-cuda-dev (= ${binary:Version}),
+ libarrow-glib-dev (= ${binary:Version}),
+ libarrow-cuda-glib600 (= ${binary:Version}),
+ gir1.2-arrow-cuda-1.0 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based header files for CUDA support.
+
+Package: libarrow-dataset-glib600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-glib600 (= ${binary:Version}),
+ libarrow-dataset600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based library files for dataset module.
+
+Package: gir1.2-arrow-dataset-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GObject Introspection typelib files for dataset module.
+
+Package: libarrow-dataset-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-dataset-dev (= ${binary:Version}),
+ libarrow-glib-dev (= ${binary:Version}),
+ libarrow-dataset-glib600 (= ${binary:Version}),
+ gir1.2-arrow-dataset-1.0 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based header files for dataset module.
+
+Package: libarrow-dataset-glib-doc
+Section: doc
+Architecture: any
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libarrow-glib-doc
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides documentations for dataset module.
+
+Package: libarrow-flight-glib600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-glib600 (= ${binary:Version}),
+ libarrow-flight600 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based library files for Apache Arrow Flight.
+
+Package: gir1.2-arrow-flight-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GObject Introspection typelib files for Apache Arrow
+ Flight.
+
+Package: libarrow-flight-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-flight-dev (= ${binary:Version}),
+ libarrow-glib-dev (= ${binary:Version}),
+ libarrow-flight-glib600 (= ${binary:Version}),
+ gir1.2-arrow-flight-1.0 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based header files for Apache Arrow Flight.
+
+Package: libarrow-flight-glib-doc
+Section: doc
+Architecture: any
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libarrow-glib-doc
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides documentations for Apache Arrow Flight.
+
+Package: libgandiva-glib600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-glib600 (= ${binary:Version}),
+ libgandiva600 (= ${binary:Version})
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides GLib based library files.
+
+Package: gir1.2-gandiva-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides GObject Introspection typelib files.
+
+Package: libgandiva-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libgandiva-dev (= ${binary:Version}),
+ libarrow-glib-dev (= ${binary:Version}),
+ libgandiva-glib600 (= ${binary:Version}),
+ gir1.2-gandiva-1.0 (= ${binary:Version})
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides GLib based header files.
+
+Package: libgandiva-glib-doc
+Section: doc
+Architecture: any
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libglib2.0-doc
+Description: Gandiva is a toolset for compiling and evaluating expressions
+ on Arrow Data.
+ .
+ This package provides documentations.
+
+Package: libplasma-glib600
+Section: libs
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-cuda-glib600 (= ${binary:Version}),
+ libplasma600 (= ${binary:Version})
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides GLib based library files to connect plasma-store-server.
+
+Package: gir1.2-plasma-1.0
+Section: introspection
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides GObject Introspection typelib files.
+
+Package: libplasma-glib-dev
+Section: libdevel
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libplasma-dev (= ${binary:Version}),
+ libarrow-cuda-glib-dev (= ${binary:Version}),
+ libplasma-glib600 (= ${binary:Version}),
+ gir1.2-plasma-1.0 (= ${binary:Version})
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides GLib based header files.
+
+Package: libplasma-glib-doc
+Section: doc
+Architecture: @CUDA_ARCHITECTURE@
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libglib2.0-doc
+Description: Plasma is an in-memory object store and cache for big data.
+ .
+ This package provides documentations.
+
+Package: libparquet-glib600
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+ ${misc:Depends},
+ ${shlibs:Depends},
+ libarrow-glib600 (= ${binary:Version}),
+ libparquet600 (= ${binary:Version})
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides GLib based library files.
+
+Package: gir1.2-parquet-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${gir:Depends},
+ ${misc:Depends}
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides GObject Introspection typelib files.
+
+Package: libparquet-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ libarrow-glib-dev (= ${binary:Version}),
+ libparquet-dev (= ${binary:Version}),
+ libparquet-glib600 (= ${binary:Version}),
+ gir1.2-parquet-1.0 (= ${binary:Version})
+Suggests: libparquet-glib-doc
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides GLib based header files.
+
+Package: libparquet-glib-doc
+Section: doc
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends}
+Recommends: libglib2.0-doc
+Description: Apache Parquet is a columnar storage format
+ .
+ This package provides documentations.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright
new file mode 100644
index 000000000..9db0ea76f
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright
@@ -0,0 +1,193 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: Apache Arrow
+Upstream-Contact: <dev@arrow.apache.org>
+Source: https://dist.apache.org/repos/dist/release/arrow/
+
+Files: *
+Copyright: 2016 The Apache Software Foundation
+License: Apache-2.0
+
+Files: TODO for "This product includes software from the SFrame project"
+Copyright: 2015 Dato, Inc.
+ 2009 Carnegie Mellon University.
+License: BSD-3-clause
+
+Files: TODO for "This product includes software from the Numpy project"
+Copyright: 1995, 1996, 1997 Jim Hugunin, hugunin@mit.edu
+ 2005 Travis E. Oliphant oliphant@ee.byu.edu Brigham Young University
+License: BSD-3-clause
+
+Files: TODO for "This product includes software from the Feather project"
+Copyright: TODO
+License: Apache-2.0
+
+Files: TODO for "This product includes software from the DyND project"
+Copyright: TODO
+License: BSD-2-clause
+
+Files: TODO for "This product includes software from the LLVM project"
+Copyright: 2003-2007 University of Illinois at Urbana-Champaign.
+License: U-OF-I-BSD-LIKE
+
+Files: TODO for "This product includes software from the google-lint project"
+Copyright: 2009 Google Inc. All rights reserved.
+License: BSD-3-clause
+
+Files: TODO for "This product includes software from the mman-win32 project"
+Copyright: 2010 kutuzov.viktor.84
+License: MIT
+
+Files: TODO for "This product includes software from the LevelDB project"
+Copyright: 2011 The LevelDB Authors. All rights reserved.
+License: BSD-3-clause
+
+Files: TODO for "This product includes software from the CMake project"
+Copyright: 2001-2009 Kitware, Inc.
+ 2012-2014 Continuum Analytics, Inc.
+License: BSD-3-clause
+
+Files: TODO for "This product includes software from https://github.com/matthew-brett/multibuild"
+Copyright: 2013-2016, Matt Terry and Matthew Brett; all rights reserved.
+License: BSD-2-clause
+
+Files: TODO for "This product includes software from the Ibis project"
+Copyright: 2015 Cloudera, Inc.
+License: Apache-2.0
+
+Files: TODO for "This product includes code from Apache Kudu"
+Copyright: 2016 The Apache Software Foundation
+License: Apache-2.0
+
+License: Apache-2.0
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ .
+ On Debian systems, the full text of the Apache Software License version 2 can
+ be found in the file `/usr/share/common-licenses/Apache-2.0'.
+
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ .
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+License: BSD-2-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ .
+ 1) Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ .
+ 2) Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+License: U-OF-I-BSD-LIKE
+ ==============================================================================
+ LLVM Release License
+ ==============================================================================
+ University of Illinois/NCSA
+ Open Source License
+ .
+ Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
+ All rights reserved.
+ .
+ Developed by:
+ .
+ LLVM Team
+ .
+ University of Illinois at Urbana-Champaign
+ .
+ http://llvm.org
+ .
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal with
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is furnished to do
+ so, subject to the following conditions:
+ .
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+ .
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+ .
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+ .
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+ SOFTWARE.
+
+License: MIT
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ .
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ .
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install
new file mode 100644
index 000000000..e0197fcd3
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/Arrow-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install
new file mode 100644
index 000000000..ef0d9f56f
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/ArrowCUDA-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install
new file mode 100644
index 000000000..27091dab3
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/ArrowDataset-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install
new file mode 100644
index 000000000..0433b367a
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/Gandiva-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install
new file mode 100644
index 000000000..13fde6681
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/Parquet-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install
new file mode 100644
index 000000000..7b7ce2158
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install
@@ -0,0 +1 @@
+usr/lib/*/girepository-1.0/Plasma-1.0.typelib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install
new file mode 100644
index 000000000..77e0b70f6
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install
@@ -0,0 +1,6 @@
+usr/lib/*/cmake/arrow/ArrowCUDAConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowCUDATargets*.cmake
+usr/lib/*/cmake/arrow/FindArrowCUDA.cmake
+usr/lib/*/libarrow_cuda.a
+usr/lib/*/libarrow_cuda.so
+usr/lib/*/pkgconfig/arrow-cuda.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install
new file mode 100644
index 000000000..778ae5fd7
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/arrow-cuda-glib/
+usr/lib/*/libarrow-cuda-glib.so
+usr/lib/*/pkgconfig/arrow-cuda-glib.pc
+usr/share/gir-1.0/ArrowCUDA-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
new file mode 100644
index 000000000..a6d637526
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow-cuda-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
new file mode 100644
index 000000000..5ae464687
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow_cuda.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install
new file mode 100644
index 000000000..53e727ae0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install
@@ -0,0 +1,6 @@
+usr/lib/*/cmake/arrow/ArrowDatasetConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowDatasetTargets*.cmake
+usr/lib/*/cmake/arrow/FindArrowDataset.cmake
+usr/lib/*/libarrow_dataset.a
+usr/lib/*/libarrow_dataset.so
+usr/lib/*/pkgconfig/arrow-dataset.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install
new file mode 100644
index 000000000..4c50bde97
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/arrow-dataset-glib/
+usr/lib/*/libarrow-dataset-glib.so
+usr/lib/*/pkgconfig/arrow-dataset-glib.pc
+usr/share/gir-1.0/ArrowDataset-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base
new file mode 100644
index 000000000..5ec8156b0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: arrow-dataset-glib
+Title: Apache Arrow Dataset GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Apache Arrow Dataset GLib provides an API to read and write semantic datasets stored in different locations and formats that uses GLib.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/arrow-dataset-glib/index.html
+Files: /usr/share/gtk-doc/html/arrow-dataset-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install
new file mode 100644
index 000000000..523bc206e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/arrow-dataset-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
new file mode 100644
index 000000000..3d880362b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/arrow-dataset-glib usr/share/doc/libarrow-dataset-glib-doc/arrow-dataset-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-dataset-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-dataset-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
new file mode 100644
index 000000000..10085f3a0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow-dataset-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
new file mode 100644
index 000000000..014634165
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow_dataset.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
new file mode 100644
index 000000000..ccd0c4e5b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
@@ -0,0 +1,19 @@
+usr/include/arrow/
+usr/lib/*/cmake/arrow/ArrowConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowOptions.cmake
+usr/lib/*/cmake/arrow/ArrowTargets*.cmake
+usr/lib/*/cmake/arrow/Find*Alt.cmake
+usr/lib/*/cmake/arrow/FindArrow.cmake
+usr/lib/*/cmake/arrow/FindBrotli.cmake
+usr/lib/*/cmake/arrow/FindLz4.cmake
+usr/lib/*/cmake/arrow/Find[Suz]*.cmake
+usr/lib/*/cmake/arrow/arrow-config.cmake
+usr/lib/*/libarrow.a
+usr/lib/*/libarrow.so
+usr/lib/*/libarrow_bundled_dependencies.a
+usr/lib/*/pkgconfig/arrow-compute.pc
+usr/lib/*/pkgconfig/arrow-csv.pc
+usr/lib/*/pkgconfig/arrow-filesystem.pc
+usr/lib/*/pkgconfig/arrow-json.pc
+usr/lib/*/pkgconfig/arrow-orc.pc
+usr/lib/*/pkgconfig/arrow.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
new file mode 100644
index 000000000..20ca33d84
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
@@ -0,0 +1,6 @@
+usr/lib/*/cmake/arrow/ArrowFlightConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowFlightTargets*.cmake
+usr/lib/*/cmake/arrow/FindArrowFlight.cmake
+usr/lib/*/libarrow_flight.a
+usr/lib/*/libarrow_flight.so
+usr/lib/*/pkgconfig/arrow-flight.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
new file mode 100644
index 000000000..8a8dee3ac
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/arrow-flight-glib/
+usr/lib/*/libarrow-flight-glib.so
+usr/lib/*/pkgconfig/arrow-flight-glib.pc
+usr/share/gir-1.0/ArrowFlight-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
new file mode 100644
index 000000000..94b17c11b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: arrow-flight-glib
+Title: Apache Arrow Flight GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Apache Arrow Flight GLib provides a general-purpose client-server framework to simplify high performance transport of large datasets over network interfaces.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/arrow-flight-glib/index.html
+Files: /usr/share/gtk-doc/html/arrow-flight-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
new file mode 100644
index 000000000..3c95f17ed
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/arrow-flight-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
new file mode 100644
index 000000000..d55c89a1b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/arrow-flight-glib usr/share/doc/libarrow-flight-glib-doc/arrow-flight-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-flight-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-flight-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
new file mode 100644
index 000000000..a6156ed94
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow-flight-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
new file mode 100644
index 000000000..abdb96d4c
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow_flight.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install
new file mode 100644
index 000000000..f6de7eedb
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install
@@ -0,0 +1,6 @@
+usr/include/arrow-glib/
+usr/lib/*/libarrow-glib.so
+usr/lib/*/pkgconfig/arrow-glib.pc
+usr/lib/*/pkgconfig/arrow-orc-glib.pc
+usr/share/arrow-glib/example/
+usr/share/gir-1.0/Arrow-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
new file mode 100644
index 000000000..8ae4ffb6d
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: arrow-glib
+Title: Apache Arrow GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Apache Arrow GLib is a data processing library for analysis that uses GLib.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/arrow-glib/index.html
+Files: /usr/share/gtk-doc/html/arrow-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
new file mode 100644
index 000000000..912a29c58
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
@@ -0,0 +1,2 @@
+usr/share/doc/arrow-glib/
+usr/share/gtk-doc/html/arrow-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
new file mode 100644
index 000000000..556987d0a
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/arrow-glib usr/share/doc/libarrow-glib-doc/arrow-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
new file mode 100644
index 000000000..ec369d153
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
new file mode 100644
index 000000000..807583f98
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
@@ -0,0 +1,6 @@
+usr/lib/*/cmake/arrow/ArrowPythonConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowPythonTargets*.cmake
+usr/lib/*/cmake/arrow/FindArrowPython.cmake
+usr/lib/*/libarrow_python.a
+usr/lib/*/libarrow_python.so
+usr/lib/*/pkgconfig/arrow-python.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
new file mode 100644
index 000000000..6cf96e227
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
@@ -0,0 +1,6 @@
+usr/lib/*/cmake/arrow/ArrowPythonFlightConfig*.cmake
+usr/lib/*/cmake/arrow/ArrowPythonFlightTargets*.cmake
+usr/lib/*/cmake/arrow/FindArrowPythonFlight.cmake
+usr/lib/*/libarrow_python_flight.a
+usr/lib/*/libarrow_python_flight.so
+usr/lib/*/pkgconfig/arrow-python-flight.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
new file mode 100644
index 000000000..b7cbfec1f
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow_python_flight.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
new file mode 100644
index 000000000..eef3e6648
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow_python.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
new file mode 100644
index 000000000..98ef2139c
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
new file mode 100644
index 000000000..26e7e76fb
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
@@ -0,0 +1,7 @@
+usr/include/gandiva/
+usr/lib/*/cmake/arrow/GandivaConfig*.cmake
+usr/lib/*/cmake/arrow/GandivaTargets*.cmake
+usr/lib/*/cmake/arrow/FindGandiva.cmake
+usr/lib/*/libgandiva.a
+usr/lib/*/libgandiva.so
+usr/lib/*/pkgconfig/gandiva.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
new file mode 100644
index 000000000..fe7d8bb79
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/gandiva-glib/
+usr/lib/*/libgandiva-glib.so
+usr/lib/*/pkgconfig/gandiva-glib.pc
+usr/share/gir-1.0/Gandiva-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base
new file mode 100644
index 000000000..2bf913062
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: gandiva-glib
+Title: Gandiva GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Gandiva GLib is a toolset for compiling and evaluating expressions on Arrow Data that uses GLib.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/gandiva-glib/index.html
+Files: /usr/share/gtk-doc/html/gandiva-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install
new file mode 100644
index 000000000..358e4e5c7
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/gandiva-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links
new file mode 100644
index 000000000..234794e23
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/gandiva-glib usr/share/doc/libgandiva-glib-doc/gandiva-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libgandiva-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libgandiva-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
new file mode 100644
index 000000000..6257fd438
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libgandiva-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
new file mode 100644
index 000000000..1475f49cf
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
@@ -0,0 +1 @@
+usr/lib/*/libgandiva.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install
new file mode 100644
index 000000000..e163115f0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install
@@ -0,0 +1,7 @@
+usr/include/parquet/
+usr/lib/*/cmake/arrow/ParquetConfig*.cmake
+usr/lib/*/cmake/arrow/ParquetTargets*.cmake
+usr/lib/*/cmake/arrow/FindParquet.cmake
+usr/lib/*/libparquet.a
+usr/lib/*/libparquet.so
+usr/lib/*/pkgconfig/parquet.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install
new file mode 100644
index 000000000..9cce737a7
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/parquet-glib/
+usr/lib/*/libparquet-glib.so
+usr/lib/*/pkgconfig/parquet-glib.pc
+usr/share/gir-1.0/Parquet-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base
new file mode 100644
index 000000000..cc68e2df6
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: parquet-glib
+Title: Apache Parquet GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Apache Parquet GLib is a columnar storage format processing library that uses GLib.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/parquet-glib/index.html
+Files: /usr/share/gtk-doc/html/parquet-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install
new file mode 100644
index 000000000..5843ea3da
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/parquet-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links
new file mode 100644
index 000000000..c31f346b1
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/parquet-glib usr/share/doc/libparquet-glib-doc/parquet-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libparquet-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libparquet-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
new file mode 100644
index 000000000..1c0e44199
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libparquet-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
new file mode 100644
index 000000000..540a91d5e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
@@ -0,0 +1 @@
+usr/lib/*/libparquet.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install
new file mode 100644
index 000000000..c315d4dfc
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install
@@ -0,0 +1,7 @@
+usr/include/plasma/
+usr/lib/*/cmake/arrow/PlasmaConfig*.cmake
+usr/lib/*/cmake/arrow/PlasmaTargets*.cmake
+usr/lib/*/cmake/arrow/FindPlasma.cmake
+usr/lib/*/libplasma.a
+usr/lib/*/libplasma.so
+usr/lib/*/pkgconfig/plasma.pc
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install
new file mode 100644
index 000000000..7800681d2
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/plasma-glib/
+usr/lib/*/libplasma-glib.so
+usr/lib/*/pkgconfig/plasma-glib.pc
+usr/share/gir-1.0/Plasma-1.0.gir
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base
new file mode 100644
index 000000000..a9d306d8b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: plasma-glib
+Title: Plasma GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Plasma GLib is an in-memory object store and cache for big data that uses GLib.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/plasma-glib/index.html
+Files: /usr/share/gtk-doc/html/plasma-glib/*.html
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install
new file mode 100644
index 000000000..ad13b94cd
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/plasma-glib
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links
new file mode 100644
index 000000000..193262f9b
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/plasma-glib usr/share/doc/libplasma-glib-doc/plasma-glib
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libplasma-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libplasma-glib-doc/gobject
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
new file mode 100644
index 000000000..339bcca3e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
@@ -0,0 +1 @@
+usr/lib/*/libplasma-glib.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
new file mode 100644
index 000000000..f8a744b65
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
@@ -0,0 +1 @@
+usr/lib/*/libplasma.so.*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
new file mode 100644
index 000000000..bd13b0e81
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
@@ -0,0 +1 @@
+usr/bin/plasma-store-server
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules
new file mode 100755
index 000000000..2de533615
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules
@@ -0,0 +1,104 @@
+#!/usr/bin/make -f
+# -*- makefile-gmake -*-
+#
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+# This has to be exported to make some magic below work.
+export DH_OPTIONS
+
+export DEB_BUILD_MAINT_OPTIONS=reproducible=-timeless
+
+BUILD_TYPE=release
+
+%:
+ dh $@ --with gir
+
+override_dh_auto_configure:
+ if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \
+ ARROW_CUDA=ON; \
+ ARROW_PLASMA=ON; \
+ else \
+ ARROW_CUDA=OFF; \
+ ARROW_PLASMA=OFF; \
+ fi; \
+ dh_auto_configure \
+ --sourcedirectory=cpp \
+ --builddirectory=cpp_build \
+ --buildsystem=cmake+ninja \
+ -- \
+ -DARROW_CUDA=$${ARROW_CUDA} \
+ -DARROW_FLIGHT=ON \
+ -DARROW_GANDIVA=ON \
+ -DARROW_GANDIVA_JAVA=OFF \
+ -DARROW_MIMALLOC=ON \
+ -DARROW_ORC=ON \
+ -DARROW_PACKAGE_KIND=deb \
+ -DARROW_PARQUET=ON \
+ -DARROW_PLASMA=$${ARROW_PLASMA} \
+ -DARROW_PYTHON=ON \
+ -DARROW_S3=ON \
+ -DARROW_USE_CCACHE=OFF \
+ -DARROW_WITH_BROTLI=ON \
+ -DARROW_WITH_BZ2=ON \
+ -DARROW_WITH_LZ4=ON \
+ -DARROW_WITH_SNAPPY=ON \
+ -DARROW_WITH_ZLIB=ON \
+ -DARROW_WITH_ZSTD=ON \
+ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
+ -DCMAKE_UNITY_BUILD=ON \
+ -DPARQUET_REQUIRE_ENCRYPTION=ON \
+ -DPythonInterp_FIND_VERSION=ON \
+ -DPythonInterp_FIND_VERSION_MAJOR=3
+
+override_dh_auto_build:
+ dh_auto_build \
+ --sourcedirectory=cpp \
+ --builddirectory=cpp_build
+ dh_auto_configure \
+ --sourcedirectory=c_glib \
+ --builddirectory=c_glib_build \
+ --buildsystem=meson+ninja \
+ -- \
+ -Darrow_cpp_build_type=$(BUILD_TYPE) \
+ -Darrow_cpp_build_dir=../cpp_build \
+ -Dgtk_doc=true
+ env \
+ LD_LIBRARY_PATH=$(CURDIR)/cpp_build/$(BUILD_TYPE) \
+ dh_auto_build \
+ --sourcedirectory=c_glib \
+ --builddirectory=c_glib_build \
+ --buildsystem=meson+ninja
+
+override_dh_auto_install:
+ dh_auto_install \
+ --sourcedirectory=c_glib \
+ --builddirectory=c_glib_build \
+ --buildsystem=meson+ninja
+ # Remove built files to reduce disk usage
+ dh_auto_clean \
+ --sourcedirectory=c_glib \
+ --builddirectory=c_glib_build \
+ --buildsystem=meson+ninja
+
+ dh_auto_install \
+ --sourcedirectory=cpp \
+ --builddirectory=cpp_build
+ # Remove built files to reduce disk usage
+ dh_auto_clean \
+ --sourcedirectory=cpp \
+ --builddirectory=cpp_build
+
+override_dh_auto_test:
+ # TODO: We need Boost 1.64 or later to build tests for
+ # Apache Arrow Flight.
+ # git clone --depth 1 https://github.com/apache/arrow-testing.git
+ # git clone --depth 1 https://github.com/apache/parquet-testing.git
+ # cd cpp_build && \
+ # env \
+ # ARROW_TEST_DATA=$(CURDIR)/arrow-testing/data \
+ # PARQUET_TEST_DATA=$(CURDIR)/parquet-testing/data \
+ # ctest --exclude-regex 'arrow-cuda-test|plasma-client_tests'
+
+# skip file failing with "Unknown DWARF DW_OP_255" (see bug#949296)
+override_dh_dwz:
+ dh_dwz --exclude=libgandiva.so
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format
new file mode 100644
index 000000000..163aaf8d8
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch
new file mode 100644
index 000000000..5cb3f0091
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch
@@ -0,0 +1,2 @@
+version=3
+https://dist.apache.org/repos/dist/release/arrow/arrow-(.+)/apache-arrow-(.+).tar.gz
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from
new file mode 100644
index 000000000..b570587e0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/almalinux:8
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
new file mode 100644
index 000000000..003b26747
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=almalinux:8
+FROM ${FROM}
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ dnf install -y ${quiet} epel-release && \
+ dnf install --enablerepo=powertools -y ${quiet} \
+ bison \
+ boost-devel \
+ brotli-devel \
+ bzip2-devel \
+ c-ares-devel \
+ ccache \
+ clang \
+ cmake \
+ curl-devel \
+ flex \
+ gcc-c++ \
+ gflags-devel \
+ git \
+ glog-devel \
+ gobject-introspection-devel \
+ gtk-doc \
+ libarchive \
+ libzstd-devel \
+ llvm-devel \
+ llvm-static \
+ lz4-devel \
+ make \
+ ncurses-devel \
+ ninja-build \
+ openssl-devel \
+ pkg-config \
+ python3 \
+ python3-devel \
+ python3-numpy \
+ python3-pip \
+ re2-devel \
+ # rapidjson-devel \
+ rpmdevtools \
+ snappy-devel \
+ tar \
+ # utf8proc-devel \
+ zlib-devel && \
+ dnf clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
new file mode 100644
index 000000000..6ada89729
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=amazonlinux:2
+FROM ${FROM}
+
+COPY qemu-* /usr/bin/
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ yum update -y ${quiet} && \
+ amazon-linux-extras install -y epel && \
+ yum install -y ${quiet} \
+ bison \
+ brotli-devel \
+ bzip2-devel \
+ ccache \
+ cmake3 \
+ flex \
+ gcc-c++ \
+ git \
+ glog-devel \
+ gobject-introspection-devel \
+ gtk-doc \
+ lz4-devel \
+ make \
+ openssl-devel \
+ pkg-config \
+ rapidjson-devel \
+ rpmdevtools \
+ snappy-devel \
+ tar \
+ utf8proc-devel \
+ zlib-devel && \
+ # Install ninja-build dependencies in amzn2-core
+ yum install -y ${quiet} ninja-build && \
+ # Install ninja-build from EPEL because ninja-build in amzn2-core is old.
+ yum install -y ${quiet} --disablerepo=amzn2-core ninja-build && \
+ yum clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static
new file mode 100755
index 000000000..279d89545
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static
@@ -0,0 +1,33 @@
+#!/usr/bin/env sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
+# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
+# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
+# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
+#
+# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
+# the "COPY" is failed. It means that we always require "qemu*" even if we
+# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
+# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
+# need.
+#
+# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
+# Azure Pipelines uses old Ubuntu (18.04).
+# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
new file mode 100644
index 000000000..3a8f71ecc
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -0,0 +1,892 @@
+# -*- sh-shell: rpm -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+%define _amzn %{?amzn:%{amzn}}%{!?amzn:0}
+%define is_amazon_linux (%{_amzn} != 0)
+
+%define boost_version %( \
+ if [ %{rhel} -eq 7 ]; then \
+ echo 169; \
+ fi)
+%define cmake_version %( \
+ if [ %{rhel} -lt 8 ]; then \
+ echo 3; \
+ fi)
+%define python_version %( \
+ if [ %{rhel} -eq 7 ]; then \
+ echo 36; \
+ else \
+ echo 3; \
+ fi)
+
+%define lz4_requirement %( \
+ if [ %{_amzn} -eq 0 ]; then \
+ echo ">= 1.8.0"; \
+ fi)
+
+%define use_boost (!%{is_amazon_linux})
+%define use_flight (%{rhel} >= 8)
+%define use_gandiva (%{rhel} >= 8 && %{_arch} != "aarch64")
+%define use_gflags (!%{is_amazon_linux})
+%define use_mimalloc (%{rhel} >= 8)
+%define use_python (!%{is_amazon_linux})
+# TODO: Enable this. This works on local but is fragile on GitHub Actions and
+# Travis CI.
+# %define use_s3 (%{rhel} >= 8)
+%define use_s3 0
+
+%define have_rapidjson (%{rhel} == 7)
+%define have_re2 (%{rhel} >= 8)
+# EPEL ships utf8proc but it's old.
+# %define have_utf8proc (%{rhel} == 7)
+%define have_utf8proc 0
+%define have_zstd (!%{is_amazon_linux})
+
+Name: @PACKAGE@
+Version: @VERSION@
+Release: @RELEASE@%{?dist}
+Summary: Apache Arrow is a data processing library for analysis
+
+License: Apache-2.0
+URL: https://arrow.apache.org/
+Source0: https://dist.apache.org/repos/dist/release/@PACKAGE@/@PACKAGE@-%{version}/apache-@PACKAGE@-%{version}.tar.gz
+
+BuildRequires: bison
+%if %{use_boost}
+BuildRequires: boost%{boost_version}-devel
+%endif
+BuildRequires: brotli-devel
+BuildRequires: bzip2-devel
+%if %{use_flight}
+BuildRequires: c-ares-devel
+%endif
+BuildRequires: cmake%{cmake_version}
+%if %{use_s3}
+BuildRequires: curl-devel
+%endif
+BuildRequires: flex
+BuildRequires: gcc-c++
+%if %{use_gflags}
+BuildRequires: gflags-devel
+%endif
+BuildRequires: git
+BuildRequires: glog-devel
+%if %{have_zstd}
+BuildRequires: libzstd-devel
+%endif
+BuildRequires: lz4-devel %{lz4_requirement}
+BuildRequires: ninja-build
+BuildRequires: openssl-devel
+BuildRequires: pkgconfig
+%if %{use_python}
+BuildRequires: python%{python_version}-devel
+BuildRequires: python%{python_version}-numpy
+%endif
+%if %{have_rapidjson}
+BuildRequires: rapidjson-devel
+%endif
+%if %{have_re2}
+BuildRequires: re2-devel
+%endif
+BuildRequires: snappy-devel
+%if %{have_utf8proc}
+BuildRequires: utf8proc-devel
+%endif
+BuildRequires: zlib-devel
+
+%if %{use_gandiva}
+BuildRequires: llvm-devel
+BuildRequires: ncurses-devel
+%endif
+
+BuildRequires: gobject-introspection-devel
+BuildRequires: gtk-doc
+
+%description
+Apache Arrow is a data processing library for analysis.
+
+%prep
+%setup -q -n apache-@PACKAGE@-%{version}
+
+%build
+cpp_build_type=release
+mkdir cpp/build
+cd cpp/build
+%cmake3 .. \
+ -DARROW_CSV=ON \
+ -DARROW_DATASET=ON \
+%if %{use_flight}
+ -DARROW_FLIGHT=ON \
+%endif
+%if %{use_gandiva}
+ -DARROW_GANDIVA=ON \
+%endif
+ -DARROW_HDFS=ON \
+ -DARROW_JSON=ON \
+%if %{use_mimalloc}
+ -DARROW_MIMALLOC=ON \
+%endif
+ -DARROW_ORC=ON \
+ -DARROW_PACKAGE_KIND=rpm \
+ -DARROW_PARQUET=ON \
+ -DARROW_PLASMA=ON \
+%if %{use_python}
+ -DARROW_PYTHON=ON \
+%endif
+%if %{use_s3}
+ -DARROW_S3=ON \
+%endif
+ -DARROW_WITH_BROTLI=ON \
+ -DARROW_WITH_BZ2=ON \
+ -DARROW_WITH_LZ4=ON \
+ -DARROW_WITH_SNAPPY=ON \
+ -DARROW_WITH_ZLIB=ON \
+ -DARROW_WITH_ZSTD=ON \
+ -DCMAKE_BUILD_TYPE=$cpp_build_type \
+ -DARROW_USE_CCACHE=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=ON \
+%if %{use_python}
+ -DPythonInterp_FIND_VERSION=ON \
+ -DPythonInterp_FIND_VERSION_MAJOR=3 \
+%endif
+ -GNinja
+
+ninja %{?_smp_mflags}
+cd -
+
+cd c_glib
+pip3 install meson
+meson setup build \
+ --default-library=both \
+ --libdir=%{_libdir} \
+ --prefix=%{_prefix} \
+ -Darrow_cpp_build_dir=../cpp/build \
+ -Darrow_cpp_build_type=$cpp_build_type \
+ -Dgtk_doc=true
+LD_LIBRARY_PATH=$PWD/../cpp/build/$cpp_build_type \
+ ninja -C build %{?_smp_mflags}
+cd -
+
+%install
+cpp_build_type=release
+
+cd c_glib
+DESTDIR=$RPM_BUILD_ROOT ninja -C build install
+ninja -C build clean
+cd -
+
+cd cpp/build
+DESTDIR=$RPM_BUILD_ROOT ninja install
+ninja clean
+cd -
+
+%package libs
+Summary: Runtime libraries for Apache Arrow C++
+License: Apache-2.0
+Requires: brotli
+%if %{use_gflags}
+Requires: gflags
+%endif
+Requires: glog
+%if %{have_zstd}
+Requires: libzstd
+%endif
+Requires: lz4 %{lz4_requirement}
+%if %{have_re2}
+Requires: re2
+%endif
+Requires: snappy
+%if %{have_utf8proc}
+Requires: utf8proc
+%endif
+Requires: zlib
+
+%description libs
+This package contains the libraries for Apache Arrow C++.
+
+%files libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_docdir}/arrow/
+%{_libdir}/libarrow.so.*
+
+%package devel
+Summary: Libraries and header files for Apache Arrow C++
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+Requires: brotli-devel
+Requires: bzip2-devel
+%if %{use_flight}
+Requires: c-ares-devel
+%endif
+%if %{have_zstd}
+Requires: libzstd-devel
+%endif
+Requires: lz4-devel %{lz4_requirement}
+Requires: openssl-devel
+%if %{have_rapidjson}
+Requires: rapidjson-devel
+%endif
+%if %{have_re2}
+Requires: re2-devel
+%endif
+Requires: snappy-devel
+%if %{have_utf8proc}
+Requires: utf8proc-devel
+%endif
+Requires: zlib-devel
+
+%description devel
+Libraries and header files for Apache Arrow C++.
+
+%files devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow/
+%exclude %{_includedir}/arrow/dataset/
+%if %{use_flight}
+%exclude %{_includedir}/arrow/flight/
+%endif
+%exclude %{_includedir}/arrow/python/
+%{_libdir}/cmake/arrow/ArrowConfig*.cmake
+%{_libdir}/cmake/arrow/ArrowOptions.cmake
+%{_libdir}/cmake/arrow/ArrowTargets*.cmake
+%{_libdir}/cmake/arrow/FindArrow.cmake
+%{_libdir}/cmake/arrow/FindBrotli.cmake
+%{_libdir}/cmake/arrow/FindLz4.cmake
+%{_libdir}/cmake/arrow/FindSnappy.cmake
+%if %{use_flight}
+%{_libdir}/cmake/arrow/Findc-aresAlt.cmake
+%endif
+%if %{have_re2}
+%{_libdir}/cmake/arrow/Findre2Alt.cmake
+%endif
+%if %{have_utf8proc}
+%{_libdir}/cmake/arrow/Findutf8proc.cmake
+%endif
+%if %{have_zstd}
+%{_libdir}/cmake/arrow/Findzstd.cmake
+%endif
+%{_libdir}/cmake/arrow/arrow-config.cmake
+%{_libdir}/libarrow.a
+%{_libdir}/libarrow.so
+%{_libdir}/libarrow_bundled_dependencies.a
+%{_libdir}/pkgconfig/arrow-compute.pc
+%{_libdir}/pkgconfig/arrow-csv.pc
+%{_libdir}/pkgconfig/arrow-filesystem.pc
+%{_libdir}/pkgconfig/arrow-json.pc
+%{_libdir}/pkgconfig/arrow-orc.pc
+%{_libdir}/pkgconfig/arrow.pc
+
+%package dataset-libs
+Summary: C++ library to read and write semantic datasets stored in different locations and formats
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+
+%description dataset-libs
+This package contains the libraries for Apache Arrow dataset.
+
+%files dataset-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow_dataset.so.*
+
+%package dataset-devel
+Summary: Libraries and header files for Apache Arrow dataset.
+License: Apache-2.0
+Requires: %{name}-dataset-libs = %{version}-%{release}
+
+%description dataset-devel
+Libraries and header files for Apache Arrow dataset.
+
+%files dataset-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow/dataset/
+%{_libdir}/cmake/arrow/ArrowDatasetConfig*.cmake
+%{_libdir}/cmake/arrow/ArrowDatasetTargets*.cmake
+%{_libdir}/cmake/arrow/FindArrowDataset.cmake
+%{_libdir}/libarrow_dataset.a
+%{_libdir}/libarrow_dataset.so
+%{_libdir}/pkgconfig/arrow-dataset.pc
+
+%if %{use_flight}
+%package flight-libs
+Summary: C++ library for fast data transport.
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+%if %{use_flight}
+Requires: c-ares
+%endif
+Requires: openssl
+
+%description flight-libs
+This package contains the libraries for Apache Arrow Flight.
+
+%files flight-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow_flight.so.*
+
+%package flight-devel
+Summary: Libraries and header files for Apache Arrow Flight.
+License: Apache-2.0
+Requires: %{name}-flight-libs = %{version}-%{release}
+
+%description flight-devel
+Libraries and header files for Apache Arrow Flight.
+
+%files flight-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow/flight/
+%{_libdir}/cmake/arrow/ArrowFlightConfig*.cmake
+%{_libdir}/cmake/arrow/ArrowFlightTargets*.cmake
+%{_libdir}/cmake/arrow/FindArrowFlight.cmake
+%{_libdir}/libarrow_flight.a
+%{_libdir}/libarrow_flight.so
+%{_libdir}/pkgconfig/arrow-flight.pc
+%endif
+
+%if %{use_gandiva}
+%package -n gandiva-libs
+Summary: C++ library for compiling and evaluating expressions on Apache Arrow data.
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+Requires: ncurses-libs
+
+%description -n gandiva-libs
+This package contains the libraries for Gandiva.
+
+%files -n gandiva-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libgandiva.so.*
+
+%package -n gandiva-devel
+Summary: Libraries and header files for Gandiva.
+License: Apache-2.0
+Requires: gandiva-libs = %{version}-%{release}
+Requires: llvm-devel
+
+%description -n gandiva-devel
+Libraries and header files for Gandiva.
+
+%files -n gandiva-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/gandiva/
+%{_libdir}/cmake/arrow/GandivaConfig*.cmake
+%{_libdir}/cmake/arrow/GandivaTargets*.cmake
+%{_libdir}/cmake/arrow/FindGandiva.cmake
+%{_libdir}/libgandiva.a
+%{_libdir}/libgandiva.so
+%{_libdir}/pkgconfig/gandiva.pc
+%endif
+
+%if %{use_python}
+%package python-libs
+Summary: Python integration library for Apache Arrow
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+Requires: python%{python_version}-numpy
+
+%description python-libs
+This package contains the Python integration library for Apache Arrow.
+
+%files python-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow_python.so.*
+
+%package python-devel
+Summary: Libraries and header files for Python integration library for Apache Arrow
+License: Apache-2.0
+Requires: %{name}-devel = %{version}-%{release}
+Requires: %{name}-libs = %{version}-%{release}
+Requires: python%{python_version}-devel
+
+%description python-devel
+Libraries and header files for Python integration library for Apache Arrow.
+
+%files python-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow/python/
+%exclude %{_includedir}/arrow/python/flight.h
+%{_libdir}/cmake/arrow/ArrowPythonConfig*.cmake
+%{_libdir}/cmake/arrow/ArrowPythonTargets*.cmake
+%{_libdir}/cmake/arrow/FindArrowPython.cmake
+%{_libdir}/libarrow_python.a
+%{_libdir}/libarrow_python.so
+%{_libdir}/pkgconfig/arrow-python.pc
+
+%if %{use_flight}
+%package python-flight-libs
+Summary: Python integration library for Apache Arrow Flight
+License: Apache-2.0
+Requires: %{name}-flight-libs = %{version}-%{release}
+Requires: %{name}-python-libs = %{version}-%{release}
+
+%description python-flight-libs
+This package contains the Python integration library for Apache Arrow Flight.
+
+%files python-flight-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow_python_flight.so.*
+
+%package python-flight-devel
+Summary: Libraries and header files for Python integration library for Apache Arrow Flight.
+License: Apache-2.0
+Requires: %{name}-flight-devel = %{version}-%{release}
+Requires: %{name}-python-devel = %{version}-%{release}
+Requires: %{name}-python-flight-libs = %{version}-%{release}
+
+%description python-flight-devel
+Libraries and header files for Python integration library for
+Apache Arrow Flight.
+
+%files python-flight-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow/python/flight.h
+%{_libdir}/cmake/arrow/ArrowPythonFlightConfig*.cmake
+%{_libdir}/cmake/arrow/ArrowPythonFlightTargets*.cmake
+%{_libdir}/cmake/arrow/FindArrowPythonFlight.cmake
+%{_libdir}/libarrow_python_flight.a
+%{_libdir}/libarrow_python_flight.so
+%{_libdir}/pkgconfig/arrow-python-flight.pc
+%endif
+%endif
+
+%package -n plasma-libs
+Summary: Runtime libraries for Plasma in-memory object store
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+
+%description -n plasma-libs
+This package contains the libraries for Plasma in-memory object store.
+
+%files -n plasma-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libplasma.so.*
+
+%package -n plasma-store-server
+Summary: Server for Plasma in-memory object store
+License: Apache-2.0
+Requires: plasma-libs = %{version}-%{release}
+
+%description -n plasma-store-server
+This package contains the server for Plasma in-memory object store.
+
+%files -n plasma-store-server
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_bindir}/plasma-store-server
+
+%package -n plasma-devel
+Summary: Libraries and header files for Plasma in-memory object store
+License: Apache-2.0
+Requires: plasma-libs = %{version}-%{release}
+
+%description -n plasma-devel
+Libraries and header files for Plasma in-memory object store.
+
+%files -n plasma-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/plasma/
+%{_libdir}/cmake/arrow/PlasmaConfig*.cmake
+%{_libdir}/cmake/arrow/PlasmaTargets*.cmake
+%{_libdir}/cmake/arrow/FindPlasma.cmake
+%{_libdir}/libplasma.a
+%{_libdir}/libplasma.so
+%{_libdir}/pkgconfig/plasma*.pc
+
+%package -n parquet-libs
+Summary: Runtime libraries for Apache Parquet C++
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+Requires: openssl
+
+%description -n parquet-libs
+This package contains the libraries for Apache Parquet C++.
+
+%files -n parquet-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libparquet.so.*
+
+%package -n parquet-devel
+Summary: Libraries and header files for Apache Parquet C++
+License: Apache-2.0
+Requires: parquet-libs = %{version}-%{release}
+Requires: zlib-devel
+
+%description -n parquet-devel
+Libraries and header files for Apache Parquet C++.
+
+%files -n parquet-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/parquet/
+%{_libdir}/cmake/arrow/ParquetConfig*.cmake
+%{_libdir}/cmake/arrow/ParquetTargets*.cmake
+%{_libdir}/cmake/arrow/FindParquet.cmake
+%{_libdir}/libparquet.a
+%{_libdir}/libparquet.so
+%{_libdir}/pkgconfig/parquet*.pc
+
+%package glib-libs
+Summary: Runtime libraries for Apache Arrow GLib
+License: Apache-2.0
+Requires: %{name}-libs = %{version}-%{release}
+Requires: glib2
+
+%description glib-libs
+This package contains the libraries for Apache Arrow GLib.
+
+%files glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow-glib.so.*
+%{_datadir}/gir-1.0/Arrow-1.0.gir
+
+%package glib-devel
+Summary: Libraries and header files for Apache Arrow GLib
+License: Apache-2.0
+Requires: %{name}-devel = %{version}-%{release}
+Requires: glib2-devel
+Requires: gobject-introspection-devel
+
+%description glib-devel
+Libraries and header files for Apache Arrow GLib.
+
+%files glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow-glib/
+%{_libdir}/libarrow-glib.a
+%{_libdir}/libarrow-glib.so
+%{_libdir}/pkgconfig/arrow-glib.pc
+%{_libdir}/pkgconfig/arrow-orc-glib.pc
+%{_libdir}/girepository-1.0/Arrow-1.0.typelib
+%{_datadir}/arrow-glib/example/
+
+%package glib-doc
+Summary: Documentation for Apache Arrow GLib
+License: Apache-2.0
+
+%description glib-doc
+Documentation for Apache Arrow GLib.
+
+%files glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_docdir}/arrow-glib/
+%{_datadir}/gtk-doc/html/arrow-glib/
+
+%package dataset-glib-libs
+Summary: Runtime libraries for Apache Arrow Dataset GLib
+License: Apache-2.0
+Requires: %{name}-dataset-libs = %{version}-%{release}
+Requires: %{name}-glib-libs = %{version}-%{release}
+
+%description dataset-glib-libs
+This package contains the libraries for Apache Arrow Dataset GLib.
+
+%files dataset-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow-dataset-glib.so.*
+%{_datadir}/gir-1.0/ArrowDataset-1.0.gir
+
+%package dataset-glib-devel
+Summary: Libraries and header files for Apache Arrow Dataset GLib
+License: Apache-2.0
+Requires: %{name}-dataset-devel = %{version}-%{release}
+Requires: %{name}-glib-devel = %{version}-%{release}
+
+%description dataset-glib-devel
+Libraries and header files for Apache Arrow Dataset GLib.
+
+%files dataset-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow-dataset-glib/
+%{_libdir}/libarrow-dataset-glib.a
+%{_libdir}/libarrow-dataset-glib.so
+%{_libdir}/pkgconfig/arrow-dataset-glib.pc
+%{_libdir}/girepository-1.0/ArrowDataset-1.0.typelib
+
+%package dataset-glib-doc
+Summary: Documentation for Apache Arrow Dataset GLib
+License: Apache-2.0
+
+%description dataset-glib-doc
+Documentation for Apache Arrow dataset GLib.
+
+%files dataset-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/arrow-dataset-glib/
+
+%if %{use_flight}
+%package flight-glib-libs
+Summary: Runtime libraries for Apache Arrow Flight GLib
+License: Apache-2.0
+Requires: %{name}-flight-libs = %{version}-%{release}
+Requires: %{name}-glib-libs = %{version}-%{release}
+
+%description flight-glib-libs
+This package contains the libraries for Apache Arrow Flight GLib.
+
+%files flight-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow-flight-glib.so.*
+%{_datadir}/gir-1.0/ArrowFlight-1.0.gir
+
+%package flight-glib-devel
+Summary: Libraries and header files for Apache Arrow Flight GLib
+License: Apache-2.0
+Requires: %{name}-flight-devel = %{version}-%{release}
+Requires: %{name}-glib-devel = %{version}-%{release}
+
+%description flight-glib-devel
+Libraries and header files for Apache Arrow Flight GLib.
+
+%files flight-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow-flight-glib/
+%{_libdir}/libarrow-flight-glib.a
+%{_libdir}/libarrow-flight-glib.so
+%{_libdir}/pkgconfig/arrow-flight-glib.pc
+%{_libdir}/girepository-1.0/ArrowFlight-1.0.typelib
+
+%package flight-glib-doc
+Summary: Documentation for Apache Arrow Flight GLib
+License: Apache-2.0
+
+%description flight-glib-doc
+Documentation for Apache Arrow Flight GLib.
+
+%files flight-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/arrow-flight-glib/
+%endif
+
+%if %{use_gandiva}
+%package -n gandiva-glib-libs
+Summary: Runtime libraries for Gandiva GLib
+License: Apache-2.0
+Requires: gandiva-libs = %{version}-%{release}
+Requires: %{name}-glib-libs = %{version}-%{release}
+
+%description -n gandiva-glib-libs
+This package contains the libraries for Gandiva GLib.
+
+%files -n gandiva-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libgandiva-glib.so.*
+%{_datadir}/gir-1.0/Gandiva-1.0.gir
+
+%package -n gandiva-glib-devel
+Summary: Libraries and header files for Gandiva GLib
+License: Apache-2.0
+Requires: gandiva-devel = %{version}-%{release}
+Requires: %{name}-glib-devel = %{version}-%{release}
+
+%description -n gandiva-glib-devel
+Libraries and header files for Gandiva GLib.
+
+%files -n gandiva-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/gandiva-glib/
+%{_libdir}/libgandiva-glib.a
+%{_libdir}/libgandiva-glib.so
+%{_libdir}/pkgconfig/gandiva-glib.pc
+%{_libdir}/girepository-1.0/Gandiva-1.0.typelib
+
+%package -n gandiva-glib-doc
+Summary: Documentation for Gandiva GLib
+License: Apache-2.0
+
+%description -n gandiva-glib-doc
+Documentation for Gandiva GLib.
+
+%files -n gandiva-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/gandiva-glib/
+%endif
+
+%package -n plasma-glib-libs
+Summary: Runtime libraries for Plasma GLib
+License: Apache-2.0
+Requires: plasma-libs = %{version}-%{release}
+Requires: %{name}-glib-libs = %{version}-%{release}
+
+%description -n plasma-glib-libs
+This package contains the libraries for Plasma GLib.
+
+%files -n plasma-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libplasma-glib.so.*
+%{_datadir}/gir-1.0/Plasma-1.0.gir
+
+%package -n plasma-glib-devel
+Summary: Libraries and header files for Plasma GLib
+License: Apache-2.0
+Requires: plasma-devel = %{version}-%{release}
+Requires: %{name}-glib-devel = %{version}-%{release}
+
+%description -n plasma-glib-devel
+Libraries and header files for Plasma GLib.
+
+%files -n plasma-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/plasma-glib/
+%{_libdir}/libplasma-glib.a
+%{_libdir}/libplasma-glib.so
+%{_libdir}/pkgconfig/plasma-glib.pc
+%{_libdir}/girepository-1.0/Plasma-1.0.typelib
+
+%package -n plasma-glib-doc
+Summary: Documentation for Plasma GLib
+License: Apache-2.0
+
+%description -n plasma-glib-doc
+Documentation for Plasma GLib.
+
+%files -n plasma-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/plasma-glib/
+
+%package -n parquet-glib-libs
+Summary: Runtime libraries for Apache Parquet GLib
+License: Apache-2.0
+Requires: parquet-libs = %{version}-%{release}
+Requires: %{name}-glib-libs = %{version}-%{release}
+
+%description -n parquet-glib-libs
+This package contains the libraries for Apache Parquet GLib.
+
+%files -n parquet-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libparquet-glib.so.*
+%{_datadir}/gir-1.0/Parquet-1.0.gir
+
+%package -n parquet-glib-devel
+Summary: Libraries and header files for Apache Parquet GLib
+License: Apache-2.0
+Requires: parquet-devel = %{version}-%{release}
+Requires: %{name}-glib-devel = %{version}-%{release}
+
+%description -n parquet-glib-devel
+Libraries and header files for Apache Parquet GLib.
+
+%files -n parquet-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/parquet-glib/
+%{_libdir}/libparquet-glib.a
+%{_libdir}/libparquet-glib.so
+%{_libdir}/pkgconfig/parquet-glib.pc
+%{_libdir}/girepository-1.0/Parquet-1.0.typelib
+
+%package -n parquet-glib-doc
+Summary: Documentation for Apache Parquet GLib
+License: Apache-2.0
+
+%description -n parquet-glib-doc
+Documentation for Apache Parquet GLib.
+
+%files -n parquet-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/parquet-glib/
+
+%changelog
+* Wed Nov 10 2021 Sutou Kouhei <kou@clear-code.com> - 6.0.1-1
+- New upstream release.
+
+* Thu Oct 21 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 6.0.0-1
+- New upstream release.
+
+* Mon Jan 18 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 3.0.0-1
+- New upstream release.
+
+* Mon Oct 12 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 2.0.0-1
+- New upstream release.
+
+* Mon Jul 20 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 1.0.0-1
+- New upstream release.
+
+* Thu Apr 16 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.17.0-1
+- New upstream release.
+
+* Thu Jan 30 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.16.0-1
+- New upstream release.
+
+* Mon Sep 30 2019 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.15.0-1
+- New upstream release.
+
+* Fri Jun 28 2019 Sutou Kouhei <kou@clear-code.com> - 0.14.0-1
+- New upstream release.
+
+* Thu Mar 28 2019 Kouhei Sutou <kou@clear-code.com> - 0.13.0-1
+- New upstream release.
+
+* Wed Jan 16 2019 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.12.0-1
+- New upstream release.
+
+* Thu Oct 04 2018 Kouhei Sutou <kou@clear-code.com> - 0.11.0-1
+- New upstream release.
+
+* Thu Aug 02 2018 Phillip Cloud <cpcloud@gmail.com> - 0.10.0-1
+- New upstream release.
+
+* Fri Mar 16 2018 Kouhei Sutou <kou@clear-code.com> - 0.9.0-1
+- New upstream release.
+
+* Sun Dec 17 2017 Uwe Korn <uwelk@xhochy.com> - 0.8.0-1
+- New upstream release.
+
+* Wed Sep 27 2017 Kouhei Sutou <kou@clear-code.com> - 0.7.1-1
+- New upstream release.
+
+* Tue Sep 12 2017 Wes McKinney <wes.mckinney@twosigma.com> - 0.7.0-1
+- New upstream release.
+
+* Fri Aug 11 2017 Kouhei Sutou <kou@clear-code.com> - 0.6.0-1
+- New upstream release.
+
+* Wed Aug 02 2017 Kouhei Sutou <kou@clear-code.com> - 0.6.0.20170802-1
+- New upstream release.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
new file mode 100644
index 000000000..6856e3854
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=centos:7
+FROM ${FROM}
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ yum update -y ${quiet} && \
+ yum install -y ${quiet} epel-release && \
+ yum install -y ${quiet} \
+ bison \
+ boost169-devel \
+ brotli-devel \
+ bzip2-devel \
+ ccache \
+ cmake3 \
+ flex \
+ gcc-c++ \
+ gflags-devel \
+ git \
+ glog-devel \
+ gobject-introspection-devel \
+ gtk-doc \
+ libzstd-devel \
+ lz4-devel \
+ make \
+ ninja-build \
+ openssl-devel \
+ pkg-config \
+ python36 \
+ python36-devel \
+ python36-numpy \
+ rapidjson-devel \
+ rpmdevtools \
+ snappy-devel \
+ tar \
+ zlib-devel && \
+ yum clean ${quiet} all
+
+ENV \
+ BOOST_INCLUDEDIR=/usr/include/boost169 \
+ BOOST_LIBRARYDIR=/usr/lib64/boost169
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from
new file mode 100644
index 000000000..587ce9d4a
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/centos:8
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
new file mode 100644
index 000000000..ad145c4ee
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=centos:8
+FROM ${FROM}
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+ dnf install -y ${quiet} epel-release && \
+ dnf install --enablerepo=powertools -y ${quiet} \
+ bison \
+ boost-devel \
+ brotli-devel \
+ bzip2-devel \
+ c-ares-devel \
+ ccache \
+ clang \
+ cmake \
+ curl-devel \
+ flex \
+ gcc-c++ \
+ gflags-devel \
+ git \
+ glog-devel \
+ gobject-introspection-devel \
+ gtk-doc \
+ libarchive \
+ libzstd-devel \
+ llvm-devel \
+ llvm-static \
+ lz4-devel \
+ make \
+ ncurses-devel \
+ ninja-build \
+ openssl-devel \
+ pkg-config \
+ python3 \
+ python3-devel \
+ python3-numpy \
+ python3-pip \
+ re2-devel \
+ # rapidjson-devel \
+ rpmdevtools \
+ snappy-devel \
+ tar \
+ # utf8proc-devel \
+ zlib-devel && \
+ dnf clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apt/build.sh b/src/arrow/dev/tasks/linux-packages/apt/build.sh
new file mode 100755
index 000000000..a54567a5c
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apt/build.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# -*- sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+LANG=C
+
+set -u
+
+run()
+{
+ "$@"
+ if test $? -ne 0; then
+ echo "Failed $@"
+ exit 1
+ fi
+}
+
+. /host/env.sh
+
+distribution=$(lsb_release --id --short | tr 'A-Z' 'a-z')
+code_name=$(lsb_release --codename --short)
+case "${distribution}" in
+ debian)
+ component=main
+ ;;
+ ubuntu)
+ component=universe
+ ;;
+esac
+architecture=$(dpkg-architecture -q DEB_BUILD_ARCH)
+
+debuild_options=()
+dpkg_buildpackage_options=(-us -uc)
+
+run mkdir -p /build
+run cd /build
+find . -not -path ./ccache -a -not -path "./ccache/*" -delete
+if which ccache > /dev/null 2>&1; then
+ export CCACHE_COMPILERCHECK=content
+ export CCACHE_COMPRESS=1
+ export CCACHE_COMPRESSLEVEL=6
+ export CCACHE_DIR="${PWD}/ccache"
+ export CCACHE_MAXSIZE=500M
+ ccache --show-stats
+ debuild_options+=(-eCCACHE_COMPILERCHECK)
+ debuild_options+=(-eCCACHE_COMPRESS)
+ debuild_options+=(-eCCACHE_COMPRESSLEVEL)
+ debuild_options+=(-eCCACHE_DIR)
+ debuild_options+=(-eCCACHE_MAXSIZE)
+ if [ -d /usr/lib/ccache ] ;then
+ debuild_options+=(--prepend-path=/usr/lib/ccache)
+ fi
+fi
+run cp /host/tmp/${PACKAGE}-${VERSION}.tar.gz \
+ ${PACKAGE}_${VERSION}.orig.tar.gz
+run tar xfz ${PACKAGE}_${VERSION}.orig.tar.gz
+case "${VERSION}" in
+ *~dev*)
+ run mv ${PACKAGE}-$(echo $VERSION | sed -e 's/~dev/-dev/') \
+ ${PACKAGE}-${VERSION}
+ ;;
+ *~rc*)
+ run mv ${PACKAGE}-$(echo $VERSION | sed -r -e 's/~rc[0-9]+//') \
+ ${PACKAGE}-${VERSION}
+ ;;
+esac
+run cd ${PACKAGE}-${VERSION}/
+platform="${distribution}-${code_name}"
+if [ -d "/host/tmp/debian.${platform}-${architecture}" ]; then
+ run cp -rp "/host/tmp/debian.${platform}-${architecture}" debian
+elif [ -d "/host/tmp/debian.${platform}" ]; then
+ run cp -rp "/host/tmp/debian.${platform}" debian
+else
+ run cp -rp "/host/tmp/debian" debian
+fi
+: ${DEB_BUILD_OPTIONS:="parallel=$(nproc)"}
+# DEB_BUILD_OPTIONS="${DEB_BUILD_OPTIONS} noopt"
+export DEB_BUILD_OPTIONS
+if [ "${DEBUG:-no}" = "yes" ]; then
+ run debuild "${debuild_options[@]}" "${dpkg_buildpackage_options[@]}"
+else
+ run debuild "${debuild_options[@]}" "${dpkg_buildpackage_options[@]}" > /dev/null
+fi
+if which ccache > /dev/null 2>&1; then
+ ccache --show-stats
+fi
+run cd -
+
+repositories="/host/repositories"
+package_initial=$(echo "${PACKAGE}" | sed -e 's/\(.\).*/\1/')
+pool_dir="${repositories}/${distribution}/pool/${code_name}/${component}/${package_initial}/${PACKAGE}"
+run mkdir -p "${pool_dir}/"
+run \
+ find . \
+ -maxdepth 1 \
+ -type f \
+ -not -path '*.build' \
+ -not -path '*.buildinfo' \
+ -exec cp '{}' "${pool_dir}/" ';'
+
+run chown -R "$(stat --format "%u:%g" "${repositories}")" "${repositories}"
diff --git a/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml b/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml
new file mode 100644
index 000000000..557c4ab41
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ package:
+ name: Package
+ runs-on: ubuntu-20.04
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_login_dockerhub()|indent }}
+
+ - name: Set up Ruby
+ uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: '3.0'
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Cache ccache
+ uses: actions/cache@v2
+ with:
+ path: arrow/dev/tasks/linux-packages/apache-arrow/{{ task_namespace }}/build/{{ target }}/ccache
+ key: linux-{{ task_namespace }}-ccache-{{ target }}-{{ "${{ hashFiles('arrow/cpp/**') }}" }}
+ restore-keys: linux-{{ task_namespace }}-ccache-{{ target }}-
+ - name: Build
+ run: |
+ set -e
+ pushd arrow/dev/tasks/linux-packages
+ rake version:update
+ rake docker:pull || :
+ rake --trace {{ task_namespace }}:build BUILD_DIR=build
+ sudo rm -rf */*/build
+ popd
+ env:
+ APT_TARGETS: {{ target }}
+ ARROW_VERSION: {{ arrow.version }}
+ REPO: {{ '${{ secrets.REPO }}' }}
+ YUM_TARGETS: {{ target }}
+ - uses: actions/upload-artifact@v2
+ with:
+ name: packages
+ path: packages/*/{{ task_namespace }}/repositories/
+ - name: Docker Push
+ continue-on-error: true
+ shell: bash
+ run: |
+ pushd arrow/dev/tasks/linux-packages
+ rake docker:push
+ popd
+ env:
+ APT_TARGETS: {{ target }}
+ REPO: {{ '${{ secrets.REPO }}' }}
+ YUM_TARGETS: {{ target }}
+ - name: Set up test
+ run: |
+ set -e
+ sudo apt update
+ # We can install createrepo_c by package with Ubuntu 22.04.
+ # createrepo_c \
+ sudo apt install -y \
+ apt-utils \
+ devscripts \
+ gpg \
+ rpm
+ gem install apt-dists-merge
+ (echo "Key-Type: RSA"; \
+ echo "Key-Length: 4096"; \
+ echo "Name-Real: Test"; \
+ echo "Name-Email: test@example.com"; \
+ echo "%no-protection") | \
+ gpg --full-generate-key --batch
+ GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10)
+ echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV}
+ gpg --export --armor test@example.com > arrow/dev/tasks/linux-packages/KEYS
+ # We can install createrepo_c by package with Ubuntu 22.04.
+ # This is workaround:
+ - name: Install createrepo_c
+ run: |
+ sudo apt install -y \
+ cmake \
+ libbz2-dev \
+ libcurl4-openssl-dev \
+ libglib2.0-dev \
+ liblzma-dev \
+ libmagic-dev \
+ librpm-dev \
+ libsqlite3-dev \
+ libssl-dev \
+ libxml2-dev \
+ libzstd-dev \
+ pkg-config \
+ zlib1g-dev
+ git clone --depth 1 https://github.com/rpm-software-management/createrepo_c.git
+ pushd createrepo_c
+ /usr/bin/cmake \
+ -DCMAKE_INSTALL_PREFIX=/usr \
+ -DENABLE_BASHCOMP=OFF \
+ -DENABLE_DRPM=OFF \
+ -DENABLE_PYTHON=OFF \
+ -DWITH_LIBMODULEMD=OFF \
+ -DWITH_ZCHUNK=OFF \
+ .
+ make -j$(nproc)
+ sudo make install
+ popd
+ rm -rf createrepo_c
+ - name: Test
+ run: |
+ set -e
+ pushd arrow/dev/tasks/linux-packages
+ rake --trace {{ task_namespace }}:test
+ rm -rf {{ task_namespace }}/repositories
+ popd
+ env:
+ APT_TARGETS: {{ target }}
+ ARROW_VERSION: {{ arrow.version }}
+ YUM_TARGETS: {{ target }}
+
+ {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %}
+ {{ macros.github_upload_releases(patterns)|indent }}
diff --git a/src/arrow/dev/tasks/linux-packages/helper.rb b/src/arrow/dev/tasks/linux-packages/helper.rb
new file mode 100644
index 000000000..30ac3b898
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/helper.rb
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Helper
+ module ApacheArrow
+ private
+ def detect_release_time
+ release_time_env = ENV["ARROW_RELEASE_TIME"]
+ if release_time_env
+ Time.parse(release_time_env).utc
+ else
+ latest_commit_time(arrow_source_dir) || Time.now.utc
+ end
+ end
+
+ def arrow_source_dir
+ File.join(__dir__, "..", "..", "..")
+ end
+
+ def detect_version(release_time)
+ version_env = ENV["ARROW_VERSION"]
+ return version_env if version_env
+
+ pom_xml_path = File.join(arrow_source_dir, "java", "pom.xml")
+ pom_xml_content = File.read(pom_xml_path)
+ version = pom_xml_content[/^ <version>(.+?)<\/version>/, 1]
+ formatted_release_time = release_time.strftime("%Y%m%d")
+ version.gsub(/-SNAPSHOT\z/) {"-dev#{formatted_release_time}"}
+ end
+
+ def detect_env(name)
+ value = ENV[name]
+ return value if value and not value.empty?
+
+ dot_env_path = File.join(arrow_source_dir, ".env")
+ File.open(dot_env_path) do |dot_env|
+ dot_env.each_line do |line|
+ case line.chomp
+ when /\A#{Regexp.escape(name)}=(.*)/
+ return $1
+ end
+ end
+ end
+ raise "Failed to detect #{name} environment variable"
+ end
+
+ def detect_repo
+ detect_env("REPO")
+ end
+
+ def docker_image(os, architecture)
+ architecture ||= "amd64"
+ "#{detect_repo}:#{architecture}-#{os}-package-#{@package}"
+ end
+ end
+end
diff --git a/src/arrow/dev/tasks/linux-packages/package-task.rb b/src/arrow/dev/tasks/linux-packages/package-task.rb
new file mode 100644
index 000000000..9dd1b2d93
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/package-task.rb
@@ -0,0 +1,645 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "English"
+require "open-uri"
+require "time"
+
+class PackageTask
+ include Rake::DSL
+
+ def initialize(package, version, release_time, options={})
+ @package = package
+ @version = version
+ @release_time = release_time
+
+ @archive_base_name = "#{@package}-#{@version}"
+ @archive_name = "#{@archive_base_name}.tar.gz"
+ @full_archive_name = File.expand_path(@archive_name)
+
+ @rpm_package = @package
+ case @version
+ when /-((dev|rc)\d+)\z/
+ base_version = $PREMATCH
+ sub_version = $1
+ type = $2
+ if type == "rc" and options[:rc_build_type] == :release
+ @deb_upstream_version = base_version
+ @deb_archive_base_name_version = base_version
+ @rpm_version = base_version
+ @rpm_release = "1"
+ else
+ @deb_upstream_version = "#{base_version}~#{sub_version}"
+ @deb_archive_base_name_version = @version
+ @rpm_version = base_version
+ @rpm_release = "0.#{sub_version}"
+ end
+ else
+ @deb_upstream_version = @version
+ @deb_archive_base_name_version = @version
+ @rpm_version = @version
+ @rpm_release = "1"
+ end
+ @deb_release = ENV["DEB_RELEASE"] || "1"
+ end
+
+ def define
+ define_dist_task
+ define_apt_task
+ define_yum_task
+ define_version_task
+ define_docker_tasks
+ end
+
+ private
+ def env_value(name)
+ value = ENV[name]
+ raise "Specify #{name} environment variable" if value.nil?
+ value
+ end
+
+ def debug_build?
+ ENV["DEBUG"] != "no"
+ end
+
+ def git_directory?(directory)
+ candidate_paths = [".git", "HEAD"]
+ candidate_paths.any? do |candidate_path|
+ File.exist?(File.join(directory, candidate_path))
+ end
+ end
+
+ def latest_commit_time(git_directory)
+ return nil unless git_directory?(git_directory)
+ cd(git_directory) do
+ return Time.iso8601(`git log -n 1 --format=%aI`.chomp).utc
+ end
+ end
+
+ def download(url, output_path)
+ if File.directory?(output_path)
+ base_name = url.split("/").last
+ output_path = File.join(output_path, base_name)
+ end
+ absolute_output_path = File.expand_path(output_path)
+
+ unless File.exist?(absolute_output_path)
+ mkdir_p(File.dirname(absolute_output_path))
+ rake_output_message "Downloading... #{url}"
+ open_url(url) do |downloaded_file|
+ File.open(absolute_output_path, "wb") do |output_file|
+ IO.copy_stream(downloaded_file, output_file)
+ end
+ end
+ end
+
+ absolute_output_path
+ end
+
+ def open_url(url, &block)
+ URI(url).open(&block)
+ end
+
+ def substitute_content(content)
+ content.gsub(/@(.+?)@/) do |matched|
+ yield($1, matched)
+ end
+ end
+
+ def docker_image(os, architecture)
+ image = "#{@package}-#{os}"
+ image << "-#{architecture}" if architecture
+ image
+ end
+
+ def docker_run(os, architecture, console: false)
+ id = os
+ id = "#{id}-#{architecture}" if architecture
+ image = docker_image(os, architecture)
+ build_command_line = [
+ "docker",
+ "build",
+ "--cache-from", image,
+ "--tag", image,
+ ]
+ run_command_line = [
+ "docker",
+ "run",
+ "--rm",
+ "--log-driver", "none",
+ "--volume", "#{Dir.pwd}:/host:rw",
+ ]
+ if $stdin.tty?
+ run_command_line << "--interactive"
+ run_command_line << "--tty"
+ else
+ run_command_line.concat(["--attach", "STDOUT"])
+ run_command_line.concat(["--attach", "STDERR"])
+ end
+ build_dir = ENV["BUILD_DIR"]
+ if build_dir
+ build_dir = "#{File.expand_path(build_dir)}/#{id}"
+ mkdir_p(build_dir)
+ run_command_line.concat(["--volume", "#{build_dir}:/build:rw"])
+ end
+ if debug_build?
+ build_command_line.concat(["--build-arg", "DEBUG=yes"])
+ run_command_line.concat(["--env", "DEBUG=yes"])
+ end
+ pass_through_env_names = [
+ "DEB_BUILD_OPTIONS",
+ "RPM_BUILD_NCPUS",
+ ]
+ pass_through_env_names.each do |name|
+ value = ENV[name]
+ next unless value
+ run_command_line.concat(["--env", "#{name}=#{value}"])
+ end
+ if File.exist?(File.join(id, "Dockerfile"))
+ docker_context = id
+ else
+ from = File.readlines(File.join(id, "from")).find do |line|
+ /^[a-z]/i =~ line
+ end
+ build_command_line.concat(["--build-arg", "FROM=#{from.chomp}"])
+ docker_context = os
+ end
+ build_command_line.concat(docker_build_options(os, architecture))
+ run_command_line.concat(docker_run_options(os, architecture))
+ build_command_line << docker_context
+ run_command_line << image
+ run_command_line << "/host/build.sh" unless console
+
+ sh(*build_command_line)
+ sh(*run_command_line)
+ end
+
+ def docker_build_options(os, architecture)
+ []
+ end
+
+ def docker_run_options(os, architecture)
+ []
+ end
+
+ def docker_pull(os, architecture)
+ image = docker_image(os, architecture)
+ command_line = [
+ "docker",
+ "pull",
+ image,
+ ]
+ command_line.concat(docker_pull_options(os, architecture))
+ sh(*command_line)
+ end
+
+ def docker_pull_options(os, architecture)
+ []
+ end
+
+ def docker_push(os, architecture)
+ image = docker_image(os, architecture)
+ command_line = [
+ "docker",
+ "push",
+ image,
+ ]
+ command_line.concat(docker_push_options(os, architecture))
+ sh(*command_line)
+ end
+
+ def docker_push_options(os, architecture)
+ []
+ end
+
+ def define_dist_task
+ define_archive_task
+ desc "Create release package"
+ task :dist => [@archive_name]
+ end
+
+ def split_target(target)
+ components = target.split("-")
+ if components[0, 2] == ["amazon", "linux"]
+ components[0, 2] = components[0, 2].join("-")
+ end
+ if components.size >= 3
+ components[2..-1] = components[2..-1].join("-")
+ end
+ components
+ end
+
+ def enable_apt?
+ true
+ end
+
+ def apt_targets
+ return [] unless enable_apt?
+
+ targets = (ENV["APT_TARGETS"] || "").split(",")
+ targets = apt_targets_default if targets.empty?
+
+ targets.find_all do |target|
+ Dir.exist?(File.join(apt_dir, target))
+ end
+ end
+
+ def apt_targets_default
+ # Disable arm64 targets by default for now
+ # because they require some setups on host.
+ [
+ "debian-buster",
+ # "debian-buster-arm64",
+ "debian-bullseye",
+ # "debian-bullseye-arm64",
+ "debian-bookworm",
+ # "debian-bookworm-arm64",
+ "ubuntu-bionic",
+ # "ubuntu-bionic-arm64",
+ "ubuntu-focal",
+ # "ubuntu-focal-arm64",
+ "ubuntu-hirsute",
+ # "ubuntu-hirsute-arm64",
+ "ubuntu-impish",
+ # "ubuntu-impish-arm64",
+ ]
+ end
+
+ def deb_archive_base_name
+ "#{@package}-#{@deb_archive_base_name_version}"
+ end
+
+ def deb_archive_name
+ "#{@package}-#{@deb_upstream_version}.tar.gz"
+ end
+
+ def apt_dir
+ "apt"
+ end
+
+ def apt_prepare_debian_dir(tmp_dir, target)
+ source_debian_dir = nil
+ specific_debian_dir = "debian.#{target}"
+ distribution, code_name, _architecture = split_target(target)
+ platform = [distribution, code_name].join("-")
+ platform_debian_dir = "debian.#{platform}"
+ if File.exist?(specific_debian_dir)
+ source_debian_dir = specific_debian_dir
+ elsif File.exist?(platform_debian_dir)
+ source_debian_dir = platform_debian_dir
+ else
+ source_debian_dir = "debian"
+ end
+
+ prepared_debian_dir = "#{tmp_dir}/debian.#{target}"
+ cp_r(source_debian_dir, prepared_debian_dir)
+ control_in_path = "#{prepared_debian_dir}/control.in"
+ if File.exist?(control_in_path)
+ control_in = File.read(control_in_path)
+ rm_f(control_in_path)
+ File.open("#{prepared_debian_dir}/control", "w") do |control|
+ prepared_control = apt_prepare_debian_control(control_in, target)
+ control.print(prepared_control)
+ end
+ end
+ end
+
+ def apt_prepare_debian_control(control_in, target)
+ message = "#{__method__} must be defined to use debian/control.in"
+ raise NotImplementedError, message
+ end
+
+ def apt_build(console: false)
+ tmp_dir = "#{apt_dir}/tmp"
+ rm_rf(tmp_dir)
+ mkdir_p(tmp_dir)
+ cp(deb_archive_name,
+ File.join(tmp_dir, deb_archive_name))
+ apt_targets.each do |target|
+ apt_prepare_debian_dir(tmp_dir, target)
+ end
+
+ env_sh = "#{apt_dir}/env.sh"
+ File.open(env_sh, "w") do |file|
+ file.puts(<<-ENV)
+PACKAGE=#{@package}
+VERSION=#{@deb_upstream_version}
+ ENV
+ end
+
+ apt_targets.each do |target|
+ cd(apt_dir) do
+ distribution, version, architecture = split_target(target)
+ os = "#{distribution}-#{version}"
+ docker_run(os, architecture, console: console)
+ end
+ end
+ end
+
+ def define_apt_task
+ namespace :apt do
+ source_build_sh = "#{__dir__}/apt/build.sh"
+ build_sh = "#{apt_dir}/build.sh"
+ repositories_dir = "#{apt_dir}/repositories"
+
+ file build_sh => source_build_sh do
+ cp(source_build_sh, build_sh)
+ end
+
+ directory repositories_dir
+
+ desc "Build deb packages"
+ if enable_apt?
+ build_dependencies = [
+ deb_archive_name,
+ build_sh,
+ repositories_dir,
+ ]
+ else
+ build_dependencies = []
+ end
+ task :build => build_dependencies do
+ apt_build if enable_apt?
+ end
+
+ namespace :build do
+ desc "Open console"
+ task :console => build_dependencies do
+ apt_build(console: true) if enable_apt?
+ end
+ end
+ end
+
+ desc "Release APT repositories"
+ apt_tasks = [
+ "apt:build",
+ ]
+ task :apt => apt_tasks
+ end
+
+ def enable_yum?
+ true
+ end
+
+ def yum_targets
+ return [] unless enable_yum?
+
+ targets = (ENV["YUM_TARGETS"] || "").split(",")
+ targets = yum_targets_default if targets.empty?
+
+ targets.find_all do |target|
+ Dir.exist?(File.join(yum_dir, target))
+ end
+ end
+
+ def yum_targets_default
+ # Disable aarch64 targets by default for now
+ # because they require some setups on host.
+ [
+ "almalinux-8",
+ # "almalinux-8-arch64",
+ "amazon-linux-2",
+ # "amazon-linux-2-arch64",
+ "centos-7",
+ # "centos-7-aarch64",
+ "centos-8",
+ # "centos-8-aarch64",
+ ]
+ end
+
+ def rpm_archive_base_name
+ "#{@package}-#{@rpm_version}"
+ end
+
+ def rpm_archive_name
+ "#{rpm_archive_base_name}.tar.gz"
+ end
+
+ def yum_dir
+ "yum"
+ end
+
+ def yum_build_sh
+ "#{yum_dir}/build.sh"
+ end
+
+ def yum_expand_variable(key)
+ case key
+ when "PACKAGE"
+ @rpm_package
+ when "VERSION"
+ @rpm_version
+ when "RELEASE"
+ @rpm_release
+ else
+ nil
+ end
+ end
+
+ def yum_spec_in_path
+ "#{yum_dir}/#{@rpm_package}.spec.in"
+ end
+
+ def yum_build(console: false)
+ tmp_dir = "#{yum_dir}/tmp"
+ rm_rf(tmp_dir)
+ mkdir_p(tmp_dir)
+ cp(rpm_archive_name,
+ File.join(tmp_dir, rpm_archive_name))
+
+ env_sh = "#{yum_dir}/env.sh"
+ File.open(env_sh, "w") do |file|
+ file.puts(<<-ENV)
+SOURCE_ARCHIVE=#{rpm_archive_name}
+PACKAGE=#{@rpm_package}
+VERSION=#{@rpm_version}
+RELEASE=#{@rpm_release}
+ ENV
+ end
+
+ spec = "#{tmp_dir}/#{@rpm_package}.spec"
+ spec_in_data = File.read(yum_spec_in_path)
+ spec_data = substitute_content(spec_in_data) do |key, matched|
+ yum_expand_variable(key) || matched
+ end
+ File.open(spec, "w") do |spec_file|
+ spec_file.print(spec_data)
+ end
+
+ yum_targets.each do |target|
+ cd(yum_dir) do
+ distribution, version, architecture = split_target(target)
+ os = "#{distribution}-#{version}"
+ docker_run(os, architecture, console: console)
+ end
+ end
+ end
+
+ def define_yum_task
+ namespace :yum do
+ source_build_sh = "#{__dir__}/yum/build.sh"
+ file yum_build_sh => source_build_sh do
+ cp(source_build_sh, yum_build_sh)
+ end
+
+ repositories_dir = "#{yum_dir}/repositories"
+ directory repositories_dir
+
+ desc "Build RPM packages"
+ if enable_yum?
+ build_dependencies = [
+ repositories_dir,
+ rpm_archive_name,
+ yum_build_sh,
+ yum_spec_in_path,
+ ]
+ else
+ build_dependencies = []
+ end
+ task :build => build_dependencies do
+ yum_build if enable_yum?
+ end
+
+ namespace :build do
+ desc "Open console"
+ task :console => build_dependencies do
+ yum_build(console: true) if enable_yum?
+ end
+ end
+ end
+
+ desc "Release Yum repositories"
+ yum_tasks = [
+ "yum:build",
+ ]
+ task :yum => yum_tasks
+ end
+
+ def define_version_task
+ namespace :version do
+ desc "Update versions"
+ task :update do
+ update_debian_changelog
+ update_spec
+ end
+ end
+ end
+
+ def package_changelog_message
+ "New upstream release."
+ end
+
+ def packager_name
+ ENV["DEBFULLNAME"] || ENV["NAME"] || guess_packager_name_from_git
+ end
+
+ def guess_packager_name_from_git
+ name = `git config --get user.name`.chomp
+ return name unless name.empty?
+ `git log -n 1 --format=%aN`.chomp
+ end
+
+ def packager_email
+ ENV["DEBEMAIL"] || ENV["EMAIL"] || guess_packager_email_from_git
+ end
+
+ def guess_packager_email_from_git
+ email = `git config --get user.email`.chomp
+ return email unless email.empty?
+ `git log -n 1 --format=%aE`.chomp
+ end
+
+ def update_content(path)
+ if File.exist?(path)
+ content = File.read(path)
+ else
+ content = ""
+ end
+ content = yield(content)
+ File.open(path, "w") do |file|
+ file.puts(content)
+ end
+ end
+
+ def update_debian_changelog
+ return unless enable_apt?
+
+ Dir.glob("debian*") do |debian_dir|
+ update_content("#{debian_dir}/changelog") do |content|
+ <<-CHANGELOG.rstrip
+#{@package} (#{@deb_upstream_version}-#{@deb_release}) unstable; urgency=low
+
+ * New upstream release.
+
+ -- #{packager_name} <#{packager_email}> #{@release_time.rfc2822}
+
+#{content}
+ CHANGELOG
+ end
+ end
+ end
+
+ def update_spec
+ return unless enable_yum?
+
+ release_time = @release_time.strftime("%a %b %d %Y")
+ update_content(yum_spec_in_path) do |content|
+ content = content.sub(/^(%changelog\n)/, <<-CHANGELOG)
+%changelog
+* #{release_time} #{packager_name} <#{packager_email}> - #{@rpm_version}-#{@rpm_release}
+- #{package_changelog_message}
+
+ CHANGELOG
+ content = content.sub(/^(Release:\s+)\d+/, "\\11")
+ content.rstrip
+ end
+ end
+
+ def define_docker_tasks
+ namespace :docker do
+ pull_tasks = []
+ push_tasks = []
+
+ (apt_targets + yum_targets).each do |target|
+ distribution, version, architecture = split_target(target)
+ os = "#{distribution}-#{version}"
+
+ namespace :pull do
+ desc "Pull built image for #{target}"
+ task target do
+ docker_pull(os, architecture)
+ end
+ pull_tasks << "docker:pull:#{target}"
+ end
+
+ namespace :push do
+ desc "Push built image for #{target}"
+ task target do
+ docker_push(os, architecture)
+ end
+ push_tasks << "docker:push:#{target}"
+ end
+ end
+
+ desc "Pull built images"
+ task :pull => pull_tasks
+
+ desc "Push built images"
+ task :push => push_tasks
+ end
+ end
+end
diff --git a/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml b/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml
new file mode 100644
index 000000000..3703f4c46
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -0,0 +1,155 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+arch: arm64-graviton2
+virt: vm
+os: linux
+dist: focal
+group: edge
+language: minimal
+
+addons:
+ apt:
+ packages:
+ - apt-utils
+ # We need Ubuntu 20.10 or later
+ # - createrepo_c
+ - devscripts
+ - gpg
+ - libgit2-dev
+ - python3-pip
+ - rake
+ - rpm
+
+ # https://bugs.launchpad.net/ubuntu/+source/glibc/+bug/1916485
+ # We need to use runc 1.0.0~rc93 or later from focal-updated.
+ - runc
+
+ # To build createrepo_c from source.
+ # We can remove them when we can install createrepo_c package
+ - cmake
+ - libbz2-dev
+ - libcurl4-openssl-dev
+ - libglib2.0-dev
+ - liblzma-dev
+ - libmagic-dev
+ - librpm-dev
+ - libsqlite3-dev
+ - libssl-dev
+ - libxml2-dev
+ - libzstd-dev
+ - pkg-config
+ - zlib1g-dev
+ update: true
+
+services:
+ - docker
+
+# don't build twice
+if: tag IS blank
+
+env:
+ global:
+ - APT_TARGETS={{ target }}
+ - ARROW_VERSION={{ arrow.version }}
+ - BUILD_REF={{ arrow.head }}
+ - TRAVIS_TAG={{ task.tag }}
+ - YUM_TARGETS={{ target }}
+
+before_script:
+ - set -e
+ {{ macros.travis_checkout_arrow() }}
+ {{ macros.travis_docker_login() }}
+
+ # Build createrepo_c from source.
+ # We can remove them when we can install createrepo_c package
+ - git clone --depth 1 https://github.com/rpm-software-management/createrepo_c.git
+ - pushd createrepo_c
+ - |
+ /usr/bin/cmake \
+ -DCMAKE_INSTALL_PREFIX=/usr \
+ -DENABLE_BASHCOMP=OFF \
+ -DENABLE_DRPM=OFF \
+ -DENABLE_PYTHON=OFF \
+ -DWITH_LIBMODULEMD=OFF \
+ -DWITH_ZCHUNK=OFF \
+ .
+ - make -j$(nproc)
+ - sudo make install
+ - popd
+ - rm -rf createrepo_c
+
+script:
+ # Build packages
+ - pushd arrow/dev/tasks/linux-packages
+ - rake version:update
+ - |
+ rake docker:pull || :
+ - pushd apache-arrow-apt-source/apt
+ - |
+ for target in debian-* ubuntu-*; do
+ cp -a ${target} ${target}-arm64
+ done
+ - popd
+ - pushd apache-arrow-release/yum
+ - |
+ for target in almalinux-* centos-*; do
+ cp -a ${target} ${target}-aarch64
+ done
+ - popd
+ - |
+ rake \
+ --trace \
+ {{ task_namespace }}:build \
+ BUILD_DIR=build \
+ DEB_BUILD_OPTIONS=parallel=2 \
+ RPM_BUILD_NCPUS=2
+ - sudo rm -rf */*/build
+ - popd
+ # Push Docker image
+ - pushd arrow/dev/tasks/linux-packages
+ - |
+ docker login -u "${DOCKERHUB_USER}" \
+ -p "${DOCKERHUB_TOKEN}" || :
+ - |
+ rake docker:push || :
+ - popd
+ # Test built packages
+ - sudo gem install apt-dists-merge
+ - |
+ (echo "Key-Type: RSA"; \
+ echo "Key-Length: 4096"; \
+ echo "Name-Real: Test"; \
+ echo "Name-Email: test@example.com"; \
+ echo "%no-protection") | \
+ gpg --full-generate-key --batch
+ - |
+ GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10)
+ - gpg --export --armor test@example.com > arrow/dev/tasks/linux-packages/KEYS
+ - pushd arrow/dev/tasks/linux-packages
+ - |
+ rake --trace {{ task_namespace }}:test \
+ CREATEREPO=createrepo_c \
+ GPG_KEY_ID=${GPG_KEY_ID}
+ - rm -rf {{ task_namespace }}/repositories
+ - popd
+
+after_success:
+ {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %}
+ {{ macros.travis_upload_releases(patterns) }}
diff --git a/src/arrow/dev/tasks/linux-packages/yum/build.sh b/src/arrow/dev/tasks/linux-packages/yum/build.sh
new file mode 100755
index 000000000..5224f23e9
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/yum/build.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# -*- sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -u
+
+run()
+{
+ "$@"
+ if test $? -ne 0; then
+ echo "Failed $@"
+ exit 1
+ fi
+}
+
+rpmbuild_options=
+
+. /host/env.sh
+
+if grep -q amazon /etc/system-release-cpe; then
+ distribution=$(cut -d ":" -f 5 /etc/system-release-cpe | tr '_' '-')
+ distribution_version=$(cut -d ":" -f 6 /etc/system-release-cpe)
+else
+ distribution=$(cut -d ":" -f 4 /etc/system-release-cpe)
+ distribution_version=$(cut -d ":" -f 5 /etc/system-release-cpe)
+fi
+distribution_version=$(echo ${distribution_version} | sed -e 's/\..*$//g')
+
+architecture="$(arch)"
+lib_directory=/usr/lib64
+case "${architecture}" in
+ i*86)
+ architecture=i386
+ lib_directory=/usr/lib
+ ;;
+esac
+
+run mkdir -p /build
+run cd /build
+find . -not -path ./ccache -a -not -path "./ccache/*" -delete
+if which ccache > /dev/null 2>&1; then
+ export CCACHE_COMPILERCHECK=content
+ export CCACHE_COMPRESS=1
+ export CCACHE_COMPRESSLEVEL=6
+ export CCACHE_MAXSIZE=500M
+ export CCACHE_DIR="${PWD}/ccache"
+ ccache --show-stats
+ if [ -d "${lib_directory}/ccache" ]; then
+ PATH="${lib_directory}/ccache:$PATH"
+ fi
+fi
+
+run mkdir -p rpmbuild
+run cd
+rm -rf rpmbuild
+run ln -fs /build/rpmbuild ./
+if [ -x /usr/bin/rpmdev-setuptree ]; then
+ rm -rf .rpmmacros
+ run rpmdev-setuptree
+else
+ run cat <<RPMMACROS > ~/.rpmmacros
+%_topdir ${HOME}/rpmbuild
+RPMMACROS
+ run mkdir -p rpmbuild/SOURCES
+ run mkdir -p rpmbuild/SPECS
+ run mkdir -p rpmbuild/BUILD
+ run mkdir -p rpmbuild/RPMS
+ run mkdir -p rpmbuild/SRPMS
+fi
+
+repositories="/host/repositories"
+repository="${repositories}/${distribution}/${distribution_version}"
+rpm_dir="${repository}/${architecture}/Packages"
+srpm_dir="${repository}/source/SRPMS"
+run mkdir -p "${rpm_dir}" "${srpm_dir}"
+
+# for debug
+# rpmbuild_options="$rpmbuild_options --define 'optflags -O0 -g3'"
+
+if [ -n "${SOURCE_ARCHIVE}" ]; then
+ case "${RELEASE}" in
+ 0.dev*)
+ source_archive_base_name=$( \
+ echo ${SOURCE_ARCHIVE} | sed -e 's/\.tar\.gz$//')
+ run tar xf /host/tmp/${SOURCE_ARCHIVE} \
+ --transform="s,^[^/]*,${PACKAGE},"
+ run mv \
+ ${PACKAGE} \
+ ${source_archive_base_name}
+ run tar czf \
+ rpmbuild/SOURCES/${SOURCE_ARCHIVE} \
+ ${source_archive_base_name}
+ run rm -rf ${source_archive_base_name}
+ ;;
+ *)
+ run cp /host/tmp/${SOURCE_ARCHIVE} rpmbuild/SOURCES/
+ ;;
+ esac
+else
+ run cp /host/tmp/${PACKAGE}-${VERSION}.* rpmbuild/SOURCES/
+fi
+run cp \
+ /host/tmp/${PACKAGE}.spec \
+ rpmbuild/SPECS/
+
+run cat <<BUILD > build.sh
+#!/usr/bin/env bash
+
+rpmbuild -ba ${rpmbuild_options} rpmbuild/SPECS/${PACKAGE}.spec
+BUILD
+run chmod +x build.sh
+if [ -n "${SCL:-}" ]; then
+ run cat <<WHICH_STRIP > which-strip.sh
+#!/usr/bin/env bash
+
+which strip
+WHICH_STRIP
+ run chmod +x which-strip.sh
+ run cat <<USE_SCL_STRIP >> ~/.rpmmacros
+%__strip $(run scl enable ${SCL} ./which-strip.sh)
+USE_SCL_STRIP
+ if [ "${DEBUG:-no}" = "yes" ]; then
+ run scl enable ${SCL} ./build.sh
+ else
+ run scl enable ${SCL} ./build.sh > /dev/null
+ fi
+else
+ if [ "${DEBUG:-no}" = "yes" ]; then
+ run ./build.sh
+ else
+ run ./build.sh > /dev/null
+ fi
+fi
+
+if which ccache > /dev/null 2>&1; then
+ ccache --show-stats
+fi
+
+run mv rpmbuild/RPMS/*/* "${rpm_dir}/"
+run mv rpmbuild/SRPMS/* "${srpm_dir}/"
+
+run chown -R "$(stat --format "%u:%g" "${repositories}")" "${repositories}"
diff --git a/src/arrow/dev/tasks/macros.jinja b/src/arrow/dev/tasks/macros.jinja
new file mode 100644
index 000000000..be265caa4
--- /dev/null
+++ b/src/arrow/dev/tasks/macros.jinja
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{%- macro github_header() -%}
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+on:
+ push:
+ branches:
+ - "*-github-*"
+{% endmacro %}
+
+{%- macro github_checkout_arrow() -%}
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow config core.symlinks true
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+{% endmacro %}
+
+{%- macro github_login_dockerhub() -%}
+ - name: Login to Dockerhub
+ uses: docker/login-action@v1
+ with:
+ username: {{ '${{ secrets.DOCKERHUB_USER }}' }}
+ password: {{ '${{ secrets.DOCKERHUB_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_login_ghcr() -%}
+ - name: Login to GitHub Container Registry
+ shell: bash
+ run: docker login ghcr.io -u {{ '${{ github.repository_owner }}' }} -p {{ '${{ secrets.CROSSBOW_GHCR_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_install_archery() -%}
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Install Archery
+ shell: bash
+ run: pip install -e arrow/dev/archery[all]
+{% endmacro %}
+
+{%- macro github_upload_releases(pattern) -%}
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Setup Crossbow
+ shell: bash
+ run: pip install -e arrow/dev/archery[crossbow-upload]
+ - name: Upload artifacts
+ shell: bash
+ run: |
+ archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+ "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+ env:
+ CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}' }}
+{% endmacro %}
+
+{%- macro github_upload_gemfury(pattern) -%}
+ {%- if arrow.branch == 'master' -%}
+ - name: Upload package to Gemfury
+ shell: bash
+ run: |
+ path=$(ls {{ pattern }})
+ curl -F "package=@${path}" https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/
+ env:
+ CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
+ CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
+ {% endif %}
+{% endmacro %}
+
+{%- macro azure_checkout_arrow() -%}
+ - script: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ displayName: Clone arrow
+{% endmacro %}
+
+{%- macro azure_upload_releases(pattern) -%}
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.8'
+ - script: pip install -e arrow/dev/archery[crossbow-upload]
+ displayName: Install Crossbow
+ - bash: |
+ archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+ "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+ env:
+ CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN)
+ displayName: Upload packages as a GitHub release
+{% endmacro %}
+
+{%- macro azure_upload_anaconda(pattern) -%}
+ {%- if arrow.branch == 'master' -%}
+ - task: CondaEnvironment@1
+ inputs:
+ packageSpecs: 'anaconda-client'
+ installOptions: '-c conda-forge'
+ updateConda: no
+ - script: |
+ conda install -y anaconda-client
+ anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force {{ pattern }}
+ displayName: Upload packages to Anaconda
+ {% endif %}
+{% endmacro %}
+
+{%- macro travis_checkout_arrow() -%}
+ - git clone --no-checkout {{ arrow.remote }} arrow
+ - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ - git -C arrow checkout FETCH_HEAD
+ - git -C arrow submodule update --init --recursive
+{% endmacro %}
+
+{%- macro travis_install_archery() -%}
+ - sudo -H pip3 install --upgrade pip
+ - sudo -H pip3 install docker-compose
+ - sudo -H pip3 install -e arrow/dev/archery[docker]
+{% endmacro %}
+
+{%- macro travis_docker_login() -%}
+ - echo "${DOCKERHUB_TOKEN}" | docker login --username "${DOCKERHUB_USER}" --password-stdin
+{% endmacro %}
+
+{%- macro travis_upload_releases(pattern) -%}
+ - sudo -H pip3 install pygit2==1.0
+ - sudo -H pip3 install -e arrow/dev/archery[crossbow-upload]
+ - |
+ archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ {% if pattern is string %}
+ "{{ pattern }}"
+ {% elif pattern is iterable %}
+ {% for p in pattern %}
+ "{{ p }}" {{ "\\" if not loop.last else "" }}
+ {% endfor %}
+ {% endif %}
+{% endmacro %}
+
+{%- macro travis_upload_gemfury(pattern) -%}
+ {%- if arrow.branch == 'master' -%}
+ - |
+ WHEEL_PATH=$(echo arrow/python/repaired_wheels/*.whl)
+ curl \
+ -F "package=@${WHEEL_PATH}" \
+ "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
+ {% endif %}
+{% endmacro %}
diff --git a/src/arrow/dev/tasks/nightlies.sample.yml b/src/arrow/dev/tasks/nightlies.sample.yml
new file mode 100644
index 000000000..710f7c0ad
--- /dev/null
+++ b/src/arrow/dev/tasks/nightlies.sample.yml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# this travis configuration can be used to submit cron scheduled tasks
+# 1. copy this file to one of crossbow's branch (master for example) with
+# filename .travis.yml
+# 2. setup daily cron jobs for that particular branch, see travis'
+# documentation https://docs.travis-ci.com/user/cron-jobs/
+
+branches:
+ # don't attempt to build branches intended for windows builds
+ except:
+ - /.*win.*/
+
+os: linux
+dist: trusty
+language: generic
+
+before_install:
+ # Install Miniconda.
+ - echo `pwd`
+ - |
+ echo ""
+ echo "Installing a fresh version of Miniconda."
+ MINICONDA_URL="https://repo.continuum.io/miniconda"
+ MINICONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
+ curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}"
+ bash $MINICONDA_FILE -b
+
+ # Configure conda.
+ - |
+ echo ""
+ echo "Configuring conda."
+ source /home/travis/miniconda3/bin/activate root
+ conda config --remove channels defaults
+ conda config --add channels defaults
+ conda config --add channels conda-forge
+ conda config --set show_channel_urls true
+
+install:
+ - pushd ..
+ # to build against a specific branch of a fork
+ # git clone -b <branch> https://github.com/<user>/arrow
+ - git clone https://github.com/apache/arrow
+ - pip install dev/archery[crossbow]
+
+script:
+ # submit packaging tasks
+ - |
+ if [ $TRAVIS_EVENT_TYPE = "cron" ]; then
+ archery crossbow submit -g conda -g wheel -g linux
+ else
+ archery crossbow submit --dry-run -g conda -g wheel -g linux
+ fi
diff --git a/src/arrow/dev/tasks/nuget-packages/github.linux.yml b/src/arrow/dev/tasks/nuget-packages/github.linux.yml
new file mode 100644
index 000000000..cd03a7bfe
--- /dev/null
+++ b/src/arrow/dev/tasks/nuget-packages/github.linux.yml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ package:
+ name: Package
+ runs-on: ubuntu-latest
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
+ - name: Prepare version
+ run: |
+ sed -i'' -E -e \
+ "s/^ <Version>.+<\/Version>/ <Version>{{ arrow.no_rc_semver_version }}<\/Version>/" \
+ arrow/csharp/Directory.Build.props
+ - name: Build package
+ run: |
+ pushd arrow
+ archery docker run {{ run }}
+ popd
+
+ {% set patterns = ["arrow/csharp/artifacts/**/*.nupkg",
+ "arrow/csharp/artifacts/**/*.snupkg"] %}
+ {{ macros.github_upload_releases(patterns)|indent }}
diff --git a/src/arrow/dev/tasks/python-sdist/github.yml b/src/arrow/dev/tasks/python-sdist/github.yml
new file mode 100644
index 000000000..68371876a
--- /dev/null
+++ b/src/arrow/dev/tasks/python-sdist/github.yml
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ build:
+ name: "Build sdist"
+ runs-on: ubuntu-20.04
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
+ - name: Build sdist
+ run: |
+ archery docker run python-sdist
+ {% if arrow.branch == 'master' %}
+ archery docker push python-sdist || :
+ {% endif %}
+ env:
+ PYARROW_VERSION: {{ arrow.no_rc_version }}
+
+ - name: Test sdist
+ run: archery docker run ubuntu-python-sdist-test
+ env:
+ PYARROW_VERSION: {{ arrow.no_rc_version }}
+
+ {{ macros.github_upload_releases("arrow/python/dist/*.tar.gz")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/dist/*.tar.gz")|indent }}
diff --git a/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml b/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml
new file mode 100644
index 000000000..dc2386482
--- /dev/null
+++ b/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ build:
+ name: "Build wheel for Manylinux {{ manylinux_version }}"
+ runs-on: ubuntu-latest
+ env:
+ # archery uses these environment variables
+ ARCH: amd64
+ PYTHON: "{{ python_version }}"
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_install_archery()|indent }}
+ {{ macros.github_login_dockerhub()|indent }}
+
+ - name: Build wheel
+ shell: bash
+ run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }}
+
+ # TODO(kszucs): auditwheel show
+ - name: Test wheel
+ shell: bash
+ run: |
+ archery docker run python-wheel-manylinux-test-imports
+ archery docker run python-wheel-manylinux-test-unittests
+
+ {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
+
+ {% if arrow.branch == 'master' %}
+ - name: Push Docker Image
+ shell: bash
+ run: |
+ archery docker push python-wheel-manylinux-{{ manylinux_version }}
+ archery docker push python-wheel-manylinux-test-unittests
+ {% endif %}
diff --git a/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml b/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml
new file mode 100644
index 000000000..8078abfd5
--- /dev/null
+++ b/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+env:
+ ARROW_S3: {{ arrow_s3 }}
+ CC: "clang"
+ CXX: "clang++"
+ MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}"
+ PYARROW_BUILD_VERBOSE: 1
+ PYARROW_VERSION: "{{ arrow.no_rc_version }}"
+ PYTHON_VERSION: "{{ python_version }}"
+ PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}"
+ VCPKG_DEFAULT_TRIPLET: x64-osx-static-release
+ VCPKG_FEATURE_FLAGS: "-manifests"
+ VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
+ VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }}
+ VCPKG_VERSION: "{{ vcpkg_version }}"
+
+jobs:
+ build:
+ name: Build wheel for OS X
+ runs-on: macos-10.15
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - name: Install System Dependencies
+ run: brew install bash bison coreutils ninja cmake
+
+ - uses: actions/cache@v2
+ id: vcpkg-cache
+ with:
+ path: vcpkg
+ key: vcpkg-{{ macos_deployment_target }}-{{ vcpkg_version }}-{{ "${{ hashFiles('arrow/ci/vcpkg/*.patch', 'arrow/ci/vcpkg/*osx*.cmake') }}" }}
+
+ - name: Install Vcpkg
+ if: steps.vcpkg-cache.outputs.cache-hit != 'true'
+ shell: bash
+ env:
+ MACOSX_DEPLOYMENT_TARGET: "10.15"
+ run: arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT
+
+ - name: Install Packages
+ run: |
+ $VCPKG_ROOT/vcpkg install \
+ abseil \
+ boost-filesystem \
+ brotli \
+ bzip2 \
+ c-ares \
+ curl \
+ flatbuffers \
+ gflags \
+ glog \
+ grpc \
+ lz4 \
+ openssl \
+ orc \
+ protobuf \
+ rapidjson \
+ re2 \
+ snappy \
+ thrift \
+ utf8proc \
+ zlib \
+ zstd
+
+ {% if arrow_s3 == "ON" %}
+ - name: Install AWS SDK C++
+ run: $VCPKG_ROOT/vcpkg install aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer]
+ {% endif %}
+
+ - name: Install Python {{ python_version }}
+ shell: bash
+ run: sudo arrow/ci/scripts/install_python.sh macos {{ python_version }}
+
+ - name: Build Wheel
+ shell: bash
+ run: |
+ $PYTHON -m virtualenv build-env
+ source build-env/bin/activate
+ pip install --upgrade pip wheel
+ arrow/ci/scripts/python_wheel_macos_build.sh x86_64 $(pwd)/arrow $(pwd)/build
+
+ - name: Test Wheel
+ shell: bash
+ run: |
+ $PYTHON -m virtualenv test-env
+ source test-env/bin/activate
+ pip install --upgrade pip wheel
+ arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
+
+ {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml b/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml
new file mode 100644
index 000000000..e5456dbfc
--- /dev/null
+++ b/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prerequisits on the host:
+# - brew install bash bison coreutils ninja cmake
+# - sudo arrow/ci/scripts/install_python.sh macos 3.9
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+env:
+ ARROW_FLIGHT: OFF
+ ARROW_JEMALLOC: OFF
+ ARROW_SIMD_LEVEL: "{{ arrow_simd_level }}"
+ CC: "clang"
+ CMAKE_BUILD_TYPE: release
+ CMAKE_CXX_COMPILER_LAUNCHER: "ccache"
+ CXX: "clang++"
+ MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}"
+ PYARROW_BUILD_VERBOSE: 1
+ PYARROW_VERSION: "{{ arrow.no_rc_version }}"
+ PYTHON_VERSION: "{{ python_version }}"
+ PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}"
+ VCPKG_DEFAULT_TRIPLET: {{ arch }}-osx-static-release
+ VCPKG_FEATURE_FLAGS: "-manifests"
+ VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
+ VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }}
+ VCPKG_VERSION: "{{ vcpkg_version }}"
+
+jobs:
+ build:
+ name: Build wheel for OS X
+ runs-on: self-hosted
+ steps:
+ - name: Cleanup
+ shell: bash
+ run: rm -rf arrow vcpkg build crossbow-env build-env test-*-env
+
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - name: Add Brew's Bison to PATH
+ shell: bash
+ run: echo "/opt/homebrew/opt/bison/bin" >> $GITHUB_PATH
+
+ - name: Install Vcpkg
+ shell: bash
+ env:
+ MACOSX_DEPLOYMENT_TARGET: "11.0"
+ run: arch -arm64 arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT
+
+ - name: Install OpenSSL
+ shell: bash
+ run: arch -arm64 $VCPKG_ROOT/vcpkg install openssl
+
+ {% if arch == "universal2" %}
+ # OpenSSL doesn't provide an universal2 configuration yet, so vcpkg is
+ # unable to propagate the list of architectures from VCPKG_OSX_ARCHIETCTURES.
+ # In order to prevent link time warnings (which may turn out to be errors)
+ # we compile OpenSSL separately for the two architectures and merge the
+ # binaries into universal2 ones using `lipo`.
+ - name: Create universal binaries for OpenSSL
+ shell: bash
+ run: |
+ for arch in arm64 x64; do
+ VCPKG_DEFAULT_TRIPLET=${arch}-osx-static-release arch -arm64 $VCPKG_ROOT/vcpkg install openssl
+ done
+ for lib in libcrypto libssl; do
+ lipo -create $VCPKG_ROOT/installed/arm64-osx-static-release/lib/${lib}.a \
+ $VCPKG_ROOT/installed/x64-osx-static-release/lib/${lib}.a \
+ -output $VCPKG_ROOT/installed/universal2-osx-static-release/lib/${lib}.a
+ done
+ {% endif %}
+
+ - name: Install Packages
+ run: |
+ arch -arm64 $VCPKG_ROOT/vcpkg install \
+ aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
+ boost-filesystem \
+ brotli \
+ bzip2 \
+ c-ares \
+ curl \
+ flatbuffers \
+ gflags \
+ glog \
+ lz4 \
+ orc \
+ protobuf \
+ rapidjson \
+ re2 \
+ snappy \
+ thrift \
+ utf8proc \
+ zlib \
+ zstd
+
+ - name: Build Wheel
+ shell: bash
+ run: |
+ $PYTHON -m virtualenv build-env
+ source build-env/bin/activate
+ pip install --upgrade pip wheel
+ arch -arm64 arrow/ci/scripts/python_wheel_macos_build.sh {{ arch }} $(pwd)/arrow $(pwd)/build
+
+ - name: Test Wheel on ARM64
+ shell: bash
+ env:
+ PYTEST_ADDOPTS: "-k 'not test_cancellation'"
+ run: |
+ $PYTHON -m virtualenv test-arm64-env
+ source test-arm64-env/bin/activate
+ pip install --upgrade pip wheel
+ arch -arm64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
+
+ {% if arch == "universal2" %}
+ - name: Test Wheel on AMD64
+ shell: bash
+ env:
+ PYTEST_ADDOPTS: "-k 'not test_cancellation'"
+ run: |
+ $PYTHON -m virtualenv test-amd64-env
+ source test-amd64-env/bin/activate
+ pip install --upgrade pip wheel
+ arch -x86_64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
+ {% endif %}
+
+ - name: Upload artifacts
+ shell: bash
+ run: |
+ $PYTHON -m virtualenv crossbow-env
+ source crossbow-env/bin/activate
+ arch -x86_64 pip install -e arrow/dev/archery[crossbow-upload]
+ arch -x86_64 archery crossbow \
+ --queue-path $(pwd) \
+ --queue-remote {{ queue_remote_url }} \
+ upload-artifacts \
+ --sha {{ task.branch }} \
+ --tag {{ task.tag }} \
+ "arrow/python/repaired_wheels/*.whl"
+ env:
+ CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }}
+
+ {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/src/arrow/dev/tasks/python-wheels/github.windows.yml b/src/arrow/dev/tasks/python-wheels/github.windows.yml
new file mode 100644
index 000000000..f9989aed0
--- /dev/null
+++ b/src/arrow/dev/tasks/python-wheels/github.windows.yml
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ build:
+ name: "Build wheel for Windows"
+ runs-on: windows-2019
+ env:
+ # archery uses this environment variable
+ PYTHON: "{{ python_version }}"
+ # this is a private repository at the moment (mostly because of licensing
+ # consideration of windows images with visual studio), but anyone can
+ # recreate the image by manually building it via:
+ # `archery build python-wheel-windows-vs2017`
+ # note that we don't run docker build since there wouldn't be a cache hit
+ # and rebuilding the dependencies takes a fair amount of time
+ REPO: ghcr.io/ursacomputing/arrow
+ # prefer the docker cli over docker-compose
+ ARCHERY_USE_DOCKER_CLI: 1
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+ {{ macros.github_login_ghcr()|indent }}
+ {{ macros.github_install_archery()|indent }}
+
+ - name: Build wheel
+ shell: cmd
+ run: archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017
+
+ # Two layers of the official python 3.6 windows image are not available for download.
+ # Docker pull returns with unexpected status resolving reader: 403 Forbidden.
+ # See https://issues.apache.org/jira/browse/ARROW-14424
+ {% if python_version != "3.6" %}
+ - name: Test wheel
+ shell: cmd
+ run: archery docker run python-wheel-windows-test
+ {% endif %}
+
+ {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
+ {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
diff --git a/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml b/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml
new file mode 100644
index 000000000..d32d89d83
--- /dev/null
+++ b/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+arch: arm64-graviton2
+virt: vm
+os: linux
+dist: focal
+group: edge
+language: minimal
+
+addons:
+ apt:
+ packages:
+ - libgit2-dev
+ - python3-pip
+
+services:
+ - docker
+
+# don't build twice
+if: tag IS blank
+
+env:
+ global:
+ - BUILD_REF={{ arrow.head }}
+ - TRAVIS_TAG={{ task.tag }}
+ # archery uses these environment variables
+ - ARCH=arm64v8
+ - PYTHON="{{ python_version }}"
+
+before_script:
+ - set -e
+ {{ macros.travis_checkout_arrow() }}
+ {{ macros.travis_docker_login() }}
+
+script:
+ # Install Archery and Crossbow dependencies
+ {{ macros.travis_install_archery() }}
+
+ # Build and Test packages
+ # output something every minutes to prevent travis from killing the build
+ - while sleep 1m; do echo "=====[ $SECONDS seconds still running ]====="; done &
+ - archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }}
+ - archery docker run python-wheel-manylinux-test-imports
+ - archery docker run python-wheel-manylinux-test-unittests
+ - kill %1
+
+after_success:
+ # Upload wheel as github artifact
+ {{ macros.travis_upload_releases("arrow/python/repaired_wheels/*.whl") }}
+ {{ macros.travis_upload_gemfury("arrow/python/repaired_wheels/*.whl") }}
+
+ {% if arrow.branch == 'master' %}
+ # Push the docker image to dockerhub
+ - archery docker push python-wheel-manylinux-{{ manylinux_version }}
+ - archery docker push python-wheel-manylinux-test-unittests
+ {% endif %}
diff --git a/src/arrow/dev/tasks/r/azure.linux.yml b/src/arrow/dev/tasks/r/azure.linux.yml
new file mode 100644
index 000000000..92e725f68
--- /dev/null
+++ b/src/arrow/dev/tasks/r/azure.linux.yml
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+jobs:
+ - job: linux
+ pool:
+ vmImage: ubuntu-latest
+ timeoutInMinutes: 360
+ steps:
+ - script: |
+ set -ex
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ displayName: Clone arrow
+
+ - script: |
+ set -ex
+ docker -v
+ docker-compose -v
+ cd arrow
+ export R_ORG={{ r_org }}
+ export R_IMAGE={{ r_image }}
+ export R_TAG={{ r_tag }}
+ export DEVTOOLSET_VERSION={{ devtoolset_version|default("-1") }}
+ docker-compose pull --ignore-pull-failures r
+ docker-compose build r
+ displayName: Docker build
+
+ - script: |
+ set -ex
+ cd arrow
+ export R_ORG={{ r_org }}
+ export R_IMAGE={{ r_image }}
+ export R_TAG={{ r_tag }}
+ export ARROW_R_DEV={{ not_cran|default("TRUE") }}
+ # Note that by default, ci/scripts/r_test.sh sets NOT_CRAN=true
+ # if ARROW_R_DEV=TRUE. Pass `-e NOT_CRAN=false` to turn that off.
+ docker-compose run {{ flags|default("") }} r
+ displayName: Docker run
+
+ - script: |
+ set -ex
+ cat arrow/r/check/arrow.Rcheck/00install.out
+ displayName: Dump install logs
+ condition: succeededOrFailed()
+ - script: |
+ set -ex
+ cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
+ displayName: Dump test logs
+ condition: succeededOrFailed()
diff --git a/src/arrow/dev/tasks/r/github.devdocs.yml b/src/arrow/dev/tasks/r/github.devdocs.yml
new file mode 100644
index 000000000..5591e6587
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.devdocs.yml
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ devdocs:
+ name: 'R devdocs {{ "${{ matrix.os }}" }}'
+ runs-on: {{ "${{ matrix.os }}" }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macOS-latest, ubuntu-20.04]
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - uses: r-lib/actions/setup-r@v1
+ - uses: r-lib/actions/setup-pandoc@v1
+ - name: Install knitr, rmarkdown
+ run: |
+ install.packages(c("rmarkdown", "knitr", "sessioninfo"))
+ shell: Rscript {0}
+ - name: Session info
+ run: |
+ options(width = 100)
+ pkgs <- installed.packages()[, "Package"]
+ sessioninfo::session_info(pkgs, include_base = TRUE)
+ shell: Rscript {0}
+ - name: Remove system gfortran so that brew can install gcc successfully
+ run: rm -f /usr/local/bin/gfortran
+ - name: Write the install script
+ env:
+ RUN_DEVDOCS: TRUE
+ DEVDOCS_MACOS: {{ "${{contains(matrix.os, 'macOS')}}" }}
+ DEVDOCS_UBUNTU: {{ "${{contains(matrix.os, 'ubuntu')}}" }}
+ run: |
+ # This isn't actually rendering the docs, but will save arrow/r/vignettes/script.sh
+ # which can be sourced to install arrow.
+ rmarkdown::render("arrow/r/vignettes/developing.Rmd")
+ shell: Rscript {0}
+ - name: Install from the devdocs
+ env:
+ LIBARROW_BINARY: FALSE
+ ARROW_R_DEV: TRUE
+ run: bash arrow/r/vignettes/script.sh
+ shell: bash
+ - name: Ensure that the Arrow package is loadable and we have the correct one
+ run: |
+ echo $LD_LIBRARY_PATH
+ R --no-save <<EOF
+ Sys.getenv("LD_LIBRARY_PATH")
+ library(arrow)
+ arrow_info()
+ EOF
+ shell: bash -l {0}
+ - name: Save the install script
+ uses: actions/upload-artifact@v2
+ with:
+ name: {{ "devdocs-script_os-${{ matrix.os }}_sysinstall-${{ matrix.system-install }}" }}
+ path: arrow/r/vignettes/script.sh
+ if: always()
diff --git a/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml b/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml
new file mode 100644
index 000000000..e48b67ac6
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push
+
+jobs:
+ write-files:
+ name: "Write files"
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ env:
+ ARROW_R_DEV: "TRUE"
+ RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+ - uses: r-lib/actions/setup-r@v1
+ - name: Install dependencies
+ run: |
+ install.packages(c("remotes", "glue", "sys"))
+ remotes::install_deps("arrow/r", dependencies = TRUE)
+ shell: Rscript {0}
+ - name: Install Arrow
+ run: |
+ cd arrow/r
+ R CMD INSTALL .
+ shell: bash
+ - name: Write files
+ run: |
+ cd arrow/r
+ R -f extra-tests/write-files.R
+ shell: bash
+
+ - name: Upload the parquet artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: files
+ path: arrow/r/extra-tests/files
+
+ read-files:
+ name: "Read files with Arrow {{ '${{ matrix.config.old_arrow_version }}' }}"
+ needs: [write-files]
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ config:
+ # We use the R version that was released at the time of the arrow release in order
+ # to make sure we can download binaries from RSPM.
+ - { old_arrow_version: '5.0.0', r: '4.1' }
+ - { old_arrow_version: '4.0.0', r: '4.0' }
+ - { old_arrow_version: '3.0.0', r: '4.0' }
+ - { old_arrow_version: '2.0.0', r: '4.0' }
+ - { old_arrow_version: '1.0.1', r: '4.0' }
+ env:
+ ARROW_R_DEV: "TRUE"
+ RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+ OLD_ARROW_VERSION: {{ '${{ matrix.config.old_arrow_version }}' }}
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - uses: r-lib/actions/setup-r@v1
+ with:
+ r-version: {{ '${{ matrix.config.r }}' }}
+ - name: Install old Arrow
+ run: |
+ install.packages(c("remotes", "testthat"))
+ remotes::install_version("arrow", "{{ '${{ matrix.config.old_arrow_version }}' }}")
+ shell: Rscript {0}
+ - name: Setup our testing directory, copy only the tests to it.
+ run: |
+ mkdir -p extra-tests/files
+ cp arrow/r/extra-tests/helper*.R extra-tests/
+ cp arrow/r/extra-tests/test-*.R extra-tests/
+ - name: Download artifacts
+ uses: actions/download-artifact@v2
+ with:
+ name: files
+ path: extra-tests/files
+ - name: Test reading
+ run: |
+ testthat::test_dir("extra-tests")
+ shell: Rscript {0}
diff --git a/src/arrow/dev/tasks/r/github.linux.cran.yml b/src/arrow/dev/tasks/r/github.linux.cran.yml
new file mode 100644
index 000000000..03d22dcbf
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.cran.yml
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ as-cran:
+ name: "rhub/{{ MATRIX }}"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ # See https://hub.docker.com/r/rhub
+ r_image:
+ - debian-gcc-devel
+ - debian-gcc-patched
+ - debian-gcc-release
+ - fedora-gcc-devel
+ - fedora-clang-devel
+ env:
+ R_ORG: "rhub"
+ R_IMAGE: {{ MATRIX }}
+ R_TAG: "latest"
+ ARROW_R_DEV: "FALSE"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+ - name: Docker Pull
+ shell: bash
+ run: cd arrow && docker-compose pull --ignore-pull-failures r
+ - name: Docker Build
+ shell: bash
+ run: cd arrow && docker-compose build r
+ - name: Docker Run
+ shell: bash
+ run: cd arrow && docker-compose run r
+ - name: Dump install logs
+ run: cat arrow/r/check/arrow.Rcheck/00install.out
+ if: always()
+ - name: Dump test logs
+ run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
+ if: always()
+ - name: Save the test output
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-output
+ path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
diff --git a/src/arrow/dev/tasks/r/github.linux.offline.build.yml b/src/arrow/dev/tasks/r/github.linux.offline.build.yml
new file mode 100644
index 000000000..60685b18c
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.offline.build.yml
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ grab-dependencies:
+ name: "Download thirdparty dependencies"
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ env:
+ ARROW_R_DEV: "TRUE"
+ RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+ - uses: r-lib/actions/setup-r@v1
+ - name: Pull Arrow dependencies
+ run: |
+ cd arrow/r
+ # This is `make build`, but with no vignettes and not running `make doc`
+ cp ../NOTICE.txt inst/NOTICE.txt
+ rsync --archive --delete ../cpp tools/
+ cp -p ../.env tools/
+ cp -p ../NOTICE.txt tools/
+ cp -p ../LICENSE.txt tools/
+ R CMD build --no-build-vignettes --no-manual .
+ built_tar=$(ls -1 arrow*.tar.gz | head -n 1)
+ R -e "source('R/install-arrow.R'); create_package_with_all_dependencies(dest_file = 'arrow_with_deps.tar.gz', source_file = \"${built_tar}\")"
+ shell: bash
+ - name: Upload the third party dependency artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: thirdparty_deps
+ path: arrow/r/arrow_with_deps.tar.gz
+
+ intall-offline:
+ name: "Install offline"
+ needs: [grab-dependencies]
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ env:
+ ARROW_R_DEV: TRUE
+ RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - uses: r-lib/actions/setup-r@v1
+ - name: Download artifacts
+ uses: actions/download-artifact@v2
+ with:
+ name: thirdparty_deps
+ path: arrow/r/
+ - name: Install system dependencies
+ run: |
+ sudo apt-get update
+ sudo apt install libcurl4-openssl-dev libssl-dev
+ - name: Install dependencies
+ run: |
+ install.packages(c("remotes", "glue", "sys"))
+ remotes::install_deps("arrow/r", dependencies = TRUE)
+ shell: Rscript {0}
+ - name: Install
+ env:
+ TEST_OFFLINE_BUILD: true
+ LIBARROW_MINIMAL: false
+ run: |
+ cd arrow/r
+ R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz
+ - name: Run the tests
+ run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")'
+ - name: Dump test logs
+ run: cat arrow-tests/testthat.Rout*
+ if: always()
+ - name: Save the test output
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-output
+ path: arrow-tests/testthat.Rout*
+ if: always()
diff --git a/src/arrow/dev/tasks/r/github.linux.rchk.yml b/src/arrow/dev/tasks/r/github.linux.rchk.yml
new file mode 100644
index 000000000..72ff26969
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.rchk.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ as-cran:
+ name: "rchk"
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ env:
+ ARROW_R_DEV: "FALSE"
+ RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - uses: r-lib/actions/setup-r@v1
+ - uses: r-lib/actions/setup-pandoc@v1
+ - name: Install dependencies
+ run: |
+ install.packages("remotes")
+ remotes::install_deps("arrow/r", dependencies = TRUE)
+ shell: Rscript {0}
+ - name: Build arrow package
+ run: |
+ R CMD build --no-build-vignettes arrow/r
+ mkdir packages
+ mv arrow_*.tar.gz packages
+ - name: rchk
+ run: |
+ docker run -v `pwd`/packages:/rchk/packages kalibera/rchk:latest /rchk/packages/arrow_*.tar.gz |& tee rchk.out
+ - name: Confirm that rchk has no errors
+ # Suspicious call, [UP], and [PB] are all of the error types currently at
+ # https://github.com/kalibera/cran-checks/tree/master/rchk/results
+ # though this might not be exhaustive, there does not appear to be a way to have rchk return an error code
+ # CRAN also will remove some of the outputs (especially those related to Rcpp and strptime, e.g.
+ # ERROR: too many states (abstraction error?))
+ # https://github.com/kalibera/rchk
+ run: |
+ if [ $(grep -c "Suspicious call" rchk.out) -gt 0 ] || [ $(grep -c "\[UP\]" rchk.out) -gt 0 ] || [ $(grep -c "\[PB\]" rchk.out) -gt 0 ]; then
+ echo "Found rchk errors"
+ cat rchk.out
+ exit 1
+ fi
+ if: always()
+ - name: Dump rchk output logs
+ run: cat rchk.out
+ if: always()
diff --git a/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml b/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml
new file mode 100644
index 000000000..80071171b
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ r-versions:
+ name: "rstudio/r-base:latest-focal"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ env:
+ R_ORG: "rstudio"
+ R_IMAGE: "r-base"
+ R_TAG: "latest-focal"
+ ARROW_R_DEV: "TRUE"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+ - name: Docker Pull
+ shell: bash
+ run: cd arrow && docker-compose pull --ignore-pull-failures r
+ - name: Docker Build
+ shell: bash
+ run: cd arrow && docker-compose build r-revdepcheck
+ - name: Docker Run
+ shell: bash
+ run: cd arrow && docker-compose run r-revdepcheck
+ - name: revdepcheck CRAN report
+ if: always()
+ shell: bash
+ run: cat arrow/r/revdep/cran.md
+ - name: revdepcheck failures
+ if: always()
+ shell: bash
+ run: cat arrow/r/revdep/failures.md
+ - name: revdepcheck problems
+ if: always()
+ shell: bash
+ run: cat arrow/r/revdep/problems.md
+ - name: Save the revdep output
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ name: revdepcheck-folder
+ path: arrow/r/revdep
diff --git a/src/arrow/dev/tasks/r/github.linux.versions.yml b/src/arrow/dev/tasks/r/github.linux.versions.yml
new file mode 100644
index 000000000..f383fe8d0
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.linux.versions.yml
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ r-versions:
+ name: "rstudio/r-base:{{ MATRIX }}-bionic"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ # See https://hub.docker.com/r/rstudio/r-base
+ r_version:
+ # We test devel, release, and oldrel in regular CI.
+ # This is for older versions
+ # rlang and vctrs depend on R >= 3.3
+ - "3.3"
+ - "3.4"
+ - "3.5"
+ - "3.6"
+ env:
+ R_ORG: "rstudio"
+ R_IMAGE: "r-base"
+ R_TAG: "{{ MATRIX }}-bionic"
+ ARROW_R_DEV: "TRUE"
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Free Up Disk Space
+ shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - name: Fetch Submodules and Tags
+ shell: bash
+ run: cd arrow && ci/scripts/util_checkout.sh
+ - name: Docker Pull
+ shell: bash
+ run: cd arrow && docker-compose pull --ignore-pull-failures r
+ - name: Docker Build
+ shell: bash
+ run: cd arrow && docker-compose build r
+ - name: Docker Run
+ shell: bash
+ run: cd arrow && docker-compose run r
+ - name: Dump install logs
+ run: cat arrow/r/check/arrow.Rcheck/00install.out
+ if: always()
+ - name: Dump test logs
+ run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
+ if: always()
+ - name: Save the test output
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-output
+ path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
diff --git a/src/arrow/dev/tasks/r/github.macos-linux.local.yml b/src/arrow/dev/tasks/r/github.macos-linux.local.yml
new file mode 100644
index 000000000..79e3332af
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.macos-linux.local.yml
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ autobrew:
+ name: "install from local source"
+ runs-on: {{ "${{ matrix.os }}" }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macOS-latest, ubuntu-20.04]
+
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Configure non-autobrew dependencies (macos)
+ run: |
+ cd arrow/r
+ brew install openssl
+ if: contains(matrix.os, 'macOS')
+ - name: Configure non-autobrew dependencies (linux)
+ run: |
+ cd arrow/r
+ sudo apt-get update
+ sudo apt install libcurl4-openssl-dev libssl-dev
+ if: contains(matrix.os, 'ubuntu')
+ - uses: r-lib/actions/setup-r@v1
+ - name: Install dependencies
+ run: |
+ install.packages("remotes")
+ remotes::install_deps("arrow/r", dependencies = TRUE)
+ remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo"))
+ shell: Rscript {0}
+ - name: Session info
+ run: |
+ options(width = 100)
+ pkgs <- installed.packages()[, "Package"]
+ sessioninfo::session_info(pkgs, include_base = TRUE)
+ shell: Rscript {0}
+ - name: Install
+ env:
+ _R_CHECK_CRAN_INCOMING_: false
+ ARROW_USE_PKG_CONFIG: false
+ FORCE_BUNDLED_BUILD: true
+ LIBARROW_MINIMAL: false
+ ARROW_R_DEV: TRUE
+ run: |
+ cd arrow/r
+ R CMD INSTALL . --install-tests
+ - name: Run the tests
+ run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")'
+ - name: Dump test logs
+ run: cat arrow-tests/testthat.Rout*
+ if: failure()
+ - name: Save the test output
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-output
+ path: arrow-tests/testthat.Rout*
+ if: always()
diff --git a/src/arrow/dev/tasks/r/github.macos.autobrew.yml b/src/arrow/dev/tasks/r/github.macos.autobrew.yml
new file mode 100644
index 000000000..1b8500f64
--- /dev/null
+++ b/src/arrow/dev/tasks/r/github.macos.autobrew.yml
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ autobrew:
+ name: "Autobrew"
+ runs-on: macOS-latest
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Configure autobrew script
+ run: |
+ cd arrow/r
+ # Put the formula inside r/ so that it's included in the package build
+ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb
+ # Pin the current commit in the formula to test so that we're not always pulling from master
+ sed -i.bak -E -e 's@https://github.com/apache/arrow.git"$@{{ arrow.remote }}.git", :revision => "{{ arrow.head }}"@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak
+ # Sometimes crossbow gives a remote URL with .git and sometimes not. Make sure there's only one
+ sed -i.bak -E -e 's@.git.git@.git@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak
+ # Get minio for S3 testing
+ brew install minio
+ - uses: r-lib/actions/setup-r@v1
+ - name: Install dependencies
+ run: |
+ install.packages("remotes")
+ remotes::install_deps("arrow/r", dependencies = TRUE)
+ remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo"))
+ shell: Rscript {0}
+ - name: Session info
+ run: |
+ options(width = 100)
+ pkgs <- installed.packages()[, "Package"]
+ sessioninfo::session_info(pkgs, include_base = TRUE)
+ shell: Rscript {0}
+ - name: Check
+ env:
+ _R_CHECK_CRAN_INCOMING_: false
+ ARROW_USE_PKG_CONFIG: false
+ run: arrow/ci/scripts/r_test.sh arrow
+ - name: Dump install logs
+ run: cat arrow/r/check/arrow.Rcheck/00install.out
+ if: always()
+ - name: Dump test logs
+ run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
+ if: always()
+ - name: Save the test output
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-output
+ path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
diff --git a/src/arrow/dev/tasks/tasks.yml b/src/arrow/dev/tasks/tasks.yml
new file mode 100644
index 000000000..8e67d3116
--- /dev/null
+++ b/src/arrow/dev/tasks/tasks.yml
@@ -0,0 +1,1308 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+groups:
+ # these groups are just for convenience
+ # makes it easier to submit related tasks
+
+{############################# Packaging tasks ###############################}
+
+ conda:
+ - conda-*
+
+ wheel:
+ - wheel-*
+
+ linux:
+ - almalinux-*
+ - amazon-linux-*
+ - centos-*
+ - debian-*
+ - ubuntu-*
+
+ linux-amd64:
+ - almalinux-*-amd64
+ - amazon-linux-*-amd64
+ - centos-*-amd64
+ - debian-*-amd64
+ - ubuntu-*-amd64
+
+ linux-arm64:
+ - almalinux-*-arm64
+ - centos-*-arm64
+ - debian-*-arm64
+ - ubuntu-*-arm64
+
+ homebrew:
+ - homebrew-*
+
+ packaging:
+ - almalinux-*
+ - amazon-linux-*
+ - centos-*
+ - debian-*
+ - java-jars
+ - nuget
+ - python-sdist
+ - ubuntu-*
+ - wheel-*
+
+{############################# Testing tasks #################################}
+
+ test:
+ - test-*
+
+ cpp:
+ - test-*cpp*
+
+ c-glib:
+ - test-*c-glib*
+
+ python:
+ - test-*python*
+
+ r:
+ - test*-r-*
+ - homebrew-r-autobrew
+ # r-conda tasks
+ - conda-linux-gcc-py*-cpu-r*
+ - conda-osx-clang-py*-r*
+ - conda-win-vs2017-py*-r*
+
+ ruby:
+ - test-*ruby*
+
+ vcpkg:
+ - test-*vcpkg*
+
+ integration:
+ - test-*dask*
+ - test-*hdfs*
+ - test-*kartothek*
+ - test-*pandas*
+ - test-*spark*
+ # TEMP disable because those are failing due to needing upstream fix (ARROW-13594)
+ # - test-*turbodbc*
+
+ example:
+ - example-*
+
+ example-cpp:
+ - example-*cpp*
+
+ verify-rc:
+ - verify-rc-*
+
+ verify-rc-binaries:
+ - verify-rc-binaries-*
+
+ verify-rc-wheels:
+ - verify-rc-wheels-*
+
+ verify-rc-source:
+ - verify-rc-source-*
+
+ verify-rc-source-macos:
+ - verify-rc-source-*-macos-*
+
+ verify-rc-source-linux:
+ - verify-rc-source-*-linux-*
+
+{######################## Tasks to run regularly #############################}
+
+ nightly:
+ - almalinux-*
+ - amazon-linux-*
+ - debian-*
+ - ubuntu-*
+ - centos-*
+ - conda-*
+ - java-jars
+ # List the homebrews explicitly because we don't care about running homebrew-cpp-autobrew
+ - homebrew-cpp
+ - homebrew-r-autobrew
+ - nuget
+ - test-*
+ - example-*
+ - wheel-*
+ - python-sdist
+
+tasks:
+ # arbitrary_task_name:
+ # template: path of jinja2 templated yml
+ # params: optional extra parameters
+ # artifacts: list of regex patterns, each needs to match a single github
+ # release asset, version variable is replaced in the pattern
+ # e.g.:
+ # - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2
+
+{############################## Conda Linux ##################################}
+
+ conda-clean:
+ ci: azure
+ template: conda-recipes/azure.clean.yml
+
+ # Important notes on the conda setup here:
+ #
+ # * On conda-forge the `pyarrow` and `arrow-cpp` packages are built in
+ # the same feedstock as the dependency matrix is the same for them as
+ # Python and the OS are the main dimension. The R package `r-arrow` is
+ # an independent feedstock as it doesn't have the Python but the
+ # R dimension. To limit the number of CI jobs, we are building `r-arrow`
+ # for R 4.0 with the Python 3.6 jobs and for R 4.1 with the Python 3.7 jobs.
+ # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically
+ # generated and to be synced regularly from the feedstock. We have no way
+ # yet to generate them inside the arrow repository automatically.
+
+ conda-linux-gcc-py36-cpu-r40:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
+ r_config: linux_64_r_base4.0
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-linux-gcc-py37-cpu-r41:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
+ r_config: linux_64_r_base4.1
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-linux-gcc-py38-cpu:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-linux-gcc-py39-cpu:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+
+{% for python_version, numpy_version in [("3.6", "1.17"),
+ ("3.7", "1.17"),
+ ("3.8", "1.17"),
+ ("3.9", "1.19"),
+ ("3.10", "1.21")] %}
+ {% set pyver = python_version | replace(".", "") %}
+
+ conda-linux-gcc-py{{ pyver }}-cuda:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_64_cuda_compiler_version10.2numpy{{ numpy_version }}python{{ python_version }}.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2
+ - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2
+
+ conda-linux-gcc-py{{ pyver }}-arm64:
+ ci: azure
+ template: conda-recipes/azure.linux.yml
+ params:
+ config: linux_aarch64_numpy{{ numpy_version }}python{{ python_version }}.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2
+
+{% endfor %}
+
+ ############################## Conda OSX ####################################
+
+ conda-osx-clang-py36-r40:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_64_numpy1.17python3.6.____cpython
+ r_config: osx_64_r_base4.0
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-osx-clang-py37-r41:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_64_numpy1.17python3.7.____cpython
+ r_config: osx_64_r_base4.1
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-osx-clang-py38:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_64_numpy1.17python3.8.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-osx-clang-py39:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_64_numpy1.19python3.9.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-osx-arm64-clang-py38:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_arm64_python3.8.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-osx-arm64-clang-py39:
+ ci: azure
+ template: conda-recipes/azure.osx.yml
+ params:
+ config: osx_arm64_python3.9.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ ############################## Conda Windows ################################
+
+ conda-win-vs2017-py36-r40:
+ ci: azure
+ template: conda-recipes/azure.win.yml
+ params:
+ config: win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
+ r_config: win_64_r_base4.0
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-win-vs2017-py37-r41:
+ ci: azure
+ template: conda-recipes/azure.win.yml
+ params:
+ config: win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
+ r_config: win_64_r_base4.1
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-win-vs2017-py38:
+ ci: azure
+ template: conda-recipes/azure.win.yml
+ params:
+ config: win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+ conda-win-vs2017-py39:
+ ci: azure
+ template: conda-recipes/azure.win.yml
+ params:
+ config: win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython
+ artifacts:
+ - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+ - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+
+{% for python_version, python_tag, abi_tag in [("3.6", "cp36", "cp36m"),
+ ("3.7", "cp37", "cp37m"),
+ ("3.8", "cp38", "cp38"),
+ ("3.9", "cp39", "cp39"),
+ ("3.10", "cp310", "cp310")] %}
+
+{############################## Wheel Linux ##################################}
+
+{% for ci, arch, arch_alias, x_y, manylinux in [("github", "amd64", "x86_64", "2_12", "2010"),
+ ("github", "amd64", "x86_64", "2_17", "2014"),
+ ("travis", "arm64", "aarch64", "2_17", "2014")] %}
+ wheel-manylinux{{ manylinux }}-{{ python_tag }}-{{ arch }}:
+ ci: {{ ci }}
+ template: python-wheels/{{ ci }}.linux.{{ arch }}.yml
+ params:
+ python_version: "{{ python_version }}"
+ manylinux_version: {{ manylinux }}
+ artifacts:
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-manylinux_{{ x_y }}_{{ arch_alias }}.manylinux{{ manylinux }}_{{ arch_alias }}.whl
+{% endfor %}
+
+{############################## Wheel OSX ####################################}
+
+# enable S3 support from macOS 10.13 so we don't need to bundle curl, crypt and ssl
+{% for macos_version, macos_codename, arrow_s3 in [("10.9", "mavericks", "OFF"),
+ ("10.13", "high-sierra", "ON")] %}
+ {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %}
+
+ wheel-macos-{{ macos_codename }}-{{ python_tag }}-amd64:
+ ci: github
+ template: python-wheels/github.osx.amd64.yml
+ params:
+ vcpkg_version: "2021.04.30"
+ python_version: "{{ python_version }}"
+ macos_deployment_target: {{ macos_version }}
+ arrow_s3: {{ arrow_s3 }}
+ artifacts:
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl
+
+{% endfor %}
+
+{############################## Wheel Windows ################################}
+
+ wheel-windows-{{ python_tag }}-amd64:
+ ci: github
+ template: python-wheels/github.windows.yml
+ params:
+ python_version: "{{ python_version }}"
+ artifacts:
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-win_amd64.whl
+
+{% endfor %}
+
+{############################## Wheel OSX M1 #################################}
+
+ # The python 3.8 universal2 installer has been built with macos deployment
+ # target 11.0, so we cannot build binaries with earlier deployment target
+ # otherwise distutils will raise a deployment target version mismatch error.
+ wheel-macos-big-sur-cp38-arm64:
+ ci: github
+ template: python-wheels/github.osx.arm64.yml
+ params:
+ arch: arm64
+ arrow_simd_level: "DEFAULT"
+ vcpkg_version: "2021.04.30"
+ python_version: "3.8"
+ macos_deployment_target: "11.0"
+
+ artifacts:
+ - pyarrow-{no_rc_version}-cp38-cp38-macosx_11_0_arm64.whl
+
+{% for python_version, python_tag in [("3.9", "cp39"), ("3.10", "cp310")] %}
+ wheel-macos-big-sur-{{ python_tag }}-arm64:
+ ci: github
+ template: python-wheels/github.osx.arm64.yml
+ params:
+ arch: arm64
+ arrow_simd_level: "DEFAULT"
+ vcpkg_version: "2021.04.30"
+ python_version: "{{ python_version }}"
+ macos_deployment_target: "11.0"
+ artifacts:
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_11_0_arm64.whl
+
+ wheel-macos-big-sur-{{ python_tag }}-universal2:
+ ci: github
+ template: python-wheels/github.osx.arm64.yml
+ params:
+ arch: universal2
+ # Universal2 builds for both rosetta and native, but we currently can't
+ # configure SIMD for both architectures at the same time
+ arrow_simd_level: "NONE"
+ vcpkg_version: "2021.04.30"
+ python_version: "{{ python_version }}"
+ macos_deployment_target: "10.13"
+ artifacts:
+ - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_10_13_universal2.whl
+{% endfor %}
+
+{############################ Python sdist ####################################}
+
+ python-sdist:
+ ci: github
+ template: python-sdist/github.yml
+ artifacts:
+ - pyarrow-{no_rc_version}.tar.gz
+
+{############################## Linux PKGS ####################################}
+
+{% for target in ["debian-buster",
+ "debian-bullseye",
+ "debian-bookworm",
+ "ubuntu-bionic",
+ "ubuntu-focal",
+ "ubuntu-hirsute",
+ "ubuntu-impish"] %}
+ {% for architecture in ["amd64", "arm64"] %}
+ {{ target }}-{{ architecture }}:
+ {% if architecture == "amd64" %}
+ ci: github
+ template: linux-packages/github.linux.amd64.yml
+ {% else %}
+ ci: travis
+ template: linux-packages/travis.linux.arm64.yml
+ {% endif %}
+ params:
+ {% if architecture == "amd64" %}
+ target: "{{ target }}"
+ {% else %}
+ target: "{{ target }}-arm64"
+ {% endif %}
+ task_namespace: "apt"
+ upload_extensions:
+ - .ddeb
+ - .deb
+ - .debian.tar.xz
+ - .dsc
+ - .orig.tar.gz
+ artifacts:
+ {% if architecture == "amd64" %}
+ - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+ - apache-arrow-apt-source_{no_rc_version}-1.dsc
+ - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+ - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
+ - apache-arrow_{no_rc_version}-1.debian.tar.xz
+ - apache-arrow_{no_rc_version}-1.dsc
+ - apache-arrow_{no_rc_version}.orig.tar.gz
+ {% endif %}
+ - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - gir1.2-arrow-flight-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dataset-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-dataset-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dataset600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-dataset600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-flight-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-flight-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-flight600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-python-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-python-flight600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-python600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-python600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libgandiva-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libgandiva-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libgandiva600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libgandiva600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libparquet-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libparquet-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libparquet600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libparquet600_{no_rc_version}-1_[a-z0-9]+.deb
+ {% if architecture == "amd64" %}
+ - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-cuda-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-cuda-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libarrow-cuda600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libarrow-cuda600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+ - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+ - libplasma-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libplasma-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+ - libplasma600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - libplasma600_{no_rc_version}-1_[a-z0-9]+.deb
+ - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+ - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
+ {% endif %}
+ {% endfor %}
+{% endfor %}
+
+{% for target in ["almalinux-8",
+ "amazon-linux-2",
+ "centos-7",
+ "centos-8"] %}
+ {% set is_rhel8_based = (target == "almalinux-8" or target == "centos-8") %}
+ {% for architecture in ["amd64", "arm64"] %}
+ {% if not (target in ["amazon-linux-2", "centos-7"] and architecture == "arm64") %}
+ {{ target }}-{{ architecture }}:
+ {% if architecture == "amd64" %}
+ ci: github
+ template: linux-packages/github.linux.amd64.yml
+ {% else %}
+ ci: travis
+ template: linux-packages/travis.linux.arm64.yml
+ {% endif %}
+ params:
+ {% if architecture == "amd64" %}
+ target: "{{ target }}"
+ {% else %}
+ target: "{{ target }}-aarch64"
+ {% endif %}
+ task_namespace: "yum"
+ upload_extensions:
+ - .rpm
+ artifacts:
+ {% if architecture == "amd64" %}
+ - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.noarch.rpm
+ - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.src.rpm
+ {% endif %}
+ - arrow-dataset-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-dataset-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-dataset-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-dataset-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-dataset-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-dataset-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-dataset-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if target != "amazon-linux-2" %}
+ - arrow-python-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - arrow-python-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-python-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-python-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - arrow-python-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - arrow-python-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ {% if architecture == "amd64" %}
+ - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm
+ {% endif %}
+ {% if is_rhel8_based and architecture == "amd64" %}
+ - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - gandiva-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - parquet-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - parquet-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - parquet-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - parquet-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - parquet-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - parquet-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - parquet-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - plasma-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - plasma-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ - plasma-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - plasma-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - plasma-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - plasma-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - plasma-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% if is_rhel8_based %}
+ - plasma-store-server-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ - plasma-store-server-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+ {% endif %}
+ {% endfor %}
+{% endfor %}
+
+ ############################## Homebrew Tasks ################################
+
+ homebrew-cpp:
+ ci: github
+ template: homebrew-formulae/github.macos.yml
+ params:
+ formula: apache-arrow.rb
+
+ homebrew-cpp-autobrew:
+ ci: github
+ template: homebrew-formulae/github.macos.yml
+ params:
+ formula: autobrew/apache-arrow.rb
+
+ homebrew-r-autobrew:
+ # This tests that the autobrew formula + script work in practice
+ ci: github
+ template: r/github.macos.autobrew.yml
+
+ ############################## Arrow JAR's ##################################
+
+ java-jars:
+ # Build jar's that contains cpp libraries dependencies
+ ci: github
+ template: java-jars/github.yml
+ artifacts:
+ - arrow-algorithm-{no_rc_version}-tests.jar
+ - arrow-algorithm-{no_rc_version}.jar
+ - arrow-algorithm-{no_rc_version}.pom
+ - arrow-avro-{no_rc_version}-tests.jar
+ - arrow-avro-{no_rc_version}.jar
+ - arrow-avro-{no_rc_version}.pom
+ - arrow-c-data-{no_rc_version}-tests.jar
+ - arrow-c-data-{no_rc_version}.jar
+ - arrow-c-data-{no_rc_version}.pom
+ - arrow-compression-{no_rc_version}-tests.jar
+ - arrow-compression-{no_rc_version}.jar
+ - arrow-compression-{no_rc_version}.pom
+ - arrow-dataset-{no_rc_version}-tests.jar
+ - arrow-dataset-{no_rc_version}.jar
+ - arrow-dataset-{no_rc_version}.pom
+ - arrow-format-{no_rc_version}-tests.jar
+ - arrow-format-{no_rc_version}.jar
+ - arrow-format-{no_rc_version}.pom
+ - arrow-gandiva-{no_rc_version}-tests.jar
+ - arrow-gandiva-{no_rc_version}.jar
+ - arrow-gandiva-{no_rc_version}.pom
+ - arrow-java-root-{no_rc_version}.pom
+ - arrow-jdbc-{no_rc_version}-tests.jar
+ - arrow-jdbc-{no_rc_version}.jar
+ - arrow-jdbc-{no_rc_version}.pom
+ - arrow-memory-{no_rc_version}.pom
+ - arrow-memory-core-{no_rc_version}-tests.jar
+ - arrow-memory-core-{no_rc_version}.jar
+ - arrow-memory-core-{no_rc_version}.pom
+ - arrow-memory-netty-{no_rc_version}-tests.jar
+ - arrow-memory-netty-{no_rc_version}.jar
+ - arrow-memory-netty-{no_rc_version}.pom
+ - arrow-memory-unsafe-{no_rc_version}-tests.jar
+ - arrow-memory-unsafe-{no_rc_version}.jar
+ - arrow-memory-unsafe-{no_rc_version}.pom
+ - arrow-orc-{no_rc_version}-tests.jar
+ - arrow-orc-{no_rc_version}.jar
+ - arrow-orc-{no_rc_version}.pom
+ - arrow-performance-{no_rc_version}-tests.jar
+ - arrow-performance-{no_rc_version}.jar
+ - arrow-performance-{no_rc_version}.pom
+ - arrow-plasma-{no_rc_version}-tests.jar
+ - arrow-plasma-{no_rc_version}.jar
+ - arrow-plasma-{no_rc_version}.pom
+ - arrow-tools-{no_rc_version}-jar-with-dependencies.jar
+ - arrow-tools-{no_rc_version}-tests.jar
+ - arrow-tools-{no_rc_version}.jar
+ - arrow-tools-{no_rc_version}.pom
+ - arrow-vector-{no_rc_version}-shade-format-flatbuffers.jar
+ - arrow-vector-{no_rc_version}-tests.jar
+ - arrow-vector-{no_rc_version}.jar
+ - arrow-vector-{no_rc_version}.pom
+ - flight-core-{no_rc_version}-jar-with-dependencies.jar
+ - flight-core-{no_rc_version}-shaded-ext.jar
+ - flight-core-{no_rc_version}-shaded.jar
+ - flight-core-{no_rc_version}-tests.jar
+ - flight-core-{no_rc_version}.jar
+ - flight-core-{no_rc_version}.pom
+ - flight-grpc-{no_rc_version}-tests.jar
+ - flight-grpc-{no_rc_version}.jar
+ - flight-grpc-{no_rc_version}.pom
+
+ ############################## NuGet packages ###############################
+
+ nuget:
+ ci: github
+ template: nuget-packages/github.linux.yml
+ params:
+ run: ubuntu-csharp
+ artifacts:
+ - Apache.Arrow.Flight.AspNetCore.{no_rc_version}.nupkg
+ - Apache.Arrow.Flight.AspNetCore.{no_rc_version}.snupkg
+ - Apache.Arrow.Flight.{no_rc_version}.nupkg
+ - Apache.Arrow.Flight.{no_rc_version}.snupkg
+ - Apache.Arrow.{no_rc_version}.nupkg
+ - Apache.Arrow.{no_rc_version}.snupkg
+
+ ########################### Release verification ############################
+
+{% for target in ["binary", "yum", "apt"] %}
+ verify-rc-binaries-{{ target }}-amd64:
+ ci: github
+ template: verify-rc/github.linux.amd64.yml
+ params:
+ env:
+ TEST_DEFAULT: 0
+ TEST_{{ target|upper }}: 1
+ artifact: "binaries"
+{% endfor %}
+
+{% for platform, arch, runner in [("linux", "amd64", "ubuntu-20.04"),
+ ("macos", "amd64", "macos-10.15")] %}
+ {% for target in ["cpp",
+ "csharp",
+ "go",
+ "integration",
+ "java",
+ "js",
+ "python",
+ "ruby"] %}
+
+ verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
+ ci: github
+ template: verify-rc/github.{{ platform }}.{{ arch }}.yml
+ params:
+ env:
+ INSTALL_NODE: 0
+ TEST_DEFAULT: 0
+ TEST_{{ target|upper }}: 1
+ artifact: "source"
+ github_runner: {{ runner }}
+ {% endfor %}
+{% endfor %}
+
+{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %}
+ {% for target in ["cpp",
+ "csharp",
+ "go",
+ "integration",
+ "js",
+ "python",
+ "ruby"] %}
+
+ verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
+ ci: github
+ template: verify-rc/github.{{ platform }}.{{ arch }}.yml
+ params:
+ env:
+ ARROW_FLIGHT: 0
+ ARROW_GANDIVA: 0
+ INSTALL_NODE: 0
+ TEST_DEFAULT: 0
+ TEST_INTEGRATION_JAVA: 0
+ TEST_{{ target|upper }}: 1
+ artifact: "source"
+ github_runner: {{ runner }}
+ {% endfor %}
+{% endfor %}
+
+ verify-rc-wheels-linux-amd64:
+ ci: github
+ template: verify-rc/github.linux.amd64.yml
+ params:
+ env:
+ TEST_DEFAULT: 0
+ artifact: "wheels"
+
+ verify-rc-wheels-macos-10.15-amd64:
+ ci: github
+ template: verify-rc/github.macos.amd64.yml
+ params:
+ github_runner: "macos-10.15"
+ env:
+ TEST_DEFAULT: 0
+ artifact: "wheels"
+
+ # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available:
+ # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
+ verify-rc-wheels-macos-11-amd64:
+ ci: github
+ template: verify-rc/github.macos.arm64.yml
+ params:
+ github_runner: "self-hosted"
+ arch_emulation: "x86_64"
+ env:
+ TEST_DEFAULT: 0
+ artifact: "wheels"
+
+ verify-rc-wheels-macos-11-arm64:
+ ci: github
+ template: verify-rc/github.macos.arm64.yml
+ params:
+ github_runner: "self-hosted"
+ arch_emulation: "arm64"
+ env:
+ TEST_DEFAULT: 0
+ artifact: "wheels"
+
+ verify-rc-source-windows:
+ ci: github
+ template: verify-rc/github.win.yml
+ params:
+ script: "verify-release-candidate.bat"
+
+ verify-rc-wheels-windows:
+ ci: github
+ template: verify-rc/github.win.yml
+ params:
+ script: "verify-release-candidate-wheels.bat"
+
+{############################## Docker tests #################################}
+
+{% for image in ["conda-cpp",
+ "debian-c-glib",
+ "ubuntu-c-glib",
+ "debian-ruby",
+ "ubuntu-ruby"] %}
+ test-{{ image }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ image: {{ image }}
+{% endfor %}
+
+ # Use azure to run valgrind tests to prevent OOM
+ test-conda-cpp-valgrind:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ run: conda-cpp-valgrind
+
+{% for ubuntu_version in ["18.04", "20.04"] %}
+ test-ubuntu-{{ ubuntu_version }}-cpp:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: {{ ubuntu_version }}
+ image: ubuntu-cpp
+{% endfor %}
+
+ test-ubuntu-20.04-cpp-bundled:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 20.04
+ image: ubuntu-cpp-bundled
+
+ test-debian-11-cpp:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ DEBIAN: 11
+ image: debian-cpp
+
+ test-fedora-33-cpp:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ FEDORA: 33
+ image: fedora-cpp
+
+ test-ubuntu-18.04-cpp-release:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 18.04
+ flags: "-e ARROW_BUILD_TYPE=release"
+ image: ubuntu-cpp
+
+ test-ubuntu-18.04-cpp-static:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 18.04
+ flags: "-e ARROW_BUILD_SHARED=OFF -e ARROW_BUILD_STATIC=ON -e ARROW_TEST_LINKAGE=static"
+ image: ubuntu-cpp
+
+{% for cpp_standard in [14, 17] %}
+ test-ubuntu-20.04-cpp-{{ cpp_standard }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 20.04
+ flags: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD={{ cpp_standard }}"
+ image: ubuntu-cpp
+{% endfor %}
+
+ test-ubuntu-20.04-cpp-thread-sanitizer:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ # clang-tools and llvm version need to be synchronized so as
+ # to have the right llvm-symbolizer version
+ CLANG_TOOLS: 11
+ LLVM: 11
+ UBUNTU: 20.04
+ image: ubuntu-cpp-thread-sanitizer
+
+{% for python_version in ["3.6", "3.7", "3.8", "3.9", "3.10"] %}
+ test-conda-python-{{ python_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: "{{ python_version }}"
+ image: conda-python
+{% endfor %}
+
+ test-conda-python-3.8-hypothesis:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ HYPOTHESIS_PROFILE: ci
+ PYARROW_TEST_HYPOTHESIS: ON
+ PYTHON: 3.8
+ # limit to execute hypothesis tests only
+ PYTEST_ARGS: "-m hypothesis"
+ image: conda-python-pandas
+
+ test-debian-11-python-3:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ DEBIAN: 11
+ run: debian-python
+
+ test-ubuntu-18.04-python-3:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ UBUNTU: 18.04
+ run: ubuntu-python
+
+ test-fedora-33-python-3:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ FEDORA: 33
+ run: fedora-python
+
+ test-r-linux-valgrind:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ ARROW_R_DEV: "TRUE"
+ UBUNTU: 18.04
+ run: ubuntu-r-valgrind
+
+ test-r-linux-rchk:
+ ci: github
+ template: r/github.linux.rchk.yml
+
+ test-r-linux-as-cran:
+ ci: github
+ template: r/github.linux.cran.yml
+ params:
+ MATRIX: {{ "${{ matrix.r_image }}" }}
+
+ test-r-arrow-backwards-compatibility:
+ ci: github
+ template: r/github.linux.arrow.version.back.compat.yml
+
+ test-r-versions:
+ ci: github
+ template: r/github.linux.versions.yml
+ params:
+ MATRIX: {{ "${{ matrix.r_version }}" }}
+
+ test-r-install-local:
+ ci: github
+ template: r/github.macos-linux.local.yml
+
+ test-r-devdocs:
+ ci: github
+ template: r/github.devdocs.yml
+
+ test-r-depsource-auto:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: rocker
+ r_image: r-base
+ r_tag: latest
+ flags: '-e ARROW_DEPENDENCY_SOURCE=AUTO'
+
+ test-r-depsource-system:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 21.04
+ CLANG_TOOLS: 9 # can remove this when >=9 is the default
+ flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_DEPENDENCY_SOURCE=SYSTEM'
+ image: ubuntu-r-only-r
+
+ test-r-offline-minimal:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: rocker
+ r_image: r-base
+ r_tag: latest
+ flags: '-e TEST_OFFLINE_BUILD=true'
+
+ test-r-offline-maximal:
+ ci: github
+ template: r/github.linux.offline.build.yml
+
+
+{% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"),
+ ("rocker", "r-base", "latest"),
+ ("rstudio", "r-base", "3.6-bionic"),
+ ("rstudio", "r-base", "3.6-centos8"),
+ ("rstudio", "r-base", "3.6-opensuse15"),
+ ("rstudio", "r-base", "3.6-opensuse42")] %}
+ test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: {{ r_org }}
+ r_image: {{ r_image }}
+ r_tag: {{ r_tag }}
+{% endfor %}
+
+ # This is with R built with --enable-lto
+ # CRAN also does R CMD INSTALL --use-LTO
+ # which overrides the UseLTO field in r/DESCRIPTION
+ test-r-rhub-debian-gcc-devel-lto-latest:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: rhub
+ r_image: debian-gcc-devel-lto
+ r_tag: latest
+ flags: '-e NOT_CRAN=false -e INSTALL_ARGS=--use-LTO'
+
+ # This one has -flto=auto
+ test-r-ubuntu-21.04:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 21.04
+ CLANG_TOOLS: 9 # can remove this when >=9 is the default
+ flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE'
+ image: ubuntu-r-only-r
+
+ # This also has -flto=auto
+ test-r-gcc-11:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ UBUNTU: 21.04
+ CLANG_TOOLS: 9 # can remove this when >=9 is the default
+ GCC_VERSION: 11
+ # S3 support is not buildable with gcc11 right now
+ flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_S3=OFF'
+ image: ubuntu-r-only-r
+
+ test-r-rstudio-r-base-3.6-centos7-devtoolset-8:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: rstudio
+ r_image: r-base
+ r_tag: 3.6-centos7
+ devtoolset_version: 8
+
+ test-r-minimal-build:
+ ci: azure
+ template: r/azure.linux.yml
+ params:
+ r_org: rocker
+ r_image: r-base
+ r_tag: latest
+ flags: "-e LIBARROW_MINIMAL=TRUE"
+
+ test-ubuntu-18.04-r-sanitizer:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ UBUNTU: 18.04
+ run: ubuntu-r-sanitizer
+
+ revdep-r-check:
+ ci: github
+ template: r/github.linux.revdepcheck.yml
+
+ test-debian-11-go-1.15:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ DEBIAN: 11
+ GO: 1.15
+ run: debian-go
+
+ test-ubuntu-20.10-docs:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ env:
+ UBUNTU: "20.10"
+ run: ubuntu-docs
+
+ test-ubuntu-default-docs:
+ ci: azure
+ template: docker-tests/azure.linux.yml
+ params:
+ run: ubuntu-docs
+
+ ############################## vcpkg tests ##################################
+
+ test-build-vcpkg-win:
+ ci: github
+ template: vcpkg-tests/github.windows.yml
+
+ ############################## Integration tests ############################
+
+{% for python_version, pandas_version, numpy_version, cache_leaf in [("3.6", "0.23", "1.16", True),
+ ("3.7", "0.24", "1.19", True),
+ ("3.7", "latest", "latest", False),
+ ("3.8", "latest", "latest", False),
+ ("3.8", "nightly", "nightly", False),
+ ("3.9", "master", "nightly", False)] %}
+ test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: {{ python_version }}
+ PANDAS: {{ pandas_version }}
+ NUMPY: {{ numpy_version }}
+ {% if cache_leaf %}
+ # use the latest pandas release, so prevent reusing any cached layers
+ flags: --no-leaf-cache
+ {% endif %}
+ image: conda-python-pandas
+{% endfor %}
+
+{% for dask_version in ["latest", "master"] %}
+ test-conda-python-3.9-dask-{{ dask_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: 3.9
+ DASK: {{ dask_version }}
+ # use the latest dask release, so prevent reusing any cached layers
+ flags: --no-leaf-cache
+ image: conda-python-dask
+{% endfor %}
+
+# TEMP disable because those are failing due to needing upstream fix (ARROW-13594)
+# {% for turbodbc_version in ["latest", "master"] %}
+# test-conda-python-3.7-turbodbc-{{ turbodbc_version }}:
+# ci: github
+# template: docker-tests/github.linux.yml
+# params:
+# env:
+# PYTHON: 3.7
+# TURBODBC: {{ turbodbc_version }}
+# # use the latest turbodbc release, so prevent reusing any cached layers
+# flags: --no-leaf-cache
+# image: conda-python-turbodbc
+# {% endfor %}
+
+{% for kartothek_version in ["latest", "master"] %}
+ test-conda-python-3.7-kartothek-{{ kartothek_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: 3.7
+ KARTOTHEK: {{ kartothek_version }}
+ flags: --no-leaf-cache
+ image: conda-python-kartothek
+{% endfor %}
+
+{% for hdfs_version in ["2.9.2", "3.2.1"] %}
+ test-conda-python-3.7-hdfs-{{ hdfs_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: 3.7
+ HDFS: {{ hdfs_version }}
+ image: conda-python-hdfs
+{% endfor %}
+
+{% for python_version, spark_version, test_pyarrow_only in [("3.7", "v3.1.2", "false"),
+ ("3.8", "v3.2.0", "false"),
+ ("3.9", "master", "false")] %}
+ test-conda-python-{{ python_version }}-spark-{{ spark_version }}:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ env:
+ PYTHON: {{ python_version }}
+ SPARK: {{ spark_version }}
+ TEST_PYARROW_ONLY: {{ test_pyarrow_only }}
+ # use the branch-3.0 of spark, so prevent reusing any layers
+ flags: --no-leaf-cache
+ image: conda-python-spark
+{% endfor %}
+
+ # Remove the "skipped-" prefix in ARROW-8475
+ skipped-test-conda-cpp-hiveserver2:
+ ci: github
+ template: docker-tests/github.linux.yml
+ params:
+ image: conda-cpp-hiveserver2
+
+{% for kind in ["static", "static-system-dependency"] %}
+ example-cpp-minimal-build-{{ kind }}:
+ ci: github
+ template: cpp-examples/github.linux.yml
+ params:
+ type: minimal_build
+ run: {{ kind }}
+{% endfor %}
diff --git a/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
new file mode 100644
index 000000000..6423720c2
--- /dev/null
+++ b/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
@@ -0,0 +1,86 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Run VsDevCmd.bat to set Visual Studio environment variables for building
+@rem on the command line. This is the path for Visual Studio Enterprise 2019
+
+call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64
+
+
+@rem Install build dependencies with vcpkg
+
+@rem TODO(ianmcook): change --x-manifest-root to --manifest-root after it
+@rem changes in vcpkg
+
+vcpkg install ^
+ --triplet x64-windows ^
+ --x-manifest-root cpp ^
+ --feature-flags=versions ^
+ --clean-after-build ^
+ || exit /B 1
+
+
+@rem Set environment variables
+
+set ARROW_TEST_DATA=%cd%\testing\data
+set PARQUET_TEST_DATA=%cd%\cpp\submodules\parquet-testing\data
+
+
+@rem Build Arrow C++ library
+
+mkdir cpp\build
+pushd cpp\build
+
+@rem TODO(ianmcook): test using --parallel %NUMBER_OF_PROCESSORS% with
+@rem cmake --build instead of specifying -DARROW_CXXFLAGS="/MP" here
+@rem (see https://gitlab.kitware.com/cmake/cmake/-/issues/20564)
+
+@rem TODO(ianmcook): Add -DARROW_BUILD_BENCHMARKS=ON after the issue described
+@rem at https://github.com/google/benchmark/issues/1046 is resolved
+
+cmake -G "Visual Studio 16 2019" -A x64 ^
+ -DARROW_BOOST_USE_SHARED=ON ^
+ -DARROW_BUILD_SHARED=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=ON ^
+ -DARROW_CXXFLAGS="/MP" ^
+ -DARROW_DATASET=ON ^
+ -DARROW_DEPENDENCY_SOURCE=VCPKG ^
+ -DARROW_FLIGHT=OFF ^
+ -DARROW_MIMALLOC=ON ^
+ -DARROW_PARQUET=ON ^
+ -DARROW_PYTHON=OFF ^
+ -DARROW_WITH_BROTLI=ON ^
+ -DARROW_WITH_BZ2=ON ^
+ -DARROW_WITH_LZ4=ON ^
+ -DARROW_WITH_SNAPPY=ON ^
+ -DARROW_WITH_ZLIB=ON ^
+ -DARROW_WITH_ZSTD=ON ^
+ -DCMAKE_BUILD_TYPE=release ^
+ -DCMAKE_UNITY_BUILD=ON ^
+ .. || exit /B 1
+
+cmake --build . --target INSTALL --config Release || exit /B 1
+
+
+@rem Test Arrow C++ library
+
+ctest --output-on-failure ^
+ --parallel %NUMBER_OF_PROCESSORS% ^
+ --timeout 300 || exit /B 1
+
+popd
diff --git a/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml b/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml
new file mode 100644
index 000000000..ad3e793a6
--- /dev/null
+++ b/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+ push:
+ branches:
+ - "*-github-*"
+
+jobs:
+ test-vcpkg-win:
+ name: Install build deps with vcpkg and build Arrow C++
+ runs-on: windows-2019
+ steps:
+ - name: Checkout Arrow
+ run: |
+ git clone --no-checkout {{ arrow.remote }} arrow
+ git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+ git -C arrow checkout FETCH_HEAD
+ git -C arrow submodule update --init --recursive
+ - name: Remove and Reinstall vcpkg
+ # When running vcpkg in Github Actions on Windows, remove the
+ # preinstalled vcpkg and install the newest version from source.
+ # Versions of vcpkg rapidly stop working until updated, and
+ # the safest and most reliable way to update vcpkg is simply
+ # to remove and reinstall it.
+ shell: cmd
+ run: |
+ CALL vcpkg integrate remove 2>NUL
+ CALL C:
+ CALL cd \
+ CALL rmdir /s /q vcpkg 2>NUL
+ CALL git clone https://github.com/microsoft/vcpkg.git vcpkg
+ CALL cd vcpkg
+ CALL bootstrap-vcpkg.bat -win64 -disableMetrics
+ CALL vcpkg integrate install
+ CALL setx PATH "%PATH%;C:\vcpkg"
+ - name: Install Dependencies with vcpkg and Build Arrow C++
+ shell: cmd
+ run: |
+ CALL cd arrow
+ CALL dev\tasks\vcpkg-tests\cpp-build-vcpkg.bat
diff --git a/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml b/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml
new file mode 100644
index 000000000..8a4613a49
--- /dev/null
+++ b/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ verify:
+ name: "Verify release candidate Ubuntu {{ artifact }}"
+ runs-on: {{ github_runner|default("ubuntu-20.04") }}
+ {% if env is defined %}
+ env:
+ {% for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {% endfor %}
+ {% endif %}
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - name: Install System Dependencies
+ run: |
+ # TODO: don't require removing newer llvms
+ sudo apt-get --purge remove -y llvm-9 clang-9
+ sudo apt-get update -y
+ sudo apt-get install -y \
+ autoconf-archive \
+ binfmt-support \
+ bison \
+ curl \
+ flex \
+ gtk-doc-tools \
+ jq \
+ libboost-all-dev \
+ libgirepository1.0-dev \
+ ninja-build \
+ qemu-user-static \
+ wget
+
+ if [ "$TEST_JAVA" = "1" ]; then
+ # Maven
+ MAVEN_VERSION=3.6.3
+ wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip
+ unzip apache-maven-$MAVEN_VERSION-bin.zip
+ mkdir -p $HOME/java
+ mv apache-maven-$MAVEN_VERSION $HOME/java
+ export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH
+ fi
+
+ if [ "$TEST_RUBY" = "1" ]; then
+ ruby --version
+ sudo gem install bundler
+ fi
+ - uses: actions/setup-node@v2-beta
+ with:
+ node-version: '14'
+ - name: Run verification
+ shell: bash
+ run: |
+ arrow/dev/release/verify-release-candidate.sh \
+ {{ artifact }} \
+ {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml b/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml
new file mode 100644
index 000000000..d39cda382
--- /dev/null
+++ b/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ verify:
+ name: "Verify release candidate macOS {{ artifact }}"
+ runs-on: {{ github_runner|default("macos-latest") }}
+ {% if env is defined %}
+ env:
+ {% for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {% endfor %}
+ {% endif %}
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - name: Install System Dependencies
+ shell: bash
+ run: |
+ brew update
+ brew bundle --file=arrow/cpp/Brewfile
+ brew bundle --file=arrow/c_glib/Brewfile
+ - uses: actions/setup-node@v2-beta
+ with:
+ node-version: '14'
+ - name: Run verification
+ shell: bash
+ run: |
+ arrow/dev/release/verify-release-candidate.sh \
+ {{ artifact }} \
+ {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml b/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml
new file mode 100644
index 000000000..26139ed60
--- /dev/null
+++ b/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ verify:
+ name: "Verify release candidate macOS {{ artifact }}"
+ runs-on: {{ github_runner }}
+ {% if env is defined %}
+ env:
+ {% for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {% endfor %}
+ {% endif %}
+
+ steps:
+ - name: Cleanup
+ shell: bash
+ run: rm -rf arrow
+
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - name: Run verification
+ shell: bash
+ run: |
+ export PATH="$(brew --prefix node@14)/bin:$PATH"
+ export PATH="$(brew --prefix ruby)/bin:$PATH"
+ export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig"
+ arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \
+ {{ artifact }} \
+ {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/src/arrow/dev/tasks/verify-rc/github.win.yml b/src/arrow/dev/tasks/verify-rc/github.win.yml
new file mode 100644
index 000000000..5406327e8
--- /dev/null
+++ b/src/arrow/dev/tasks/verify-rc/github.win.yml
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+ verify:
+ name: "Verify release candidate Windows source"
+ runs-on: windows-2016
+ {% if env is defined %}
+ env:
+ {% for key, value in env.items() %}
+ {{ key }}: {{ value }}
+ {% endfor %}
+ {% endif %}
+
+ steps:
+ {{ macros.github_checkout_arrow()|indent }}
+
+ - uses: conda-incubator/setup-miniconda@v2
+ - name: Install System Dependencies
+ run: |
+ choco install boost-msvc-14.1
+ choco install wget
+ - name: Run verification
+ shell: cmd
+ run: |
+ cd arrow
+ dev/release/{{ script }} {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/src/arrow/dev/test_merge_arrow_pr.py b/src/arrow/dev/test_merge_arrow_pr.py
new file mode 100644
index 000000000..8fe188350
--- /dev/null
+++ b/src/arrow/dev/test_merge_arrow_pr.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from collections import namedtuple
+
+import pytest
+
+import merge_arrow_pr
+
+
+FakeIssue = namedtuple('issue', ['fields'])
+FakeFields = namedtuple('fields', ['status', 'summary', 'assignee',
+ 'components', 'fixVersions'])
+FakeAssignee = namedtuple('assignee', ['displayName'])
+FakeStatus = namedtuple('status', ['name'])
+FakeComponent = namedtuple('component', ['name'])
+FakeVersion = namedtuple('version', ['name', 'raw'])
+
+RAW_VERSION_JSON = [
+ {'name': 'JS-0.4.0', 'released': False},
+ {'name': '0.11.0', 'released': False},
+ {'name': '0.12.0', 'released': False},
+ {'name': '0.10.0', 'released': True},
+ {'name': '0.9.0', 'released': True}
+]
+
+
+SOURCE_VERSIONS = [FakeVersion(raw['name'], raw)
+ for raw in RAW_VERSION_JSON]
+
+TRANSITIONS = [{'name': 'Resolve Issue', 'id': 1}]
+
+jira_id = 'ARROW-1234'
+status = FakeStatus('In Progress')
+fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'),
+ [FakeComponent('C++'), FakeComponent('Format')],
+ [])
+FAKE_ISSUE_1 = FakeIssue(fields)
+
+
+class FakeJIRA:
+
+ def __init__(self, issue=None, project_versions=None, transitions=None,
+ current_fix_versions=None):
+ self._issue = issue
+ self._project_versions = project_versions
+ self._transitions = transitions
+
+ def issue(self, jira_id):
+ return self._issue
+
+ def transitions(self, jira_id):
+ return self._transitions
+
+ def transition_issue(self, jira_id, transition_id, comment=None,
+ fixVersions=None):
+ self.captured_transition = {
+ 'jira_id': jira_id,
+ 'transition_id': transition_id,
+ 'comment': comment,
+ 'fixVersions': fixVersions
+ }
+
+ def get_candidate_fix_versions(self):
+ return SOURCE_VERSIONS, ['0.12.0']
+
+ def project_versions(self, project):
+ return self._project_versions
+
+
+class FakeCLI:
+
+ def __init__(self, responses=()):
+ self.responses = responses
+ self.position = 0
+
+ def prompt(self, prompt):
+ response = self.responses[self.position]
+ self.position += 1
+ return response
+
+ def fail(self, msg):
+ raise Exception(msg)
+
+
+def test_jira_fix_versions():
+ jira = FakeJIRA(project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+ all_versions, default_versions = issue.get_candidate_fix_versions()
+ assert all_versions == SOURCE_VERSIONS
+ assert default_versions == ['0.11.0']
+
+
+def test_jira_no_suggest_patch_release():
+ versions_json = [
+ {'name': '0.11.1', 'released': False},
+ {'name': '0.12.0', 'released': False},
+ ]
+
+ versions = [FakeVersion(raw['name'], raw) for raw in versions_json]
+
+ jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS)
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+ all_versions, default_versions = issue.get_candidate_fix_versions()
+ assert all_versions == versions
+ assert default_versions == ['0.12.0']
+
+
+def test_jira_parquet_no_suggest_non_cpp():
+ # ARROW-7351
+ versions_json = [
+ {'name': 'cpp-1.5.0', 'released': True},
+ {'name': 'cpp-1.6.0', 'released': False},
+ {'name': 'cpp-1.7.0', 'released': False},
+ {'name': '1.11.0', 'released': False},
+ {'name': '1.12.0', 'released': False}
+ ]
+
+ versions = [FakeVersion(raw['name'], raw)
+ for raw in versions_json]
+
+ jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS)
+ issue = merge_arrow_pr.JiraIssue(jira, 'PARQUET-1713', 'PARQUET',
+ FakeCLI())
+ all_versions, default_versions = issue.get_candidate_fix_versions()
+ assert all_versions == versions
+ assert default_versions == ['cpp-1.6.0']
+
+
+def test_jira_invalid_issue():
+ class Mock:
+
+ def issue(self, jira_id):
+ raise Exception("not found")
+
+ with pytest.raises(Exception):
+ merge_arrow_pr.JiraIssue(Mock(), 'ARROW-1234', 'ARROW', FakeCLI())
+
+
+def test_jira_resolve():
+ jira = FakeJIRA(issue=FAKE_ISSUE_1,
+ project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ my_comment = 'my comment'
+ fix_versions = [SOURCE_VERSIONS[1].raw]
+
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+ issue.resolve(fix_versions, my_comment)
+
+ assert jira.captured_transition == {
+ 'jira_id': 'ARROW-1234',
+ 'transition_id': 1,
+ 'comment': my_comment,
+ 'fixVersions': fix_versions
+ }
+
+
+def test_jira_resolve_non_mainline():
+ jira = FakeJIRA(issue=FAKE_ISSUE_1,
+ project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ my_comment = 'my comment'
+ fix_versions = [SOURCE_VERSIONS[0].raw]
+
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+ issue.resolve(fix_versions, my_comment)
+
+ assert jira.captured_transition == {
+ 'jira_id': 'ARROW-1234',
+ 'transition_id': 1,
+ 'comment': my_comment,
+ 'fixVersions': fix_versions
+ }
+
+
+def test_jira_resolve_released_fix_version():
+ # ARROW-5083
+ jira = FakeJIRA(issue=FAKE_ISSUE_1,
+ project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ cmd = FakeCLI(responses=['0.9.0'])
+ fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, jira)
+ assert fix_versions_json == [RAW_VERSION_JSON[-1]]
+
+
+def test_multiple_authors_bad_input():
+ a0 = 'Jimbob Crawfish <jimbob.crawfish@gmail.com>'
+ a1 = 'Jarvis McCratchett <jarvis.mccratchett@hotmail.com>'
+ a2 = 'Hank Miller <hank.miller@protonmail.com>'
+ distinct_authors = [a0, a1]
+
+ cmd = FakeCLI(responses=[''])
+ primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
+ cmd, distinct_authors)
+ assert primary_author == a0
+ assert new_distinct_authors == [a0, a1]
+
+ cmd = FakeCLI(responses=['oops', a1])
+ primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
+ cmd, distinct_authors)
+ assert primary_author == a1
+ assert new_distinct_authors == [a1, a0]
+
+ cmd = FakeCLI(responses=[a2])
+ primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
+ cmd, distinct_authors)
+ assert primary_author == a2
+ assert new_distinct_authors == [a2, a0, a1]
+
+
+def test_jira_already_resolved():
+ status = FakeStatus('Resolved')
+ fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'),
+ [FakeComponent('Java')], [])
+ issue = FakeIssue(fields)
+
+ jira = FakeJIRA(issue=issue,
+ project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ fix_versions = [SOURCE_VERSIONS[0].raw]
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+
+ with pytest.raises(Exception,
+ match="ARROW-1234 already has status 'Resolved'"):
+ issue.resolve(fix_versions, "")
+
+
+def test_no_unset_point_release_fix_version():
+ # ARROW-6915: We have had the problem of issues marked with a point release
+ # having their fix versions overwritten by the merge tool. This verifies
+ # that existing patch release versions are carried over
+ status = FakeStatus('In Progress')
+
+ versions_json = {
+ '0.14.2': {'name': '0.14.2', 'id': 1},
+ '0.15.1': {'name': '0.15.1', 'id': 2},
+ '0.16.0': {'name': '0.16.0', 'id': 3},
+ '0.17.0': {'name': '0.17.0', 'id': 4}
+ }
+
+ fields = FakeFields(status, 'summary', FakeAssignee('someone'),
+ [FakeComponent('Java')],
+ [FakeVersion(v, versions_json[v])
+ for v in ['0.17.0', '0.15.1', '0.14.2']])
+ issue = FakeIssue(fields)
+
+ jira = FakeJIRA(issue=issue, project_versions=SOURCE_VERSIONS,
+ transitions=TRANSITIONS)
+
+ issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
+ issue.resolve([versions_json['0.16.0']], "a comment")
+
+ assert jira.captured_transition == {
+ 'jira_id': 'ARROW-1234',
+ 'transition_id': 1,
+ 'comment': 'a comment',
+ 'fixVersions': [versions_json[v]
+ for v in ['0.16.0', '0.15.1', '0.14.2']]
+ }
+
+ issue.resolve([versions_json['0.15.1']], "a comment")
+
+ assert jira.captured_transition == {
+ 'jira_id': 'ARROW-1234',
+ 'transition_id': 1,
+ 'comment': 'a comment',
+ 'fixVersions': [versions_json[v] for v in ['0.15.1', '0.14.2']]
+ }
+
+
+def test_jira_output_no_components():
+ # ARROW-5472
+ status = 'Interesting work'
+ components = []
+ output = merge_arrow_pr.format_jira_output(
+ 'ARROW-1234', 'Resolved', status, FakeAssignee('Foo Bar'),
+ components)
+
+ assert output == """=== JIRA ARROW-1234 ===
+Summary\t\tInteresting work
+Assignee\tFoo Bar
+Components\tNO COMPONENTS!!!
+Status\t\tResolved
+URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234"""
+
+ output = merge_arrow_pr.format_jira_output(
+ 'ARROW-1234', 'Resolved', status, FakeAssignee('Foo Bar'),
+ [FakeComponent('C++'), FakeComponent('Python')])
+
+ assert output == """=== JIRA ARROW-1234 ===
+Summary\t\tInteresting work
+Assignee\tFoo Bar
+Components\tC++, Python
+Status\t\tResolved
+URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234"""