diff options
Diffstat (limited to 'src/arrow/dev')
386 files changed, 41634 insertions, 0 deletions
diff --git a/src/arrow/dev/.gitignore b/src/arrow/dev/.gitignore new file mode 100644 index 000000000..b0792939f --- /dev/null +++ b/src/arrow/dev/.gitignore @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Python virtual environments for dev tools +.venv*/ + diff --git a/src/arrow/dev/README.md b/src/arrow/dev/README.md new file mode 100644 index 000000000..258792b80 --- /dev/null +++ b/src/arrow/dev/README.md @@ -0,0 +1,189 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. 
See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> + +# Arrow Developer Scripts + +This directory contains scripts useful to developers when packaging, +testing, or committing to Arrow. + +Merging a pull request requires being a committer on the project. In addition +you need to have linked your GitHub and ASF accounts on +https://gitbox.apache.org/setup/ to be able to push to GitHub as the main +remote. + +NOTE: It may take some time (a few hours) between when you complete +the setup at GitBox, and when your GitHub account will be added as a +committer. + +## How to merge a Pull request + +Please don't merge PRs using the Github Web interface. Instead, set up +your git clone such as to have a remote named ``apache`` pointing to the +official Arrow repository: +``` +git remote add apache git@github.com:apache/arrow.git +``` + +and then run the following command: +``` +./dev/merge_arrow_pr.sh +``` + +This creates a new Python virtual environment under `dev/.venv[PY_VERSION]` +and installs all the necessary dependencies to run the Arrow merge script. +After installed, it runs the merge script. + +(we don't provide a wrapper script for Windows yet, so under Windows you'll +have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py` +directly) + +The merge script uses the GitHub REST API; if you encounter rate limit issues, +you may set a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal +Access Token. + +You can specify the username and the password of your JIRA account in +`APACHE_JIRA_USERNAME` and `APACHE_JIRA_PASSWORD` environment variables. +If these aren't supplied, the script will ask you the values of them. + +Note that the directory name of your Arrow git clone must be called `arrow`. + +example output: +``` +Which pull request would you like to merge? (e.g. 34): +``` +Type the pull request number (from https://github.com/apache/arrow/pulls) and hit enter. 
+``` +=== Pull Request #X === +title Blah Blah Blah +source repo/branch +target master +url https://api.github.com/repos/apache/arrow/pulls/X + +Proceed with merging pull request #3? (y/n): +``` +If this looks good, type y and hit enter. +``` +From git-wip-us.apache.org:/repos/asf/arrow.git + * [new branch] master -> PR_TOOL_MERGE_PR_3_MASTER +Switched to branch 'PR_TOOL_MERGE_PR_3_MASTER' + +Merge complete (local ref PR_TOOL_MERGE_PR_3_MASTER). Push to apache? (y/n): +``` +A local branch with the merge has been created. +type y and hit enter to push it to apache master +``` +Counting objects: 67, done. +Delta compression using up to 4 threads. +Compressing objects: 100% (26/26), done. +Writing objects: 100% (36/36), 5.32 KiB, done. +Total 36 (delta 17), reused 0 (delta 0) +To git-wip-us.apache.org:/repos/arrow-mr.git + b767ac4..485658a PR_TOOL_MERGE_PR_X_MASTER -> master +Restoring head pointer to b767ac4e +Note: checking out 'b767ac4e'. + +You are in 'detached HEAD' state. You can look around, make experimental +changes and commit them, and you can discard any commits you make in this +state without impacting any branches by performing another checkout. + +If you want to create a new branch to retain commits you create, you may +do so (now or later) by using -b with the checkout command again. Example: + + git checkout -b new_branch_name + +HEAD is now at b767ac4... Update README.md +Deleting local branch PR_TOOL_MERGE_PR_X +Deleting local branch PR_TOOL_MERGE_PR_X_MASTER +Pull request #X merged! +Merge hash: 485658a5 + +Would you like to pick 485658a5 into another branch? (y/n): +``` +For now just say n as we have 1 branch + +## Verifying Release Candidates + +We have provided a script to assist with verifying release candidates: + +```shell +bash dev/release/verify-release-candidate.sh 0.7.0 0 +``` + +Currently this only works on Linux (patches to expand to macOS welcome!). Read +the script for information about system dependencies. 
+ +On Windows, we have a script that verifies C++ and Python (requires Visual +Studio 2015): + +``` +dev/release/verify-release-candidate.bat apache-arrow-0.7.0.tar.gz +``` + +### Verifying the JavaScript release + +For JavaScript-specific releases, use a different verification script: + +```shell +bash dev/release/js-verify-release-candidate.sh 0.7.0 0 +``` + +# Integration testing + +Build the following base image used by multiple tests: + +```shell +docker build -t arrow_integration_xenial_base -f docker_common/Dockerfile.xenial.base . +``` + +## HDFS C++ / Python support + +```shell +docker-compose build conda-cpp +docker-compose build conda-python +docker-compose build conda-python-hdfs +docker-compose run --rm conda-python-hdfs +``` + +## Apache Spark Integration Tests + +Tests can be run to ensure that the current snapshot of Java and Python Arrow +works with Spark. This will run a docker image to build Arrow C++ +and Python in a Conda environment, build and install Arrow Java to the local +Maven repository, build Spark with the new Arrow artifact, and run Arrow +related unit tests in Spark for Java and Python. Any errors will exit with a +non-zero value. To run, use the following command: + +```shell +docker-compose build conda-cpp +docker-compose build conda-python +docker-compose build conda-python-spark +docker-compose run --rm conda-python-spark +``` + +If you already are building Spark, these commands will map your local Maven +repo to the image and save time by not having to download all dependencies. +Be aware that Docker writes files as root, which can cause problems for Maven +on the host. + +```shell +docker-compose run --rm -v $HOME/.m2:/root/.m2 conda-python-spark +``` + +NOTE: If the Java API has breaking changes, a patched version of Spark might +need to be used to successfully build. 
diff --git a/src/arrow/dev/archery/MANIFEST.in b/src/arrow/dev/archery/MANIFEST.in new file mode 100644 index 000000000..90fe034c2 --- /dev/null +++ b/src/arrow/dev/archery/MANIFEST.in @@ -0,0 +1,4 @@ +include ../../LICENSE.txt +include ../../NOTICE.txt + +include archery/reports/* diff --git a/src/arrow/dev/archery/README.md b/src/arrow/dev/archery/README.md new file mode 100644 index 000000000..eff654416 --- /dev/null +++ b/src/arrow/dev/archery/README.md @@ -0,0 +1,49 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> + +# Developing with Archery + +Archery is documented on the Arrow website: + +* [Daily development using Archery](https://arrow.apache.org/docs/developers/archery.html) +* [Using Archery and Crossbow](https://arrow.apache.org/docs/developers/crossbow.html) +* [Using Archery and Docker](https://arrow.apache.org/docs/developers/docker.html) + +# Installing Archery + +See the pages linked above for more details. 
As a general overview, Archery +comes in a number of subpackages, each needing to be installed if you want +to use the functionality of it: + +* lint – lint (and in some cases auto-format) code in the Arrow repo + To install: `pip install -e "arrow/dev/archery[lint]"` +* benchmark – to run Arrow benchmarks using Archery + To install: `pip install -e "arrow/dev/archery[benchmark]"` +* docker – to run docker-compose based tasks more easily + To install: `pip install -e "arrow/dev/archery[docker]"` +* release – release related helpers + To install: `pip install -e "arrow/dev/archery[release]"` +* crossbow – to trigger + interact with the crossbow build system + To install: `pip install -e "arrow/dev/archery[crossbow]"` +* crossbow-upload + To install: `pip install -e "arrow/dev/archery[crossbow-upload]"` + +Additionally, if you would prefer to install everything at once, +`pip install -e "arrow/dev/archery[all]"` is an alias for all of +the above subpackages.
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/__init__.py b/src/arrow/dev/archery/archery/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/src/arrow/dev/archery/archery/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/src/arrow/dev/archery/archery/benchmark/__init__.py b/src/arrow/dev/archery/archery/benchmark/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/src/arrow/dev/archery/archery/benchmark/codec.py b/src/arrow/dev/archery/archery/benchmark/codec.py new file mode 100644 index 000000000..4157890d1 --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/codec.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import json

from ..benchmark.core import Benchmark, BenchmarkSuite
from ..benchmark.runner import BenchmarkRunner, StaticBenchmarkRunner
from ..benchmark.compare import BenchmarkComparator


class JsonEncoder(json.JSONEncoder):
    """JSON encoder that serializes benchmark objects through their codecs."""

    def default(self, o):
        """Encode *o* with the matching codec, else defer to the base class."""
        # Checked in this order; the first matching type selects the codec.
        dispatch = (
            (Benchmark, BenchmarkCodec),
            (BenchmarkSuite, BenchmarkSuiteCodec),
            (BenchmarkRunner, BenchmarkRunnerCodec),
            (BenchmarkComparator, BenchmarkComparatorCodec),
        )
        for kind, codec in dispatch:
            if isinstance(o, kind):
                return codec.encode(o)

        return super().default(o)


class BenchmarkCodec:
    """Converts a ``Benchmark`` to and from a plain dictionary."""

    @staticmethod
    def encode(b):
        """Return the dictionary representation of benchmark *b*."""
        fields = ("name", "unit", "less_is_better", "values", "time_unit",
                  "times", "counters")
        return {field: getattr(b, field) for field in fields}

    @staticmethod
    def decode(dct, **kwargs):
        """Build a ``Benchmark`` from *dct* plus any extra keyword args."""
        return Benchmark(**dct, **kwargs)


class BenchmarkSuiteCodec:
    """Converts a ``BenchmarkSuite`` to and from a plain dictionary."""

    @staticmethod
    def encode(bs):
        """Return the suite name together with its encoded benchmarks."""
        encoded = []
        for bench in bs.benchmarks:
            encoded.append(BenchmarkCodec.encode(bench))
        return {"name": bs.name, "benchmarks": encoded}

    @staticmethod
    def decode(dct, **kwargs):
        """Build a ``BenchmarkSuite`` from *dct*.

        Note: the "benchmarks" entry is popped from *dct*, so the caller's
        dictionary is modified.
        """
        raw_benchmarks = dct.pop("benchmarks", [])
        decoded = [BenchmarkCodec.decode(raw) for raw in raw_benchmarks]
        return BenchmarkSuite(benchmarks=decoded, **dct, **kwargs)


class BenchmarkRunnerCodec:
    """Converts a benchmark runner to and from a plain dictionary."""

    @staticmethod
    def encode(br):
        """Return a dictionary holding the runner's encoded suites."""
        encoded = []
        for suite in br.suites:
            encoded.append(BenchmarkSuiteCodec.encode(suite))
        return {"suites": encoded}

    @staticmethod
    def decode(dct, **kwargs):
        """Build a ``StaticBenchmarkRunner`` from *dct*.

        Note: the "suites" entry is popped from *dct*, so the caller's
        dictionary is modified.
        """
        raw_suites = dct.pop("suites", [])
        decoded = [BenchmarkSuiteCodec.decode(raw) for raw in raw_suites]
        return StaticBenchmarkRunner(suites=decoded, **dct, **kwargs)


class BenchmarkComparatorCodec:
    """Encodes a ``BenchmarkComparator`` as a plain dictionary."""

    @staticmethod
    def encode(bc):
        """Return the comparator's formatted dict, tagged with its suite."""
        encoded = bc.formatted

        # Only record the suite when the comparator actually carries one.
        if bc.suite_name:
            encoded["suite"] = bc.suite_name

        return encoded


# diff --git a/src/arrow/dev/archery/archery/benchmark/compare.py b/src/arrow/dev/archery/archery/benchmark/compare.py new file mode 100644 index 000000000..622b80179 ---
/dev/null +++ b/src/arrow/dev/archery/archery/benchmark/compare.py @@ -0,0 +1,173 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# Define a global regression threshold as 5%. This is purely subjective and +# flawed. This does not track cumulative regression. 
DEFAULT_THRESHOLD = 0.05


def items_per_seconds_fmt(value):
    """Format an items-per-second rate with decimal K/M/G prefixes."""
    if value < 1000:
        return "{} items/sec".format(value)
    if value < 1000**2:
        return "{:.3f}K items/sec".format(value / 1000)
    if value < 1000**3:
        return "{:.3f}M items/sec".format(value / 1000**2)
    else:
        return "{:.3f}G items/sec".format(value / 1000**3)


def bytes_per_seconds_fmt(value):
    """Format a bytes-per-second rate with binary KiB/MiB/GiB/TiB units."""
    if value < 1024:
        return "{} bytes/sec".format(value)
    if value < 1024**2:
        return "{:.3f} KiB/sec".format(value / 1024)
    if value < 1024**3:
        return "{:.3f} MiB/sec".format(value / 1024**2)
    if value < 1024**4:
        return "{:.3f} GiB/sec".format(value / 1024**3)
    else:
        return "{:.3f} TiB/sec".format(value / 1024**4)


def change_fmt(value):
    """Format a relative change (e.g. 0.05) as a percentage string."""
    return "{:.3%}".format(value)


def formatter_for_unit(unit):
    """Return the formatting function for *unit*.

    Units other than "bytes_per_second" and "items_per_second" get an
    identity function, i.e. the value is returned unformatted.
    """
    if unit == "bytes_per_second":
        return bytes_per_seconds_fmt
    elif unit == "items_per_second":
        return items_per_seconds_fmt
    else:
        return lambda x: x


class BenchmarkComparator:
    """ Compares two benchmarks.

    Encodes the logic of comparing two benchmarks and deciding whether the
    contender induces a regression relative to the baseline.
    """

    def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD,
                 suite_name=None):
        self.contender = contender
        self.baseline = baseline
        self.threshold = threshold
        self.suite_name = suite_name

    @property
    def name(self):
        """ Name of the benchmark, taken from the baseline. """
        return self.baseline.name

    @property
    def less_is_better(self):
        """ Whether a smaller value is an improvement (from the baseline). """
        return self.baseline.less_is_better

    @property
    def unit(self):
        """ Unit of the benchmark value, taken from the baseline. """
        return self.baseline.unit

    @property
    def change(self):
        """ Relative change of the contender with respect to the baseline.

        Returns 0.0 when the baseline value is zero, since a relative change
        is undefined in that case.
        """
        new = self.contender.value
        old = self.baseline.value

        if old == 0:
            return 0.0

        return float(new - old) / abs(old)

    @property
    def confidence(self):
        """ Indicate if a comparison of benchmarks should be trusted. """
        return True

    @property
    def regression(self):
        """ Whether the change is worse than the threshold. """
        change = self.change
        # Normalize the sign so that a positive value always means "worse".
        adjusted_change = change if self.less_is_better else -change
        return (self.confidence and adjusted_change > self.threshold)

    @property
    def formatted(self):
        """ Human-readable comparison, with values formatted per unit. """
        fmt = formatter_for_unit(self.unit)
        return {
            "benchmark": self.name,
            "change": change_fmt(self.change),
            "regression": self.regression,
            "baseline": fmt(self.baseline.value),
            "contender": fmt(self.contender.value),
            "unit": self.unit,
            "less_is_better": self.less_is_better,
            "counters": str(self.baseline.counters)
        }

    def compare(self, comparator=None):
        """ Raw (unformatted) comparison as a dictionary.

        The `comparator` argument is accepted but not used.
        """
        return {
            "benchmark": self.name,
            "change": self.change,
            "regression": self.regression,
            "baseline": self.baseline.value,
            "contender": self.contender.value,
            "unit": self.unit,
            "less_is_better": self.less_is_better,
            "counters": self.baseline.counters
        }

    def __call__(self, **kwargs):
        return self.compare(**kwargs)


def pairwise_compare(contender, baseline):
    """ Yield `(name, (contender_item, baseline_item))` for shared names.

    Items are matched on their `name` attribute; names present on only one
    side are skipped. Pairs are yielded in the order the names first appear
    in `contender`, so the output is stable from run to run (iterating a set
    intersection of strings is not, because of hash randomization).
    """
    dict_contender = {e.name: e for e in contender}
    dict_baseline = {e.name: e for e in baseline}

    for name, contender_item in dict_contender.items():
        if name in dict_baseline:
            yield name, (contender_item, dict_baseline[name])


class RunnerComparator:
    """ Compares suites/benchmarks from runners.

    It is up to the caller to ensure that runners are compatible (both from
    the same language implementation).
    """

    def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD):
        self.contender = contender
        self.baseline = baseline
        self.threshold = threshold

    @property
    def comparisons(self):
        """ Yield a BenchmarkComparator per benchmark shared by both runners.

        Suites are paired by name first, then benchmarks within each pair of
        suites are paired by name.
        """
        contender = self.contender.suites
        baseline = self.baseline.suites
        suites = pairwise_compare(contender, baseline)

        for suite_name, (suite_cont, suite_base) in suites:
            benchmarks = pairwise_compare(
                suite_cont.benchmarks, suite_base.benchmarks)

            for _, (bench_cont, bench_base) in benchmarks:
                yield BenchmarkComparator(bench_cont, bench_base,
                                          threshold=self.threshold,
                                          suite_name=suite_name)


# diff --git a/src/arrow/dev/archery/archery/benchmark/core.py b/src/arrow/dev/archery/archery/benchmark/core.py new file mode 100644 index 000000000..5a92271a3 --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/core.py @@ -0,0 +1,57 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
def median(values):
    """Return the median of *values*.

    The input does not need to be sorted; a sorted copy is made internally.
    For an even number of values the mean of the two middle values is
    returned.

    Raises
    ------
    ValueError
        If *values* is empty.
    """
    ordered = sorted(values)
    n = len(ordered)
    if n == 0:
        raise ValueError("median requires at least one value")
    elif n % 2 == 0:
        return (ordered[(n // 2) - 1] + ordered[n // 2]) / 2
    else:
        return ordered[n // 2]


class Benchmark:
    """A named benchmark with its measured values and timings.

    `values` and `times` are stored sorted; `median` is computed from
    `values` at construction time. `counters` defaults to an empty dict.
    """

    def __init__(self, name, unit, less_is_better, values, time_unit,
                 times, counters=None):
        self.name = name
        self.unit = unit
        self.less_is_better = less_is_better
        self.values = sorted(values)
        self.time_unit = time_unit
        self.times = sorted(times)
        self.median = median(self.values)
        self.counters = counters or {}

    @property
    def value(self):
        """The representative value of the benchmark (its median)."""
        return self.median

    def __repr__(self):
        return "Benchmark[name={},value={}]".format(self.name, self.value)


class BenchmarkSuite:
    """A named collection of benchmarks."""

    def __init__(self, name, benchmarks):
        self.name = name
        self.benchmarks = benchmarks

    def __repr__(self):
        return "BenchmarkSuite[name={}, benchmarks={}]".format(
            self.name, self.benchmarks
        )


# diff --git a/src/arrow/dev/archery/archery/benchmark/google.py b/src/arrow/dev/archery/archery/benchmark/google.py new file mode 100644 index 000000000..ebcc52636 --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/google.py @@ -0,0 +1,174 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from itertools import filterfalse, groupby, tee
import json
import subprocess
from tempfile import NamedTemporaryFile

from .core import Benchmark
from ..utils.command import Command


def partition(pred, iterable):
    """Split *iterable* into `(matching, non_matching)` lists using *pred*."""
    # adapted from python's examples
    t1, t2 = tee(iterable)
    return list(filter(pred, t1)), list(filterfalse(pred, t2))


class GoogleBenchmarkCommand(Command):
    """ Run a google benchmark binary.

    This assumes the binary supports the standard command line options,
    notably `--benchmark_filter`, `--benchmark_format`, etc...
    """

    def __init__(self, benchmark_bin, benchmark_filter=None):
        self.bin = benchmark_bin
        self.benchmark_filter = benchmark_filter

    def list_benchmarks(self):
        """ Return the benchmark names printed by `--benchmark_list_tests`,
        one per output line, honoring the configured filter if any.
        """
        argv = ["--benchmark_list_tests"]
        if self.benchmark_filter:
            argv.append("--benchmark_filter={}".format(self.benchmark_filter))
        result = self.run(*argv, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
        return result.stdout.decode("utf-8").splitlines()

    def results(self, repetitions=1):
        """ Run the benchmarks and return the parsed JSON report.

        The binary is asked to write its JSON output to a temporary file,
        which is then loaded and returned.
        """
        with NamedTemporaryFile() as out:
            argv = ["--benchmark_repetitions={}".format(repetitions),
                    "--benchmark_out={}".format(out.name),
                    "--benchmark_out_format=json"]

            if self.benchmark_filter:
                argv.append(
                    "--benchmark_filter={}".format(self.benchmark_filter)
                )

            self.run(*argv, check=True)
            return json.load(out)


class GoogleBenchmarkObservation:
    """ Represents one run of a single (google c++) benchmark.

    Aggregates are reported by Google Benchmark executables alongside
    other observations whenever repetitions are specified (with
    `--benchmark_repetitions` on the bare benchmark, or with the
    archery option `--repetitions`). Aggregate observations are not
    included in `GoogleBenchmark.runs`.

    RegressionSumKernel/32768/0         1 us  1 us  25.8077GB/s
    RegressionSumKernel/32768/0         1 us  1 us  25.7066GB/s
    RegressionSumKernel/32768/0         1 us  1 us  25.1481GB/s
    RegressionSumKernel/32768/0         1 us  1 us  25.846GB/s
    RegressionSumKernel/32768/0         1 us  1 us  25.6453GB/s
    RegressionSumKernel/32768/0_mean    1 us  1 us  25.6307GB/s
    RegressionSumKernel/32768/0_median  1 us  1 us  25.7066GB/s
    RegressionSumKernel/32768/0_stddev  0 us  0 us  288.046MB/s
    """

    def __init__(self, name, real_time, cpu_time, time_unit, run_type,
                 size=None, bytes_per_second=None, items_per_second=None,
                 **counters):
        self._name = name
        self.real_time = real_time
        self.cpu_time = cpu_time
        self.time_unit = time_unit
        self.run_type = run_type
        self.size = size
        self.bytes_per_second = bytes_per_second
        self.items_per_second = items_per_second
        # Any remaining keyword arguments are kept as user counters.
        self.counters = counters

    @property
    def is_aggregate(self):
        """ Indicate if the observation is a run or an aggregate. """
        return self.run_type == "aggregate"

    @property
    def is_realtime(self):
        """ Indicate if the preferred value is realtime instead of cputime. """
        return "/real_time" in self.name

    @property
    def name(self):
        """ Benchmark name, with the aggregate suffix (e.g. `_mean`)
        stripped for aggregate observations.
        """
        name = self._name
        return name.rsplit("_", maxsplit=1)[0] if self.is_aggregate else name

    @property
    def time(self):
        """ Real time for `/real_time` benchmarks, cpu time otherwise. """
        return self.real_time if self.is_realtime else self.cpu_time

    @property
    def value(self):
        """ Return the benchmark value."""
        return self.bytes_per_second or self.items_per_second or self.time

    @property
    def unit(self):
        """ Unit matching `value`: a throughput unit when one was reported,
        the time unit otherwise.
        """
        if self.bytes_per_second:
            return "bytes_per_second"
        elif self.items_per_second:
            return "items_per_second"
        else:
            return self.time_unit

    def __repr__(self):
        return str(self.value)


class GoogleBenchmark(Benchmark):
    """ A set of GoogleBenchmarkObservations. """

    def __init__(self, name, runs):
        """ Initialize a GoogleBenchmark.

        Parameters
        ----------
        name: str
            Name of the benchmark
        runs: list(GoogleBenchmarkObservation)
            Repetitions of GoogleBenchmarkObservation run.

        """
        self.name = name
        # exclude google benchmark aggregate artifacts
        _, runs = partition(lambda b: b.is_aggregate, runs)
        self.runs = sorted(runs, key=lambda b: b.value)
        unit = self.runs[0].unit
        time_unit = self.runs[0].time_unit
        less_is_better = not unit.endswith("per_second")
        values = [b.value for b in self.runs]
        times = [b.real_time for b in self.runs]
        # Slight kludge to extract the UserCounters for each benchmark
        counters = self.runs[0].counters
        super().__init__(name, unit, less_is_better, values, time_unit, times,
                         counters)

    def __repr__(self):
        # The attribute is `name`; `names` does not exist and used to make
        # repr() raise AttributeError.
        return "GoogleBenchmark[name={},runs={}]".format(self.name, self.runs)

    @classmethod
    def from_json(cls, payload):
        """ Build one GoogleBenchmark per benchmark name found in `payload`,
        an iterable of observation dictionaries.
        """
        def group_key(x):
            return x.name

        benchmarks = map(lambda x: GoogleBenchmarkObservation(**x), payload)
        # groupby only merges adjacent items, hence the sort on the same key.
        groups = groupby(sorted(benchmarks, key=group_key), group_key)
        return [cls(k, list(bs)) for k, bs in groups]


# diff --git a/src/arrow/dev/archery/archery/benchmark/jmh.py b/src/arrow/dev/archery/archery/benchmark/jmh.py new file mode 100644 index 000000000..f531b6de1 --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/jmh.py @@ -0,0 +1,201 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from itertools import filterfalse, groupby, tee +import json +import subprocess +from tempfile import NamedTemporaryFile + +from .core import Benchmark +from ..utils.command import Command +from ..utils.maven import Maven + + +def partition(pred, iterable): + # adapted from python's examples + t1, t2 = tee(iterable) + return list(filter(pred, t1)), list(filterfalse(pred, t2)) + + +class JavaMicrobenchmarkHarnessCommand(Command): + """ Run a Java Micro Benchmark Harness + + This assumes the binary supports the standard command line options, + notably `-Dbenchmark_filter` + """ + + def __init__(self, build, benchmark_filter=None): + self.benchmark_filter = benchmark_filter + self.build = build + self.maven = Maven() + + """ Extract benchmark names from output between "Benchmarks:" and "[INFO]". + Assume the following output: + ... + Benchmarks: + org.apache.arrow.vector.IntBenchmarks.setIntDirectly + ... + org.apache.arrow.vector.IntBenchmarks.setWithValueHolder + org.apache.arrow.vector.IntBenchmarks.setWithWriter + ... 
class JavaMicrobenchmarkHarnessObservation:
    """ One result entry of a Java Microbenchmark Harness run.

    The score is kept as reported (`score`, `score_unit`) and exposed
    through `value`, rescaled with `normalizeFactor`.
    """

    # Factor applied to a score expressed per <unit> (or in <unit> per
    # operation) to rescale it to seconds. Units that are not listed use 1.
    _PER_SECOND_FACTORS = {
        "ns": 1000 * 1000 * 1000,
        "us": 1000 * 1000,
        "ms": 1000,
        "min": 1 / 60,
        "hr": 1 / (60 * 60),
        "day": 1 / (60 * 60 * 24),
    }

    def __init__(self, benchmark, primaryMetric,
                 forks, warmupIterations, measurementIterations, **counters):
        """ Initialize the observation from the fields of a result entry.

        Parameters
        ----------
        benchmark: str
            Name of the benchmark.
        primaryMetric: dict
            Must provide the "score" and "scoreUnit" keys.
        forks, warmupIterations, measurementIterations:
            Stored as `forks`, `warmups` and `runs`.
        **counters:
            Remaining fields of the entry; "mode", "threads", "warmupTime",
            "measurementTime" and "jvmArgs" are required.
        """
        self.name = benchmark
        self.primaryMetric = primaryMetric
        self.score = primaryMetric["score"]
        self.score_unit = primaryMetric["scoreUnit"]
        self.forks = forks
        self.warmups = warmupIterations
        self.runs = measurementIterations
        self.counters = {
            "mode": counters["mode"],
            "threads": counters["threads"],
            "warmups": warmupIterations,
            "warmupTime": counters["warmupTime"],
            "measurements": measurementIterations,
            "measurementTime": counters["measurementTime"],
            "jvmArgs": counters["jvmArgs"]
        }

        reported_unit = self.score_unit
        # "<time>/op" scores are durations and get inverted in `value`.
        self.reciprocal_value = reported_unit.endswith("/op")

        # Split around the first "/" only: "ops/<time>" carries the time
        # unit after it, "<time>/op" carries the time unit before it.
        before_slash, _, after_slash = reported_unit.partition("/")
        if reported_unit.startswith("ops/"):
            self.normalizePerSec(after_slash)
        elif reported_unit.endswith("/op"):
            self.normalizePerSec(before_slash)
        else:
            self.normalizeFactor = 1

    @property
    def value(self):
        """ Return the benchmark value.

        The score (or its inverse when `reciprocal_value` is set)
        multiplied by `normalizeFactor`.
        """
        if self.reciprocal_value:
            base = 1 / self.score
        else:
            base = self.score
        return base * self.normalizeFactor

    def normalizePerSec(self, unit):
        """ Set `normalizeFactor` for the given time unit name. """
        self.normalizeFactor = self._PER_SECOND_FACTORS.get(unit, 1)

    @property
    def unit(self):
        """ Return "items_per_second" for op-based units, "?" otherwise. """
        reported_unit = self.score_unit
        if reported_unit.startswith("ops/") or reported_unit.endswith("/op"):
            return "items_per_second"
        return "?"

    def __repr__(self):
        return str(self.value)
+ + """ + self.name = name + self.runs = sorted(runs, key=lambda b: b.value) + unit = self.runs[0].unit + time_unit = "N/A" + less_is_better = not unit.endswith("per_second") + values = [b.value for b in self.runs] + times = [] + # Slight kludge to extract the UserCounters for each benchmark + counters = self.runs[0].counters + super().__init__(name, unit, less_is_better, values, time_unit, times, + counters) + + def __repr__(self): + return "JavaMicrobenchmark[name={},runs={}]".format( + self.name, self.runs) + + @classmethod + def from_json(cls, payload): + def group_key(x): + return x.name + + benchmarks = map( + lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload) + groups = groupby(sorted(benchmarks, key=group_key), group_key) + return [cls(k, list(bs)) for k, bs in groups] diff --git a/src/arrow/dev/archery/archery/benchmark/runner.py b/src/arrow/dev/archery/archery/benchmark/runner.py new file mode 100644 index 000000000..fc6d354b1 --- /dev/null +++ b/src/arrow/dev/archery/archery/benchmark/runner.py @@ -0,0 +1,313 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
def regex_filter(re_expr):
    """ Build a predicate telling whether a string matches `re_expr`.

    Parameters
    ----------
    re_expr : str or None
        Regular expression to search for; None disables the filtering.

    Returns
    -------
    callable
        Returns True for every string when `re_expr` is None. Otherwise it
        returns the result of the compiled pattern's `search` (a match
        object or None), so only its truthiness is meaningful.
    """
    if re_expr is None:
        return lambda s: True
    re_comp = re.compile(re_expr)
    return lambda s: re_comp.search(s)


DEFAULT_REPETITIONS = 1


class BenchmarkRunner:
    """ Base class holding the filters and repetition count of a run. """

    def __init__(self, suite_filter=None, benchmark_filter=None,
                 repetitions=DEFAULT_REPETITIONS):
        self.suite_filter = suite_filter
        self.benchmark_filter = benchmark_filter
        self.repetitions = repetitions

    @property
    def suites(self):
        raise NotImplementedError("BenchmarkRunner must implement suites")

    @staticmethod
    def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
        raise NotImplementedError(
            "BenchmarkRunner must implement from_rev_or_path")


class StaticBenchmarkRunner(BenchmarkRunner):
    """ Run suites from a (static) set of suites. """

    def __init__(self, suites, **kwargs):
        self._suites = suites
        super().__init__(**kwargs)

    @property
    def list_benchmarks(self):
        """ Yield "<suite name>.<benchmark name>" for every benchmark.

        The suite and benchmark filters are not applied here.
        """
        for suite in self._suites:
            for benchmark in suite.benchmarks:
                yield "{}.{}".format(suite.name, benchmark.name)

    @property
    def suites(self):
        """ Yield the suites matching `suite_filter`.

        Each yielded suite only holds the benchmarks whose name matches
        `benchmark_filter`.
        """
        suite_fn = regex_filter(self.suite_filter)
        benchmark_fn = regex_filter(self.benchmark_filter)

        for suite in (s for s in self._suites if suite_fn(s.name)):
            benchmarks = [b for b in suite.benchmarks if benchmark_fn(b.name)]
            yield BenchmarkSuite(suite.name, benchmarks)

    @classmethod
    def is_json_result(cls, path_or_str):
        """ Tell whether `path_or_str` can be decoded by `from_json`.

        Returns False when `from_json` raises or returns None. Only
        `Exception` subclasses are treated as "not a json result":
        KeyboardInterrupt and SystemExit propagate so that probing an
        argument can never swallow an interruption or an exit request.
        """
        try:
            return cls.from_json(path_or_str) is not None
        except Exception:
            return False

    @staticmethod
    def from_json(path_or_str, **kwargs):
        """ Decode a runner from a json file path or from a json string. """
        # .codec imported here to break recursive imports
        from .codec import BenchmarkRunnerCodec
        if os.path.isfile(path_or_str):
            with open(path_or_str) as f:
                loaded = json.load(f)
        else:
            loaded = json.loads(path_or_str)
        return BenchmarkRunnerCodec.decode(loaded, **kwargs)

    def __repr__(self):
        return "BenchmarkRunner[suites={}]".format(list(self.suites))
""" + # Ensure build is up-to-date to run benchmarks + self.build() + # Not the best method, but works for now + glob_expr = os.path.join(self.build.binaries_dir, "*-benchmark") + return {os.path.basename(b): b for b in glob.glob(glob_expr)} + + def suite(self, name, suite_bin): + """ Returns the resulting benchmarks for a given suite. """ + suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter) + + # Ensure there will be data + benchmark_names = suite_cmd.list_benchmarks() + if not benchmark_names: + return None + + results = suite_cmd.results(repetitions=self.repetitions) + benchmarks = GoogleBenchmark.from_json(results.get("benchmarks")) + return BenchmarkSuite(name, benchmarks) + + @property + def list_benchmarks(self): + for suite_name, suite_bin in self.suites_binaries.items(): + suite_cmd = GoogleBenchmarkCommand(suite_bin) + for benchmark_name in suite_cmd.list_benchmarks(): + yield "{}.{}".format(suite_name, benchmark_name) + + @property + def suites(self): + """ Returns all suite for a runner. """ + suite_matcher = regex_filter(self.suite_filter) + + suite_and_binaries = self.suites_binaries + for suite_name in suite_and_binaries: + if not suite_matcher(suite_name): + logger.debug("Ignoring suite {}".format(suite_name)) + continue + + suite_bin = suite_and_binaries[suite_name] + suite = self.suite(suite_name, suite_bin) + + # Filter may exclude all benchmarks + if not suite: + logger.debug("Suite {} executed but no results" + .format(suite_name)) + continue + + yield suite + + @staticmethod + def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): + """ Returns a BenchmarkRunner from a path or a git revision. + + First, it checks if `rev_or_path` is a valid path (or string) of a json + object that can deserialize to a BenchmarkRunner. If so, it initialize + a StaticBenchmarkRunner from it. This allows memoizing the result of a + run in a file or a string. 
class JavaBenchmarkRunner(BenchmarkRunner):
    """ Run suites for Java. """

    # default repetitions is 5 for Java microbenchmark harness
    def __init__(self, build, **kwargs):
        """ Keep the build and forward the other options to the base. """
        self.build = build
        super().__init__(**kwargs)

    @staticmethod
    def default_configuration(**kwargs):
        """ Build a JavaConfiguration from the given keyword arguments. """
        return JavaConfiguration(**kwargs)

    def suite(self, name):
        """ Build, run the benchmarks and wrap them in a suite `name`.

        Returns None when the harness lists no benchmark for the current
        benchmark filter.
        """
        # update .m2 directory, which installs target jars
        self.build.build()

        harness = JavaMicrobenchmarkHarnessCommand(
            self.build, self.benchmark_filter)

        # Ensure there will be data
        if not harness.list_benchmarks():
            return None

        raw_results = harness.results(repetitions=self.repetitions)
        return BenchmarkSuite(
            name, JavaMicrobenchmarkHarness.from_json(raw_results))

    @property
    def list_benchmarks(self):
        """ Yield the name of every benchmark listed by the harness.

        No benchmark filter is applied.
        """
        # Ensure build is up-to-date to run benchmarks
        self.build.build()

        harness = JavaMicrobenchmarkHarnessCommand(self.build)
        for listed_name in harness.list_benchmarks():
            yield "{}".format(listed_name)

    @property
    def suites(self):
        """ Yield the single "JavaBenchmark" suite, if it has results. """
        suite_name = "JavaBenchmark"
        suite = self.suite(suite_name)

        if suite:
            yield suite
        else:
            # Filter may exclude all benchmarks
            logger.debug("Suite {} executed but no results"
                         .format(suite_name))

    @staticmethod
    def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
        """ Returns a BenchmarkRunner from a path or a git revision.

        Three cases are tried in order:

        1. `rev_or_path` is a path to (or the content of) a json document
           accepted by `StaticBenchmarkRunner.is_json_result`: a
           StaticBenchmarkRunner is decoded from it. This allows memoizing
           the result of a run in a file or a string.
        2. `rev_or_path` is a Maven build directory: a JavaBenchmarkRunner
           is created on top of it.
        3. Otherwise `rev_or_path` is taken as a revision, which is checked
           out under `root` and built from there.
        """
        if StaticBenchmarkRunner.is_json_result(rev_or_path):
            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)

        if MavenBuild.is_build_dir(rev_or_path):
            existing_def = JavaMavenDefinition(rev_or_path, maven_conf)
            return JavaBenchmarkRunner(
                existing_def.build(rev_or_path), **kwargs)

        # Revisions can reference a remote via the `/` character, ensure
        # that the revision is path friendly
        root_rev = os.path.join(root, rev_or_path.replace("/", "_"))
        os.mkdir(root_rev)

        # Possibly checkout the sources at given revision, no need to
        # perform cleanup on cloned repository as root_rev is reclaimed.
        clone_dir = os.path.join(root_rev, "arrow")
        src_rev, _ = src.at_revision(rev_or_path, clone_dir)

        checkout_def = JavaMavenDefinition(src_rev.java, maven_conf)
        build_dir = os.path.join(root_rev, "arrow/java")
        return JavaBenchmarkRunner(checkout_def.build(build_dir), **kwargs)
class CommentBot:
    """ Dispatch commands written in GitHub comments to a handler.

    Parameters
    ----------
    name : str
        Account name of the bot; a comment is a command only when it
        starts with `@<name>`.
    handler : callable
        Called as `handler(command, issue=..., pull_request=...,
        comment=...)` for every accepted pull request comment.
    token : str, optional
        Token passed to `github.Github`.
    """

    def __init__(self, name, handler, token=None):
        # TODO(kszucs): validate
        assert isinstance(name, str)
        assert callable(handler)
        self.name = name
        self.handler = handler
        self.github = github.Github(token)

    def parse_command(self, payload):
        """ Extract the command addressed to the bot from an event payload.

        Returns the text following the leading bot mention, up to the end
        of that line, stripped of surrounding whitespace.

        Raises
        ------
        EventError
            If the comment was sent by the bot itself, the action is
            neither 'created' nor 'edited', the author is not in an allowed
            role or the comment doesn't start with the bot mention.
        """
        # only allow users of apache org to submit commands, for more see
        # https://developer.github.com/v4/enum/commentauthorassociation/
        allowed_roles = {'OWNER', 'MEMBER', 'CONTRIBUTOR'}
        mention = '@{}'.format(self.name)
        comment = payload['comment']

        if payload['sender']['login'] == self.name:
            raise EventError("Don't respond to itself")
        elif payload['action'] not in {'created', 'edited'}:
            raise EventError("Don't respond to comment deletion")
        elif comment['author_association'] not in allowed_roles:
            raise EventError(
                "Don't respond to comments from non-authorized users"
            )
        elif not comment['body'].lstrip().startswith(mention):
            raise EventError("The bot is not mentioned")

        # Drop the leading mention (and the whitespace before it). Split
        # only once: the bot may be mentioned again later in the comment
        # and that must not discard the actual command.
        command = comment['body'].split(mention, 1)[1]

        # then split on newlines and keep only the first line
        # (ignoring all other lines)
        return command.split("\n")[0].strip()

    def handle(self, event, payload):
        """ Parse the command of `payload` and route it by event type.

        Events rejected by `parse_command` are logged and ignored.

        Raises
        ------
        ValueError
            If `event` is not a supported event type.
        """
        try:
            command = self.parse_command(payload)
        except EventError as e:
            logger.error(e)
            # see the possible reasons in parse_command
            return

        if event == 'issue_comment':
            return self.handle_issue_comment(command, payload)
        elif event == 'pull_request_review_comment':
            return self.handle_review_comment(command, payload)
        else:
            raise ValueError("Unexpected event type {}".format(event))

    def handle_issue_comment(self, command, payload):
        """ Run the handler for a comment and report the outcome.

        A comment on an issue that is not a pull request is answered with
        an explanatory comment. Otherwise the handler is called: a
        CommandError is posted back as a comment, any other exception is
        logged and answered with a '-1' reaction, success with '+1'.
        """
        repo = self.github.get_repo(payload['repository']['id'], lazy=True)
        issue = repo.get_issue(payload['issue']['number'])

        try:
            pull = issue.as_pull_request()
        except github.GithubException:
            return issue.create_comment(
                "The comment bot only listens to pull request comments!"
            )

        comment = pull.get_issue_comment(payload['comment']['id'])
        try:
            self.handler(command, issue=issue, pull_request=pull,
                         comment=comment)
        except CommandError as e:
            logger.error(e)
            pull.create_issue_comment("```\n{}\n```".format(e.message))
        except Exception as e:
            logger.exception(e)
            comment.create_reaction('-1')
        else:
            comment.create_reaction('+1')

    def handle_review_comment(self, command=None, payload=None):
        """ Review comments are not supported yet.

        Accepts the `(command, payload)` pair passed by `handle`; both
        arguments are optional so that calls passing a single argument
        keep working.
        """
        raise NotImplementedError()
def _parse_task_params(params):
    """ Turn `KEY=VALUE` strings into a dict, splitting on the first `=`.

    Values may contain `=` themselves. An entry without any `=` raises a
    click.BadParameter instead of an opaque ValueError.
    """
    parsed = {}
    for param in params:
        key, separator, value = param.partition("=")
        if not separator:
            raise click.BadParameter(
                "expected KEY=VALUE, got {!r}".format(param),
                param_hint="--param")
        parsed[key] = value
    return parsed


@crossbow.command()
@click.argument('tasks', nargs=-1, required=False)
@click.option('--group', '-g', 'groups', multiple=True,
              help='Submit task groups as defined in tasks.yml')
@click.option('--param', '-p', 'params', multiple=True,
              help='Additional task parameters for rendering the CI templates')
@click.option('--arrow-version', '-v', default=None,
              help='Set target version explicitly.')
@click.pass_obj
def submit(obj, tasks, groups, params, arrow_version):
    """
    Submit crossbow testing tasks.

    See groups defined in arrow/dev/tasks/tasks.yml
    """
    # `obj` carries the crossbow repository name and the pull request the
    # command was issued from
    crossbow_repo = obj['crossbow_repo']
    pull_request = obj['pull_request']
    with tempfile.TemporaryDirectory() as tmpdir:
        arrow, queue = _clone_arrow_and_crossbow(
            dest=Path(tmpdir),
            crossbow_repo=crossbow_repo,
            pull_request=pull_request,
        )
        # load available tasks configuration and groups from yaml
        config = Config.load_yaml(arrow.path / "dev" / "tasks" / "tasks.yml")
        config.validate()

        # initialize the crossbow build's target repository
        target = Target.from_repo(arrow, version=arrow_version,
                                  remote=pull_request.head.repo.clone_url,
                                  branch=pull_request.head.ref)

        # parse additional job parameters
        params = _parse_task_params(params)

        # instantiate the job object
        job = Job.from_config(config=config, target=target, tasks=tasks,
                              groups=groups, params=params)

        # add the job to the crossbow queue and push to the remote repository
        queue.put(job, prefix="actions")
        queue.push()

        # render the response comment's content
        report = CommentReport(job, crossbow_repo=crossbow_repo)

        # send the response
        pull_request.create_issue_comment(report.show())
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections import namedtuple +from io import StringIO +import click +import errno +import json +import logging +import os +import pathlib +import sys + +from .benchmark.codec import JsonEncoder +from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD +from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner +from .compat import _import_pandas +from .lang.cpp import CppCMakeDefinition, CppConfiguration +from .utils.cli import ArrowBool, validate_arrow_sources, add_optional_command +from .utils.lint import linter, python_numpydoc, LintValidationException +from .utils.logger import logger, ctx as log_ctx +from .utils.source import ArrowSources +from .utils.tmpdir import tmpdir + +# Set default logging to INFO in command line. +logging.basicConfig(level=logging.INFO) + + +BOOL = ArrowBool() + + +@click.group() +@click.option("--debug", type=BOOL, is_flag=True, default=False, + help="Increase logging with debugging output.") +@click.option("--pdb", type=BOOL, is_flag=True, default=False, + help="Invoke pdb on uncaught exception.") +@click.option("-q", "--quiet", type=BOOL, is_flag=True, default=False, + help="Silence executed commands.") +@click.pass_context +def archery(ctx, debug, pdb, quiet): + """ Apache Arrow developer utilities. + + See sub-commands help with `archery <cmd> --help`. 
+ + """ + # Ensure ctx.obj exists + ctx.ensure_object(dict) + + log_ctx.quiet = quiet + if debug: + logger.setLevel(logging.DEBUG) + + ctx.debug = debug + + if pdb: + import pdb + sys.excepthook = lambda t, v, e: pdb.pm() + + +build_dir_type = click.Path(dir_okay=True, file_okay=False, resolve_path=True) +# Supported build types +build_type = click.Choice(["debug", "relwithdebinfo", "release"], + case_sensitive=False) +# Supported warn levels +warn_level_type = click.Choice(["everything", "checkin", "production"], + case_sensitive=False) + +simd_level = click.Choice(["NONE", "SSE4_2", "AVX2", "AVX512"], + case_sensitive=True) + + +def cpp_toolchain_options(cmd): + options = [ + click.option("--cc", metavar="<compiler>", help="C compiler."), + click.option("--cxx", metavar="<compiler>", help="C++ compiler."), + click.option("--cxx-flags", help="C++ compiler flags."), + click.option("--cpp-package-prefix", + help=("Value to pass for ARROW_PACKAGE_PREFIX and " + "use ARROW_DEPENDENCY_SOURCE=SYSTEM")) + ] + return _apply_options(cmd, options) + + +def java_toolchain_options(cmd): + options = [ + click.option("--java-home", metavar="<java_home>", + help="Path to Java Developers Kit."), + click.option("--java-options", help="java compiler options."), + ] + return _apply_options(cmd, options) + + +def _apply_options(cmd, options): + for option in options: + cmd = option(cmd) + return cmd + + +@archery.command(short_help="Initialize an Arrow C++ build") +@click.option("--src", metavar="<arrow_src>", default=None, + callback=validate_arrow_sources, + help="Specify Arrow source directory") +# toolchain +@cpp_toolchain_options +@click.option("--build-type", default=None, type=build_type, + help="CMake's CMAKE_BUILD_TYPE") +@click.option("--warn-level", default="production", type=warn_level_type, + help="Controls compiler warnings -W(no-)error.") +@click.option("--use-gold-linker", default=True, type=BOOL, + help="Toggles ARROW_USE_LD_GOLD option.") 
+@click.option("--simd-level", default="SSE4_2", type=simd_level, + help="Toggles ARROW_SIMD_LEVEL option.") +# Tests and benchmarks +@click.option("--with-tests", default=True, type=BOOL, + help="Build with tests.") +@click.option("--with-benchmarks", default=None, type=BOOL, + help="Build with benchmarks.") +@click.option("--with-examples", default=None, type=BOOL, + help="Build with examples.") +@click.option("--with-integration", default=None, type=BOOL, + help="Build with integration test executables.") +# Static checks +@click.option("--use-asan", default=None, type=BOOL, + help="Toggle ARROW_USE_ASAN sanitizer.") +@click.option("--use-tsan", default=None, type=BOOL, + help="Toggle ARROW_USE_TSAN sanitizer.") +@click.option("--use-ubsan", default=None, type=BOOL, + help="Toggle ARROW_USE_UBSAN sanitizer.") +@click.option("--with-fuzzing", default=None, type=BOOL, + help="Toggle ARROW_FUZZING.") +# Components +@click.option("--with-compute", default=None, type=BOOL, + help="Build the Arrow compute module.") +@click.option("--with-csv", default=None, type=BOOL, + help="Build the Arrow CSV parser module.") +@click.option("--with-cuda", default=None, type=BOOL, + help="Build the Arrow CUDA extensions.") +@click.option("--with-dataset", default=None, type=BOOL, + help="Build the Arrow dataset module.") +@click.option("--with-filesystem", default=None, type=BOOL, + help="Build the Arrow filesystem layer.") +@click.option("--with-flight", default=None, type=BOOL, + help="Build with Flight rpc support.") +@click.option("--with-gandiva", default=None, type=BOOL, + help="Build with Gandiva expression compiler support.") +@click.option("--with-hdfs", default=None, type=BOOL, + help="Build the Arrow HDFS bridge.") +@click.option("--with-hiveserver2", default=None, type=BOOL, + help="Build the HiveServer2 client and arrow adapater.") +@click.option("--with-ipc", default=None, type=BOOL, + help="Build the Arrow IPC extensions.") +@click.option("--with-json", default=None, 
type=BOOL, + help="Build the Arrow JSON parser module.") +@click.option("--with-jni", default=None, type=BOOL, + help="Build the Arrow JNI lib.") +@click.option("--with-mimalloc", default=None, type=BOOL, + help="Build the Arrow mimalloc based allocator.") +@click.option("--with-parquet", default=None, type=BOOL, + help="Build with Parquet file support.") +@click.option("--with-plasma", default=None, type=BOOL, + help="Build with Plasma object store support.") +@click.option("--with-python", default=None, type=BOOL, + help="Build the Arrow CPython extesions.") +@click.option("--with-r", default=None, type=BOOL, + help="Build the Arrow R extensions. This is not a CMake option, " + "it will toggle required options") +@click.option("--with-s3", default=None, type=BOOL, + help="Build Arrow with S3 support.") +# Compressions +@click.option("--with-brotli", default=None, type=BOOL, + help="Build Arrow with brotli compression.") +@click.option("--with-bz2", default=None, type=BOOL, + help="Build Arrow with bz2 compression.") +@click.option("--with-lz4", default=None, type=BOOL, + help="Build Arrow with lz4 compression.") +@click.option("--with-snappy", default=None, type=BOOL, + help="Build Arrow with snappy compression.") +@click.option("--with-zlib", default=None, type=BOOL, + help="Build Arrow with zlib compression.") +@click.option("--with-zstd", default=None, type=BOOL, + help="Build Arrow with zstd compression.") +# CMake extra feature +@click.option("--cmake-extras", type=str, multiple=True, + help="Extra flags/options to pass to cmake invocation. " + "Can be stacked") +@click.option("--install-prefix", type=str, + help="Destination directory where files are installed. Expand to" + "CMAKE_INSTALL_PREFIX. 
Defaults to to $CONDA_PREFIX if the" + "variable exists.") +# misc +@click.option("-f", "--force", type=BOOL, is_flag=True, default=False, + help="Delete existing build directory if found.") +@click.option("--targets", type=str, multiple=True, + help="Generator targets to run. Can be stacked.") +@click.argument("build_dir", type=build_dir_type) +@click.pass_context +def build(ctx, src, build_dir, force, targets, **kwargs): + """ Initialize a C++ build directory. + + The build command creates a directory initialized with Arrow's cpp source + cmake and configuration. It can also optionally invoke the generator to + test the build (and used in scripts). + + Note that archery will carry the caller environment. It will also not touch + an existing directory, one must use the `--force` option to remove the + existing directory. + + Examples: + + \b + # Initialize build with clang8 and avx2 support in directory `clang8-build` + \b + archery build --cc=clang-8 --cxx=clang++-8 --cxx-flags=-mavx2 clang8-build + + \b + # Builds and run test + archery build --targets=all --targets=test build + """ + # Arrow's cpp cmake configuration + conf = CppConfiguration(**kwargs) + # This is a closure around cmake invocation, e.g. 
# NOTE(review): this chunk opens inside the body of a command whose `def`
# line is outside the visible source. The visible tail is preserved below
# as a comment instead of being reconstructed from a partial view:
#
#     ... calling `def.build()`
#     # yields a directory ready to be run with the generator
#     cmake_def = CppCMakeDefinition(src.cpp, conf)
#     # Create build directory
#     build = cmake_def.build(build_dir, force=force)
#
#     for target in targets:
#         build.run(target)


# One entry per lint check; `option_name` becomes a `--<name>/--no-<name>`
# flag pair on the `lint` command (see decorate_lint_command).
LintCheck = namedtuple('LintCheck', ('option_name', 'help'))

lint_checks = [
    LintCheck('clang-format', "Format C++ files with clang-format."),
    LintCheck('clang-tidy', "Lint C++ files with clang-tidy."),
    LintCheck('cpplint', "Lint C++ files with cpplint."),
    LintCheck('iwyu', "Lint changed C++ files with Include-What-You-Use."),
    LintCheck('python',
              "Format and lint Python files with autopep8 and flake8."),
    LintCheck('numpydoc', "Lint Python files with numpydoc."),
    LintCheck('cmake-format', "Format CMake files with cmake-format.py."),
    LintCheck('rat',
              "Check all sources files for license texts via Apache RAT."),
    LintCheck('r', "Lint R files."),
    LintCheck('docker', "Lint Dockerfiles with hadolint."),
]


def decorate_lint_command(cmd):
    """
    Decorate the lint() command function to add individual per-check options.
    """
    for check in lint_checks:
        # default=None lets lint() tell "not given" apart from an explicit
        # --no-<check>.
        option = click.option("--{0}/--no-{0}".format(check.option_name),
                              default=None, help=check.help)
        cmd = option(cmd)
    return cmd


# No docstring on purpose: click would surface it as the command help text.
# Exits with status 1 when the linter raises LintValidationException.
@archery.command(short_help="Check Arrow source tree for errors")
@click.option("--src", metavar="<arrow_src>", default=None,
              callback=validate_arrow_sources,
              help="Specify Arrow source directory")
@click.option("--fix", is_flag=True, type=BOOL, default=False,
              help="Toggle fixing the lint errors if the linter supports it.")
@click.option("--iwyu_all", is_flag=True, type=BOOL, default=False,
              help="Run IWYU on all C++ files if enabled")
@click.option("-a", "--all", is_flag=True, default=False,
              help="Enable all checks.")
@decorate_lint_command
@click.pass_context
def lint(ctx, src, fix, iwyu_all, **checks):
    if checks.pop('all'):
        # "--all" is given => enable all non-selected checks
        for k, v in checks.items():
            if v is None:
                checks[k] = True
    if not any(checks.values()):
        raise click.UsageError(
            "Need to enable at least one lint check (try --help)")
    try:
        linter(src, fix, iwyu_all=iwyu_all, **checks)
    except LintValidationException:
        sys.exit(1)


@archery.command(short_help="Lint python docstring with NumpyDoc")
@click.argument('symbols', nargs=-1)
@click.option("--src", metavar="<arrow_src>", default=None,
              callback=validate_arrow_sources,
              help="Specify Arrow source directory")
@click.option("--allow-rule", "-a", multiple=True,
              help="Allow only these rules")
@click.option("--disallow-rule", "-d", multiple=True,
              help="Disallow these rules")
def numpydoc(src, symbols, allow_rule, disallow_rule):
    """
    Pass list of modules or symbols as arguments to restrict the validation.

    By default all modules of pyarrow are tried to be validated.

    Examples
    --------
    archery numpydoc pyarrow.dataset
    archery numpydoc pyarrow.csv pyarrow.json pyarrow.parquet
    archery numpydoc pyarrow.array
    """
    # Fall back to a fixed set of disallowed rules when none is given.
    disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
    try:
        results = python_numpydoc(symbols, allow_rules=allow_rule,
                                  disallow_rules=disallow_rule)
        for result in results:
            result.ok()
    except LintValidationException:
        sys.exit(1)


@archery.group()
@click.pass_context
def benchmark(ctx):
    """ Arrow benchmarking.

    Use the diff sub-command to benchmark revisions, and/or build directories.
    """
    pass


def benchmark_common_options(cmd):
    """Attach the options shared by every `benchmark` sub-command to *cmd*."""
    def check_language(ctx, param, value):
        if value not in {"cpp", "java"}:
            raise click.BadParameter("cpp or java is supported now")
        return value

    options = [
        click.option("--src", metavar="<arrow_src>", show_default=True,
                     default=None, callback=validate_arrow_sources,
                     help="Specify Arrow source directory"),
        click.option("--preserve", type=BOOL, default=False, show_default=True,
                     is_flag=True,
                     help="Preserve workspace for investigation."),
        click.option("--output", metavar="<output>",
                     type=click.File("w", encoding="utf8"), default="-",
                     help="Capture output result into file."),
        click.option("--language", metavar="<lang>", type=str, default="cpp",
                     show_default=True, callback=check_language,
                     help="Specify target language for the benchmark"),
        click.option("--build-extras", type=str, multiple=True,
                     help="Extra flags/options to pass to mvn build. "
                     "Can be stacked. For language=java"),
        click.option("--benchmark-extras", type=str, multiple=True,
                     help="Extra flags/options to pass to mvn benchmark. "
                     "Can be stacked. For language=java"),
        click.option("--cmake-extras", type=str, multiple=True,
                     help="Extra flags/options to pass to cmake invocation. "
                     "Can be stacked. For language=cpp")
    ]

    cmd = java_toolchain_options(cmd)
    cmd = cpp_toolchain_options(cmd)
    return _apply_options(cmd, options)


def benchmark_filter_options(cmd):
    """Attach the suite/benchmark regex filter options to *cmd*."""
    options = [
        click.option("--suite-filter", metavar="<regex>", show_default=True,
                     type=str, default=None,
                     help="Regex filtering benchmark suites."),
        click.option("--benchmark-filter", metavar="<regex>",
                     show_default=True, type=str, default=None,
                     help="Regex filtering benchmarks.")
    ]
    return _apply_options(cmd, options)


@benchmark.command(name="list", short_help="List benchmark suite")
@click.argument("rev_or_path", metavar="[<rev_or_path>]",
                default="WORKSPACE", required=False)
@benchmark_common_options
@click.pass_context
def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                   java_home, java_options, build_extras, benchmark_extras,
                   language, **kwargs):
    """ List benchmark suite.
    """
    with tmpdir(preserve=preserve) as root:
        logger.debug("Running benchmark {}".format(rev_or_path))

        if language == "cpp":
            conf = CppBenchmarkRunner.default_configuration(
                cmake_extras=cmake_extras, **kwargs)

            runner_base = CppBenchmarkRunner.from_rev_or_path(
                src, root, rev_or_path, conf)

        elif language == "java":
            # Drop the C++ toolchain settings before building the Java
            # configuration.
            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                del kwargs[key]
            conf = JavaBenchmarkRunner.default_configuration(
                java_home=java_home, java_options=java_options,
                build_extras=build_extras, benchmark_extras=benchmark_extras,
                **kwargs)

            runner_base = JavaBenchmarkRunner.from_rev_or_path(
                src, root, rev_or_path, conf)

        for b in runner_base.list_benchmarks:
            click.echo(b, file=output)


@benchmark.command(name="run", short_help="Run benchmark suite")
@click.argument("rev_or_path", metavar="[<rev_or_path>]",
                default="WORKSPACE", required=False)
@benchmark_common_options
@benchmark_filter_options
@click.option("--repetitions", type=int, default=-1,
              help=("Number of repetitions of each benchmark. Increasing "
                    "may improve result precision. "
                    "[default: 1 for cpp, 5 for java]"))
@click.pass_context
def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
                  java_home, java_options, build_extras, benchmark_extras,
                  language, suite_filter, benchmark_filter, repetitions,
                  **kwargs):
    """ Run benchmark suite.

    This command will run the benchmark suite for a single build. This is
    used to capture (and/or publish) the results.

    The caller can optionally specify a target which is either a git revision
    (commit, tag, special values like HEAD) or a cmake build directory.

    When a commit is referenced, a local clone of the arrow sources (specified
    via --src) is performed and the proper branch is created. This is done in
    a temporary directory which can be left intact with the `--preserve` flag.

    The special token "WORKSPACE" is reserved to specify the current git
    workspace. This implies that no clone will be performed.

    Examples:

    \b
    # Run the benchmarks on current git workspace
    \b
    archery benchmark run

    \b
    # Run the benchmarks on the previous commit
    \b
    archery benchmark run HEAD~1

    \b
    # Run the benchmarks on current git workspace and capture the result
    \b
    archery benchmark run --output=run.json
    """
    with tmpdir(preserve=preserve) as root:
        logger.debug("Running benchmark {}".format(rev_or_path))

        if language == "cpp":
            conf = CppBenchmarkRunner.default_configuration(
                cmake_extras=cmake_extras, **kwargs)

            # -1 is the "not given" sentinel; the default depends on language
            repetitions = repetitions if repetitions != -1 else 1
            runner_base = CppBenchmarkRunner.from_rev_or_path(
                src, root, rev_or_path, conf,
                repetitions=repetitions,
                suite_filter=suite_filter, benchmark_filter=benchmark_filter)

        elif language == "java":
            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                del kwargs[key]
            conf = JavaBenchmarkRunner.default_configuration(
                java_home=java_home, java_options=java_options,
                build_extras=build_extras, benchmark_extras=benchmark_extras,
                **kwargs)

            repetitions = repetitions if repetitions != -1 else 5
            runner_base = JavaBenchmarkRunner.from_rev_or_path(
                src, root, rev_or_path, conf,
                repetitions=repetitions,
                benchmark_filter=benchmark_filter)

        json.dump(runner_base, output, cls=JsonEncoder)


@benchmark.command(name="diff", short_help="Compare benchmark suites")
@benchmark_common_options
@benchmark_filter_options
@click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
              show_default=True,
              help="Regression failure threshold in percentage.")
# The default must be the -1 sentinel (as in `benchmark run`): with a default
# of 1 the `!= -1` checks below never fire and the documented Java default
# of 5 repetitions is unreachable.
@click.option("--repetitions", type=int, default=-1,
              help=("Number of repetitions of each benchmark. Increasing "
                    "may improve result precision. "
                    "[default: 1 for cpp, 5 for java]"))
@click.option("--no-counters", type=BOOL, default=False, is_flag=True,
              help="Hide counters field in diff report.")
@click.argument("contender", metavar="[<contender>",
                default=ArrowSources.WORKSPACE, required=False)
@click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
                required=False)
@click.pass_context
def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
                   suite_filter, benchmark_filter, repetitions, no_counters,
                   java_home, java_options, build_extras, benchmark_extras,
                   threshold, contender, baseline, **kwargs):
    """Compare (diff) benchmark runs.

    This command acts like git-diff but for benchmark results.

    The caller can optionally specify both the contender and the baseline. If
    unspecified, the contender will default to the current workspace (like git)
    and the baseline will default to master.

    Each target (contender or baseline) can either be a git revision
    (commit, tag, special values like HEAD) or a cmake build directory. This
    allows comparing git commits, and/or different compilers and/or compiler
    flags.

    When a commit is referenced, a local clone of the arrow sources (specified
    via --src) is performed and the proper branch is created. This is done in
    a temporary directory which can be left intact with the `--preserve` flag.

    The special token "WORKSPACE" is reserved to specify the current git
    workspace. This implies that no clone will be performed.

    Examples:

    \b
    # Compare workspace (contender) with master (baseline)
    \b
    archery benchmark diff

    \b
    # Compare master (contender) with latest version (baseline)
    \b
    export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1)
    \b
    archery benchmark diff master "$LAST"

    \b
    # Compare g++7 (contender) with clang++-8 (baseline) builds
    \b
    archery build --with-benchmarks=true \\
            --cxx-flags=-ftree-vectorize \\
            --cc=gcc-7 --cxx=g++-7 gcc7-build
    \b
    archery build --with-benchmarks=true \\
            --cxx-flags=-flax-vector-conversions \\
            --cc=clang-8 --cxx=clang++-8 clang8-build
    \b
    archery benchmark diff gcc7-build clang8-build

    \b
    # Compare default targets but scoped to the suites matching
    # `^arrow-compute-aggregate` and benchmarks matching `(Sum|Mean)Kernel`.
    \b
    archery benchmark diff --suite-filter="^arrow-compute-aggregate" \\
            --benchmark-filter="(Sum|Mean)Kernel"

    \b
    # Capture result in file `result.json`
    \b
    archery benchmark diff --output=result.json
    \b
    # Equivalently with no stdout clutter.
    archery --quiet benchmark diff > result.json

    \b
    # Comparing with a cached results from `archery benchmark run`
    \b
    archery benchmark run --output=run.json HEAD~1
    \b
    # This should not recompute the benchmark from run.json
    archery --quiet benchmark diff WORKSPACE run.json > result.json
    """
    with tmpdir(preserve=preserve) as root:
        logger.debug("Comparing {} (contender) with {} (baseline)"
                     .format(contender, baseline))

        if language == "cpp":
            conf = CppBenchmarkRunner.default_configuration(
                cmake_extras=cmake_extras, **kwargs)

            repetitions = repetitions if repetitions != -1 else 1
            runner_cont = CppBenchmarkRunner.from_rev_or_path(
                src, root, contender, conf,
                repetitions=repetitions,
                suite_filter=suite_filter,
                benchmark_filter=benchmark_filter)
            runner_base = CppBenchmarkRunner.from_rev_or_path(
                src, root, baseline, conf,
                repetitions=repetitions,
                suite_filter=suite_filter,
                benchmark_filter=benchmark_filter)

        elif language == "java":
            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                del kwargs[key]
            conf = JavaBenchmarkRunner.default_configuration(
                java_home=java_home, java_options=java_options,
                build_extras=build_extras, benchmark_extras=benchmark_extras,
                **kwargs)

            repetitions = repetitions if repetitions != -1 else 5
            runner_cont = JavaBenchmarkRunner.from_rev_or_path(
                src, root, contender, conf,
                repetitions=repetitions,
                benchmark_filter=benchmark_filter)
            runner_base = JavaBenchmarkRunner.from_rev_or_path(
                src, root, baseline, conf,
                repetitions=repetitions,
                benchmark_filter=benchmark_filter)

        runner_comp = RunnerComparator(runner_cont, runner_base, threshold)

        # TODO(kszucs): test that the output is properly formatted jsonlines
        comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
        # The Java report labels the counters column "configurations".
        ren_counters = language == "java"
        formatted = _format_comparisons_with_pandas(comparisons_json,
                                                    no_counters, ren_counters)
        output.write(formatted)
        output.write('\n')


def _get_comparisons_as_json(comparisons):
    """Serialize *comparisons* as JSON lines (one object per line)."""
    buf = StringIO()
    for comparator in comparisons:
        json.dump(comparator, buf, cls=JsonEncoder)
        buf.write("\n")

    return buf.getvalue()


def _format_comparisons_with_pandas(comparisons_json, no_counters,
                                    ren_counters):
    """Render JSON-lines comparisons as two text tables.

    The tables are titled "Non-regressions" and "Regressions"; a table with
    no rows renders as an empty string.
    """
    pd = _import_pandas()
    df = pd.read_json(StringIO(comparisons_json), lines=True)
    # parse change % so we can sort by it
    df['change %'] = df.pop('change').str[:-1].map(float)
    # Number of rows not flagged as a regression. The split below relies on
    # those rows sorting first by 'change %' -- NOTE(review): presumably
    # guaranteed by how 'regression' is derived; confirm in the comparator.
    first_regression = len(df) - df['regression'].sum()

    fields = ['benchmark', 'baseline', 'contender', 'change %']
    if not no_counters:
        fields += ['counters']

    df = df[fields]
    if ren_counters:
        df = df.rename(columns={'counters': 'configurations'})
    df = df.sort_values(by='change %', ascending=False)

    def labelled(title, df):
        if len(df) == 0:
            return ''
        title += ': ({})'.format(len(df))
        df_str = df.to_string(index=False)
        # the rule is as wide as the first rendered line (the header)
        bar = '-' * df_str.index('\n')
        return '\n'.join([bar, title, bar, df_str])

    return '\n\n'.join([labelled('Non-regressions', df[:first_regression]),
                        labelled('Regressions', df[first_regression:])])


# ----------------------------------------------------------------------
# Integration testing

def _set_default(opt, default):
    """Return *default* when *opt* is None, otherwise *opt*."""
    if opt is None:
        return default
    return opt


# No docstring on purpose: click would surface it as the command help text.
@archery.command(short_help="Execute protocol and Flight integration tests")
@click.option('--with-all', is_flag=True, default=False,
              help=('Include all known languages by default '
                    'in integration tests'))
@click.option('--random-seed', type=int, default=12345,
              help="Seed for PRNG when generating test data")
@click.option('--with-cpp', type=bool, default=False,
              help='Include C++ in integration tests')
@click.option('--with-csharp', type=bool, default=False,
              help='Include C# in integration tests')
@click.option('--with-java', type=bool, default=False,
              help='Include Java in integration tests')
@click.option('--with-js', type=bool, default=False,
              help='Include JavaScript in integration tests')
@click.option('--with-go', type=bool, default=False,
              help='Include Go in integration tests')
@click.option('--with-rust', type=bool, default=False,
              help='Include Rust in integration tests',
              envvar="ARCHERY_INTEGRATION_WITH_RUST")
@click.option('--write_generated_json', default=False,
              help='Generate test JSON to indicated path')
@click.option('--run-flight', is_flag=True, default=False,
              help='Run Flight integration tests')
@click.option('--debug', is_flag=True, default=False,
              help='Run executables in debug mode as relevant')
@click.option('--serial', is_flag=True, default=False,
              help='Run tests serially, rather than in parallel')
@click.option('--tempdir', default=None,
              help=('Directory to use for writing '
                    'integration test temporary files'))
@click.option('stop_on_error', '-x', '--stop-on-error',
              is_flag=True, default=False,
              help='Stop on first error')
@click.option('--gold-dirs', multiple=True,
              help="gold integration test file paths")
@click.option('-k', '--match',
              help=("Substring for test names to include in run, "
                    "e.g. -k primitive"))
def integration(with_all=False, random_seed=12345, **args):
    from .integration.runner import write_js_test_json, run_all_tests
    import numpy as np

    # FIXME(bkietz) Include help strings for individual testers.
    # For example, CPPTester's ARROW_CPP_EXE_PATH environment variable.

    # Make runs involving data generation deterministic
    np.random.seed(random_seed)

    gen_path = args['write_generated_json']

    languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust']

    enabled_languages = 0
    for lang in languages:
        param = 'with_{}'.format(lang)
        if with_all:
            args[param] = with_all

        if args[param]:
            enabled_languages += 1

    if gen_path:
        # an already existing directory is fine
        os.makedirs(gen_path, exist_ok=True)
        write_js_test_json(gen_path)
    else:
        if enabled_languages == 0:
            # a usage error, reported by click without a traceback
            raise click.UsageError(
                "Must enable at least 1 language to test")
        run_all_tests(**args)


# No docstring on purpose: click would surface it as the command help text.
@archery.command()
@click.option('--event-name', '-n', required=True)
@click.option('--event-payload', '-p', type=click.File('r', encoding='utf8'),
              default='-', required=True)
@click.option('--arrow-token', envvar='ARROW_GITHUB_TOKEN',
              help='OAuth token for responding comment in the arrow repo')
def trigger_bot(event_name, event_payload, arrow_token):
    from .bot import CommentBot, actions

    event_payload = json.loads(event_payload.read())

    bot = CommentBot(name='github-actions', handler=actions, token=arrow_token)
    bot.handle(event_name, event_payload)


@archery.group('release')
@click.option("--src", metavar="<arrow_src>", default=None,
              callback=validate_arrow_sources,
              help="Specify Arrow source directory.")
@click.option("--jira-cache", type=click.Path(), default=None,
              help="File path to cache queried JIRA issues per version.")
@click.pass_obj
def release(obj, src, jira_cache):
    """Release related commands."""
    from .release import Jira, CachedJira

    jira = Jira()
    if jira_cache is not None:
        jira = CachedJira(jira_cache, jira=jira)

    # shared with the sub-commands through the click context object
    obj['jira'] = jira
    obj['repo'] = src.path


@release.command('curate')
@click.argument('version')
@click.pass_obj
def release_curate(obj, version):
    """Release curation."""
    from .release import Release

    release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
    curation = release.curate()

    click.echo(curation.render('console'))


@release.group('changelog')
def release_changelog():
    """Release changelog."""
    pass


@release_changelog.command('add')
@click.argument('version')
@click.pass_obj
def release_changelog_add(obj, version):
    """Prepend the changelog with the current release"""
    from .release import Release

    jira, repo = obj['jira'], obj['repo']

    # just handle the current version
    release = Release.from_jira(version, jira=jira, repo=repo)
    if release.is_released:
        raise ValueError('This version has been already released!')

    changelog = release.changelog()
    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'

    current_content = changelog_path.read_text()
    new_content = changelog.render('markdown') + current_content

    changelog_path.write_text(new_content)
    click.echo("CHANGELOG.md is updated!")


@release_changelog.command('generate')
@click.argument('version')
@click.argument('output', type=click.File('w', encoding='utf8'), default='-')
@click.pass_obj
def release_changelog_generate(obj, version, output):
    """Generate the changelog of a specific release."""
    from .release import Release

    jira, repo = obj['jira'], obj['repo']

    # just handle the current version
    release = Release.from_jira(version, jira=jira, repo=repo)

    changelog = release.changelog()
    output.write(changelog.render('markdown'))


@release_changelog.command('regenerate')
@click.pass_obj
def release_changelog_regenerate(obj):
    """Regenerate the whole CHANGELOG.md file"""
    from .release import Release

    jira, repo = obj['jira'], obj['repo']
    changelogs = []

    for version in jira.project_versions('ARROW'):
        if not version.released:
            continue
        release = Release.from_jira(version, jira=jira, repo=repo)
        click.echo('Querying changelog for version: {}'.format(version))
        changelogs.append(release.changelog())

    click.echo('Rendering new CHANGELOG.md file...')
    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
    with changelog_path.open('w') as fp:
        for cl in changelogs:
            fp.write(cl.render('markdown'))


@release.command('cherry-pick')
@click.argument('version')
@click.option('--dry-run/--execute', default=True,
              help="Display the git commands instead of executing them.")
@click.option('--recreate/--continue', default=True,
              help="Recreate the maintenance branch or only apply unapplied "
                   "patches.")
@click.pass_obj
def release_cherry_pick(obj, version, dry_run, recreate):
    """
    Cherry pick commits.
    """
    from .release import Release, MinorRelease, PatchRelease

    release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
    if not isinstance(release, (MinorRelease, PatchRelease)):
        raise click.UsageError('Cherry-pick command only supported for minor '
                               'and patch releases')

    if not dry_run:
        release.cherry_pick_commits(recreate_branch=recreate)
        click.echo('Executed the following commands:\n')

    # The command listing is printed in both dry-run and execute mode.
    click.echo(
        'git checkout {} -b {}'.format(release.previous.tag, release.branch)
    )
    for commit in release.commits_to_pick():
        click.echo('git cherry-pick {}'.format(commit.hexsha))


@archery.group("linking")
@click.pass_obj
def linking(obj):
    """
    Quick and dirty utilities for checking library linkage.
    """
    pass


# No docstring on purpose: click would surface it as the command help text.
@linking.command("check-dependencies")
@click.argument("paths", nargs=-1)
@click.option("--allow", "-a", "allowed", multiple=True,
              help="Name of the allowed libraries")
@click.option("--disallow", "-d", "disallowed", multiple=True,
              help="Name of the disallowed libraries")
@click.pass_obj
def linking_check_dependencies(obj, allowed, disallowed, paths):
    from .linking import check_dynamic_library_dependencies, DependencyError

    allowed, disallowed = set(allowed), set(disallowed)
    try:
        for path in map(pathlib.Path, paths):
            check_dynamic_library_dependencies(path, allowed=allowed,
                                               disallowed=disallowed)
    except DependencyError as e:
        raise click.ClickException(str(e))


add_optional_command("docker", module=".docker.cli", function="docker",
                     parent=archery)
add_optional_command("crossbow", module=".crossbow.cli", function="crossbow",
                     parent=archery)


if __name__ == "__main__":
    archery(obj={})


# ----------------------------------------------------------------------
# File boundary preserved from the original diff text:
#   diff --git a/src/arrow/dev/archery/archery/compat.py
#              b/src/arrow/dev/archery/archery/compat.py
#   new file mode 100644 index 000000000..bb0b15428
#   --- /dev/null
#   +++ b/src/arrow/dev/archery/archery/compat.py
#   @@ -0,0 +1,59 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import pathlib
import sys


def _is_path_like(path):
    """Return True when *path* is a str, a pathlib.Path, or has __fspath__."""
    # PEP519 filesystem path protocol is available from python 3.6, so pathlib
    # doesn't implement __fspath__ for earlier versions
    return (isinstance(path, str) or
            hasattr(path, '__fspath__') or
            isinstance(path, pathlib.Path))


def _ensure_path(path):
    """Return *path* as a pathlib.Path; an existing Path is returned as is."""
    if isinstance(path, pathlib.Path):
        return path
    else:
        return pathlib.Path(_stringify_path(path))


def _stringify_path(path):
    """
    Convert *path* to a string or unicode path if possible.

    Raises TypeError("not a path-like object") when *path* is neither a str,
    an object exposing ``__fspath__``, nor a pathlib.Path. Exceptions raised
    by ``__fspath__`` itself propagate unchanged.
    """
    if isinstance(path, str):
        return path

    # checking whether path implements the filesystem protocol
    # (new in python 3.6). The lookup is kept separate from the call so that
    # an AttributeError raised *inside* __fspath__ is not mistaken for a
    # missing protocol and reported as "not a path-like object".
    fspath = getattr(path, '__fspath__', None)
    if fspath is not None:
        return fspath()

    # fallback pathlib check for earlier python versions than 3.6
    if isinstance(path, pathlib.Path):
        return str(path)

    raise TypeError("not a path-like object")


def _import_pandas():
    """Import and return pandas with the `pyarrow` import blocked."""
    # ARROW-13425: avoid importing PyArrow from Pandas
    # A None entry in sys.modules makes any later `import pyarrow` in this
    # process raise ImportError; the entry is deliberately left in place.
    sys.modules['pyarrow'] = None
    import pandas as pd
    return pd


# ----------------------------------------------------------------------
# File boundary preserved from the original diff text:
#   diff --git a/src/arrow/dev/archery/archery/crossbow/__init__.py
#              b/src/arrow/dev/archery/archery/crossbow/__init__.py
#   new file mode 100644 index 000000000..bc72e81f0
#   --- /dev/null
#   +++ b/src/arrow/dev/archery/archery/crossbow/__init__.py
#   @@ -0,0 +1,19 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +from .core import Config, Repo, Queue, Target, Job # noqa +from .reports import CommentReport, ConsoleReport, EmailReport # noqa diff --git a/src/arrow/dev/archery/archery/crossbow/cli.py b/src/arrow/dev/archery/archery/crossbow/cli.py new file mode 100644 index 000000000..1d0610343 --- /dev/null +++ b/src/arrow/dev/archery/archery/crossbow/cli.py @@ -0,0 +1,365 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pathlib import Path + +import click + +from .core import Config, Repo, Queue, Target, Job, CrossbowError +from .reports import EmailReport, ConsoleReport +from ..utils.source import ArrowSources + + +_default_arrow_path = ArrowSources.find().path +_default_queue_path = _default_arrow_path.parent / "crossbow" +_default_config_path = _default_arrow_path / "dev" / "tasks" / "tasks.yml" + + +@click.group() +@click.option('--github-token', '-t', default=None, + envvar="CROSSBOW_GITHUB_TOKEN", + help='OAuth token for GitHub authentication') +@click.option('--arrow-path', '-a', + type=click.Path(), default=_default_arrow_path, + help='Arrow\'s repository path. 
Defaults to the repository of ' + 'this script') +@click.option('--queue-path', '-q', + type=click.Path(), default=_default_queue_path, + help='The repository path used for scheduling the tasks. ' + 'Defaults to crossbow directory placed next to arrow') +@click.option('--queue-remote', '-qr', default=None, + help='Force to use this remote URL for the Queue repository') +@click.option('--output-file', metavar='<output>', + type=click.File('w', encoding='utf8'), default='-', + help='Capture output result into file.') +@click.pass_context +def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote, + output_file): + """ + Schedule packaging tasks or nightly builds on CI services. + """ + ctx.ensure_object(dict) + ctx.obj['output'] = output_file + ctx.obj['arrow'] = Repo(arrow_path) + ctx.obj['queue'] = Queue(queue_path, remote_url=queue_remote, + github_token=github_token, require_https=True) + + +@crossbow.command() +@click.option('--config-path', '-c', + type=click.Path(exists=True), default=_default_config_path, + help='Task configuration yml. Defaults to tasks.yml') +@click.pass_obj +def check_config(obj, config_path): + # load available tasks configuration and groups from yaml + config = Config.load_yaml(config_path) + config.validate() + + output = obj['output'] + config.show(output) + + +@crossbow.command() +@click.argument('tasks', nargs=-1, required=False) +@click.option('--group', '-g', 'groups', multiple=True, + help='Submit task groups as defined in task.yml') +@click.option('--param', '-p', 'params', multiple=True, + help='Additional task parameters for rendering the CI templates') +@click.option('--job-prefix', default='build', + help='Arbitrary prefix for branch names, e.g. nightly') +@click.option('--config-path', '-c', + type=click.Path(exists=True), default=_default_config_path, + help='Task configuration yml. 
Defaults to tasks.yml') +@click.option('--arrow-version', '-v', default=None, + help='Set target version explicitly.') +@click.option('--arrow-remote', '-r', default=None, + help='Set GitHub remote explicitly, which is going to be cloned ' + 'on the CI services. Note, that no validation happens ' + 'locally. Examples: https://github.com/apache/arrow or ' + 'https://github.com/kszucs/arrow.') +@click.option('--arrow-branch', '-b', default=None, + help='Give the branch name explicitly, e.g. master, ARROW-1949.') +@click.option('--arrow-sha', '-t', default=None, + help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' + 'apache-arrow-0.11.1.') +@click.option('--fetch/--no-fetch', default=True, + help='Fetch references (branches and tags) from the remote') +@click.option('--dry-run/--commit', default=False, + help='Just display the rendered CI configurations without ' + 'committing them') +@click.option('--no-push/--push', default=False, + help='Don\'t push the changes') +@click.pass_obj +def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, + arrow_remote, arrow_branch, arrow_sha, fetch, dry_run, no_push): + output = obj['output'] + queue, arrow = obj['queue'], obj['arrow'] + + # load available tasks configuration and groups from yaml + config = Config.load_yaml(config_path) + try: + config.validate() + except CrossbowError as e: + raise click.ClickException(str(e)) + + # Override the detected repo url / remote, branch and sha - this aims to + # make release procedure a bit simpler. + # Note, that the target resivion's crossbow templates must be + # compatible with the locally checked out version of crossbow (which is + # in case of the release procedure), because the templates still + # contain some business logic (dependency installation, deployments) + # which will be reduced to a single command in the future. 
+ target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch, + head=arrow_sha, version=arrow_version) + + # parse additional job parameters + params = dict([p.split("=") for p in params]) + + # instantiate the job object + try: + job = Job.from_config(config=config, target=target, tasks=tasks, + groups=groups, params=params) + except CrossbowError as e: + raise click.ClickException(str(e)) + + job.show(output) + if dry_run: + return + + if fetch: + queue.fetch() + queue.put(job, prefix=job_prefix) + + if no_push: + click.echo('Branches and commits created but not pushed: `{}`' + .format(job.branch)) + else: + queue.push() + click.echo('Pushed job identifier is: `{}`'.format(job.branch)) + + +@crossbow.command() +@click.argument('task', required=True) +@click.option('--config-path', '-c', + type=click.Path(exists=True), default=_default_config_path, + help='Task configuration yml. Defaults to tasks.yml') +@click.option('--arrow-version', '-v', default=None, + help='Set target version explicitly.') +@click.option('--arrow-remote', '-r', default=None, + help='Set GitHub remote explicitly, which is going to be cloned ' + 'on the CI services. Note, that no validation happens ' + 'locally. Examples: https://github.com/apache/arrow or ' + 'https://github.com/kszucs/arrow.') +@click.option('--arrow-branch', '-b', default=None, + help='Give the branch name explicitly, e.g. master, ARROW-1949.') +@click.option('--arrow-sha', '-t', default=None, + help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' + 'apache-arrow-0.11.1.') +@click.option('--param', '-p', 'params', multiple=True, + help='Additional task parameters for rendering the CI templates') +@click.pass_obj +def render(obj, task, config_path, arrow_version, arrow_remote, arrow_branch, + arrow_sha, params): + """ + Utility command to check the rendered CI templates. 
+ """ + from .core import _flatten + + def highlight(code): + try: + from pygments import highlight + from pygments.lexers import YamlLexer + from pygments.formatters import TerminalFormatter + return highlight(code, YamlLexer(), TerminalFormatter()) + except ImportError: + return code + + arrow = obj['arrow'] + + target = Target.from_repo(arrow, remote=arrow_remote, branch=arrow_branch, + head=arrow_sha, version=arrow_version) + config = Config.load_yaml(config_path) + params = dict([p.split("=") for p in params]) + params["queue_remote_url"] = "https://github.com/org/crossbow" + job = Job.from_config(config=config, target=target, tasks=[task], + params=params) + + for task_name, rendered_files in job.render_tasks().items(): + for path, content in _flatten(rendered_files).items(): + click.echo('#' * 80) + click.echo('### {:^72} ###'.format("/".join(path))) + click.echo('#' * 80) + click.echo(highlight(content)) + + +@crossbow.command() +@click.argument('job-name', required=True) +@click.option('--fetch/--no-fetch', default=True, + help='Fetch references (branches and tags) from the remote') +@click.option('--task-filter', '-f', 'task_filters', multiple=True, + help='Glob pattern for filtering relevant tasks') +@click.pass_obj +def status(obj, job_name, fetch, task_filters): + output = obj['output'] + queue = obj['queue'] + if fetch: + queue.fetch() + job = queue.get(job_name) + + report = ConsoleReport(job, task_filters=task_filters) + report.show(output) + + +@crossbow.command() +@click.argument('prefix', required=True) +@click.option('--fetch/--no-fetch', default=True, + help='Fetch references (branches and tags) from the remote') +@click.pass_obj +def latest_prefix(obj, prefix, fetch): + queue = obj['queue'] + if fetch: + queue.fetch() + latest = queue.latest_for_prefix(prefix) + click.echo(latest.branch) + + +@crossbow.command() +@click.argument('job-name', required=True) +@click.option('--sender-name', '-n', + help='Name to use for report e-mail.') 
@crossbow.command()
@click.argument('job-name', required=True)
@click.option('-t', '--target-dir',
              default=_default_arrow_path / 'packages',
              type=click.Path(file_okay=False, dir_okay=True),
              help='Directory to download the build artifacts')
@click.option('--dry-run/--execute', default=False,
              help="Just display process, don't download anything")
@click.option('--fetch/--no-fetch', default=True,
              help='Fetch references (branches and tags) from the remote')
@click.option('--task-filter', '-f', 'task_filters', multiple=True,
              help='Glob pattern for filtering relevant tasks')
@click.option('--validate-patterns/--skip-pattern-validation', default=True,
              help='Whether to validate artifact name patterns or not')
@click.pass_obj
def download_artifacts(obj, job_name, target_dir, dry_run, fetch,
                       validate_patterns, task_filters):
    """Download build artifacts from GitHub releases"""
    # NOTE: the docstring above is shown by click as the command's help text.
    output = obj['output']
    queue = obj['queue']

    # refresh the queue repository before looking up the job
    if fetch:
        queue.fetch()
    job = queue.get(job_name)

    # every job gets its own sub-directory under the requested target
    target_dir = Path(target_dir).absolute() / job_name
    target_dir.mkdir(parents=True, exist_ok=True)

    def asset_callback(task_name, task, asset):
        # invoked by the report for each asset; nothing to do for the
        # ones which have not been uploaded
        if asset is None:
            return
        destination = target_dir / task_name / asset.name
        # the per-task directory is created even in dry-run mode
        destination.parent.mkdir(exist_ok=True)
        if dry_run:
            return
        asset.download(destination)

    click.echo(f"Downloading {job_name}'s artifacts.")
    click.echo(f'Destination directory is {target_dir}')
    click.echo()

    # showing the report drives the downloads through the callback
    console_report = ConsoleReport(job, task_filters=task_filters)
    console_report.show(
        output,
        asset_callback=asset_callback,
        validate_patterns=validate_patterns
    )
upload_artifacts(obj, tag, sha, patterns, method): + queue = obj['queue'] + queue.github_overwrite_release_assets( + tag_name=tag, target_commitish=sha, method=method, patterns=patterns + ) diff --git a/src/arrow/dev/archery/archery/crossbow/core.py b/src/arrow/dev/archery/archery/crossbow/core.py new file mode 100644 index 000000000..0f2309e47 --- /dev/null +++ b/src/arrow/dev/archery/archery/crossbow/core.py @@ -0,0 +1,1172 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
def _flatten(mapping):
    """Converts a hierarchical mapping to a flat dictionary

    Nested dictionaries and lists are traversed recursively. Each leaf value
    is stored under a tuple holding the keys leading to it; for lists the
    item positions are used as keys.

    Parameters
    ----------
    mapping : dict
        Possibly nested mapping.

    Returns
    -------
    dict
        Flat ``tuple => leaf value`` mapping. Empty nested containers
        contribute no entries.

    Example
    -------
    {'a': {'b': 1}, 'c': [2, 3]} ->
    {('a', 'b'): 1, ('c', 0): 2, ('c', 1): 3}
    """
    result = {}
    for k, v in mapping.items():
        if isinstance(v, list):
            # handle a list like a mapping keyed by the item positions,
            # calling _flatten() directly on a list would fail because
            # lists have no .items()
            v = dict(enumerate(v))

        if isinstance(v, dict):
            # the recursive call always returns tuple keys
            for ik, iv in _flatten(v).items():
                result[(k,) + ik] = iv
        else:
            result[(k,)] = v
    return result
def _render_jinja_template(searchpath, template, params):
    """Renders a jinja2 template looked up on the filesystem

    Parameters
    ----------
    searchpath : path-like
        Directory passed to jinja2's FileSystemLoader.
    template : str
        Name of the template to load from `searchpath`.
    params : dict
        Variables passed to the template as keyword arguments.

    Returns
    -------
    The rendered template.
    """
    def format_all(items, pattern):
        # template filter: apply `pattern.format()` to each of the items
        return [pattern.format(entry) for entry in items]

    # StrictUndefined is passed as the `undefined` type of the environment
    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath),
        trim_blocks=True,
        lstrip_blocks=True,
        undefined=jinja2.StrictUndefined
    )
    environment.filters['format_all'] = format_all

    return environment.get_template(template).render(**params)
def push(self, refs=None, github_token=None):
    """Pushes references to the origin remote

    The references recorded by create_branch() and create_tag() are always
    pushed in addition to the explicitly passed ones, and they are cleared
    after a successful push.

    Parameters
    ----------
    refs : list of str, default None
        Additional references to push.
    github_token : str, default None
        Token used for authentication, falls back to the token passed to
        the constructor.

    Raises
    ------
    RuntimeError
        If no token is available or if the push has failed.
    """
    github_token = github_token or self.github_token
    if github_token is None:
        raise RuntimeError(
            'Could not determine GitHub token. Please set the '
            'CROSSBOW_GITHUB_TOKEN environment variable to a '
            'valid GitHub access token or pass one to --github-token.'
        )
    callbacks = GitRemoteCallbacks(github_token)
    refs_to_push = list(refs or []) + self._updated_refs
    try:
        self.origin.push(refs_to_push, callbacks=callbacks)
    except pygit2.GitError as e:
        raise RuntimeError('Failed to push updated references, '
                           'potentially because of credential issues: {}'
                           .format(refs_to_push)) from e
    else:
        # reset the pending references, otherwise the next push() would
        # try to push the already pushed ones again
        self._updated_refs = []
def create_tree(self, files):
    """Writes a hierarchical mapping of files as a git tree object

    Parameters
    ----------
    files : dict
        Maps names either to file contents or to nested dictionaries
        describing subdirectories.

    Returns
    -------
    The identifier of the written tree.
    """
    builder = self.repo.TreeBuilder()

    for name, payload in files.items():
        if isinstance(payload, dict):
            # nested dictionaries become subtrees
            object_id = self.create_tree(payload)
            filemode = pygit2.GIT_FILEMODE_TREE
        else:
            # anything else is stored as a blob
            object_id = self.repo.create_blob(payload)
            filemode = pygit2.GIT_FILEMODE_BLOB
        builder.insert(name, object_id, filemode)

    return builder.write()
def _parse_github_user_repo(self):
    """Extracts the owner and the repository name from the remote url

    The last two path segments of `self.remote_url` are used, an optional
    `.git` suffix is dropped. Repository names may contain dots, e.g.
    `github3.py`.

    Returns
    -------
    tuple of (user, repo)

    Raises
    ------
    CrossbowError
        If the remote url is not available or cannot be parsed.
    """
    remote_url = self.remote_url
    # the repository segment is matched lazily so that the optional `.git`
    # suffix is not captured as part of the name; `remote_url` may be None
    m = re.match(r'.*\/([^\/]+)\/([^\/]+?)(\.git)?$', remote_url or '')
    if m is None:
        raise CrossbowError(
            "Unable to parse the github owner and repository from the "
            "repository's remote url '{}'".format(remote_url)
        )
    user, repo = m.group(1), m.group(2)
    return user, repo
def github_upload_asset_curl(self, release, path, name, mime):
    """Uploads a release asset by invoking the curl executable

    Parameters
    ----------
    release : github release object
        Only its `upload_url` attribute is read, which is expected to end
        with a `{?...}` URI template.
    path : path-like
        Local file to upload.
    name : str
        Name of the asset, sent as the `name` query parameter.
    mime : str
        Value of the Content-Type header.

    Returns
    -------
    subprocess.CompletedProcess

    Raises
    ------
    subprocess.CalledProcessError
        If curl exits with a non-zero status (--fail is passed).
    """
    from urllib.parse import quote

    # strip the URI template part from the end of the upload url
    upload_url, _ = release.upload_url.split('{?')
    # percent-encode the name, otherwise characters like `+`, `&` or
    # spaces would corrupt the query string
    upload_url += '?name={}'.format(quote(name, safe=''))

    command = [
        'curl',
        '--fail',
        '-H', "Authorization: token {}".format(self.github_token),
        '-H', "Content-Type: {}".format(mime),
        '--data-binary', '@{}'.format(path),
        upload_url
    ]
    return subprocess.run(command, shell=False, check=True)
def _latest_prefix_id(self, prefix):
    """Returns the highest numeric identifier used with the prefix

    The names in `self.repo.branches` are matched against
    `<anything><prefix>-<number>`.

    Parameters
    ----------
    prefix : str
        Job prefix, matched literally.

    Returns
    -------
    int
        The largest identifier found or -1 if no branch matches.
    """
    # escape the prefix, it is a plain string rather than a regex, so
    # characters like `.` must not be treated as wildcards
    pattern = re.compile(r'[\w\/-]*{}-(\d+)'.format(re.escape(prefix)))
    matches = filter(None, map(pattern.match, self.repo.branches))
    return max((int(m.group(1)) for m in matches), default=-1)
def get(self, job_name):
    """Loads a previously submitted job from the queue repository

    Reads `job.yml` from the `origin/<job_name>` branch, deserializes it
    and binds the resulting job to this queue.

    Parameters
    ----------
    job_name : str
        Name of the job's branch without the `origin/` prefix.

    Returns
    -------
    The deserialized job object.

    Raises
    ------
    CrossbowError
        If either the branch or the job.yml file on it is missing.
    """
    branch_name = 'origin/{}'.format(job_name)
    try:
        # both the branch lookup and the file lookup signal a missing
        # entry with KeyError
        branch = self.repo.branches[branch_name]
        content = self.file_contents(branch.target, 'job.yml')
    except KeyError:
        raise CrossbowError(
            'No job is found with name: {}'.format(job_name)
        )

    buffer = StringIO(content.decode('utf-8'))
    job = yaml.load(buffer)
    # the queue setter of the job propagates the queue to its tasks
    job.queue = self
    return job
def get_version(root, **kwargs):
    """
    Parse function for setuptools_scm that ignores tags for non-C++
    subprojects, e.g. apache-arrow-js-XXX tags.

    Parameters
    ----------
    root : path-like
        Root of the git repository to describe.
    **kwargs
        Passed to setuptools_scm's git parser, `describe_command` is
        always overridden.

    Returns
    -------
    str
        Development version in `<major>.<minor>.<patch>.dev<distance>`
        format.

    Raises
    ------
    CrossbowError
        If the described tag does not start with a three component
        version number.
    """
    from setuptools_scm.git import parse as parse_git_version

    # query the calculated version based on the git tags
    kwargs['describe_command'] = (
        'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
    )
    version = parse_git_version(root, **kwargs)
    tag = str(version.tag)

    # We may get a development tag for the next version, such as "5.0.0.dev0",
    # or the tag of an already released version, such as "4.0.0".
    # In the latter case, we need to increment the version so that the computed
    # version comes after any patch release (the next feature version after
    # 4.0.0 is 5.0.0).
    pattern = r"^(\d+)\.(\d+)\.(\d+)"
    match = re.match(pattern, tag)
    if match is None:
        # fail with a descriptive error instead of an AttributeError
        raise CrossbowError(
            'Unable to parse a `major.minor.patch` version from the git '
            'tag `{}`'.format(tag)
        )
    major, minor, patch = map(int, match.groups())
    if 'dev' not in tag:
        major += 1

    return "{}.{}.{}.dev{}".format(major, minor, patch, version.distance)
def render_files(self, searchpath, params=None):
    """Renders the task's CI template into a hierarchical file tree

    Parameters
    ----------
    searchpath : path-like
        Directory to look up the task's template in.
    params : dict, default None
        Rendering parameters taking precedence over the task's own ones.
        The task itself is always available as `task`.

    Returns
    -------
    dict
        Nested mapping of the default configuration files and the rendered
        one, placed at `self.filename`.

    Raises
    ------
    RuntimeError
        If jinja2 fails to render the template.
    """
    context = dict(self.params)
    context.update(params or {})
    context["task"] = self

    try:
        rendered = _render_jinja_template(searchpath, self.template,
                                          params=context)
    except jinja2.TemplateError as e:
        message = 'Failed to render template `{}` with {}: {}'.format(
            self.template, e.__class__.__name__, str(e)
        )
        raise RuntimeError(message)

    # the rendered file overrides the default one with the same path
    tree = dict(_default_tree)
    tree[self.filename] = rendered
    return _unflatten_tree(tree)
def status(self, force_query=False):
    """Returns the combined status of the task's commit

    The result is cached on the task, the cached value is returned unless
    `force_query` is set or nothing has been cached yet.

    Parameters
    ----------
    force_query : bool, default False
        Query the status again even if there is a cached one.

    Returns
    -------
    TaskStatus
    """
    # the attribute is looked up defensively, it may not be set at all
    cached = getattr(self, '_status', None)
    if not force_query and cached is not None:
        return cached

    github_commit = self._queue.github_commit(self.commit)
    self._status = TaskStatus(github_commit)
    return self._status
class TaskAssets(dict):
    """
    Maps the artifact patterns of a task to the uploaded release assets

    With pattern validation each artifact pattern is mapped either to the
    single asset whose name it matches or to None if there is no such asset.
    Without validation the assets are simply stored under their names.

    Parameters
    ----------
    github_release : github release object or None
        Release providing the assets through its `assets()` method, None if
        there is no release for the task.
    artifact_patterns : list of str
        Regular expressions which must match the whole asset name.
    validate_patterns : bool, default True
        Whether to match the patterns against the asset names.

    Raises
    ------
    CrossbowError
        If a pattern matches more than one asset.
    """

    def __init__(self, github_release, artifact_patterns,
                 validate_patterns=True):
        super().__init__()

        # HACK(kszucs): don't expect uploaded assets if no artifacts were
        # defined for the tasks in order to spare a bit of github rate limit
        if not artifact_patterns:
            return

        if github_release is None:
            github_assets = {}  # no assets have been uploaded for the task
        else:
            github_assets = {a.name: a for a in github_release.assets()}

        if not validate_patterns:
            # shortcut to avoid pattern validation and just set all artifacts
            self.update(github_assets)
            return

        for pattern in artifact_patterns:
            # artifact can be a regex pattern
            compiled = re.compile(f"^{pattern}$")
            # collect the asset names rather than the match objects, the
            # names are used both for the lookup and for the error message
            matching_names = [
                name for name in github_assets if compiled.match(name)
            ]
            num_matches = len(matching_names)

            # validate artifact pattern matches single asset
            if num_matches == 0:
                self[pattern] = None
            elif num_matches == 1:
                self[pattern] = github_assets[matching_names[0]]
            else:
                raise CrossbowError(
                    'Only a single asset should match pattern `{}`, there are '
                    'multiple ones: {}'.format(pattern,
                                               ', '.join(matching_names))
                )

    def missing_patterns(self):
        """Returns the patterns without a matching uploaded asset"""
        return [pattern for pattern, asset in self.items() if asset is None]

    def uploaded_assets(self):
        """Returns the assets which have been found"""
        return [asset for asset in self.values() if asset is not None]
@classmethod
def from_config(cls, config, target, tasks=None, groups=None, params=None):
    """
    Instantiate a job based on a config.

    Parameters
    ----------
    config : dict
        Deserialized content of tasks.yml
    target : Target
        Describes target repository and revision the builds run against.
    tasks : Optional[List[str]], default None
        List of glob patterns for matching task names.
    groups : Optional[List[str]], default None
        List of exact group names matching predefined task sets in the
        config.
    params : Optional[Dict[str, str]], default None
        Additional rendering parameters for the task templates.

    Returns
    -------
    Job

    Raises
    ------
    Exception:
        If invalid groups or tasks have been passed.
    """
    task_definitions = config.select(tasks, groups=groups)

    # the artifact names may refer to these version strings
    versions = {'version': target.version,
                'no_rc_version': target.no_rc_version,
                'no_rc_semver_version': target.no_rc_semver_version}

    # instantiate the tasks
    tasks = {}
    for task_name, definition in task_definitions.items():
        # work on a shallow copy, popping from the selected mapping itself
        # would remove the artifacts from the config for any later use
        definition = dict(definition)
        artifacts = definition.pop('artifacts', None) or []  # because of yaml
        artifacts = [fn.format(**versions) for fn in artifacts]
        tasks[task_name] = Task(artifacts=artifacts, **definition)

    # the constructor only accepts a dict, so translate the documented
    # `None` default to an empty set of parameters
    return cls(target=target, tasks=tasks,
               params={} if params is None else params,
               template_searchpath=config.template_searchpath)
def validate(self):
    """
    Check that the configuration is consistent and usable.

    Three checks are executed in order: every pattern listed in a task
    group selects at least one task, every task definition can be turned
    into a Task object and every task renders at least one file against a
    dummy target.

    Raises
    ------
    CrossbowError
        If any of the checks above fails.
    """
    # validate that the task groups are properly referencing the tasks
    for group_name, group in self['groups'].items():
        for pattern in group:
            tasks = self.select(tasks=[pattern])
            if not tasks:
                raise CrossbowError(
                    "The pattern `{}` defined for task group `{}` is not "
                    "matching any of the tasks defined in the "
                    "configuration file.".format(pattern, group_name)
                )

    # validate that the tasks are constructible
    for task_name, task in self['tasks'].items():
        try:
            Task(**task)
        except Exception as e:
            # chain the original exception explicitly so that its traceback
            # is reported as the direct cause of the validation error
            raise CrossbowError(
                'Unable to construct a task object from the '
                'definition of task `{}`. The original error message '
                'is: `{}`'.format(task_name, str(e))
            ) from e

    # validate that the defined tasks are renderable, in order to do that
    # define the required object with dummy data
    target = Target(
        head='e279a7e06e61c14868ca7d71dea795420aea6539',
        branch='master',
        remote='https://github.com/apache/arrow',
        version='1.0.0dev123',
        email='dummy@example.ltd'
    )

    for task_name, task in self['tasks'].items():
        task = Task(**task)
        files = task.render_files(
            self.template_searchpath,
            params=dict(
                arrow=target,
                queue_remote_url='https://github.com/org/crossbow'
            )
        )
        if not files:
            raise CrossbowError('No files have been rendered for task `{}`'
                                .format(task_name))
# TODO(kszucs): use archery.report.JinjaReport instead
class Report:
    """Base class of the reports rendered for a job.

    Keeps the job and its tasks ordered by task name, optionally restricted
    to the tasks whose name matches any of the `task_filters` glob patterns.
    """

    def __init__(self, job, task_filters=None):
        self.job = job

        names = sorted(job.tasks)
        if task_filters:
            # union of the task names selected by the individual patterns
            selected = {
                name
                for pattern in task_filters
                for name in fnmatch.filter(job.tasks.keys(), pattern)
            }
            names = [name for name in names if name in selected]

        self._tasks = {name: job.tasks[name] for name in names}

    @property
    def tasks(self):
        """The selected tasks as a name to task mapping."""
        return self._tasks

    def show(self):
        """Render the report, must be provided by the subclasses."""
        raise NotImplementedError()
def url(self, query):
    """
    Return the branch listing URL of the queue repository for a query.

    Parameters
    ----------
    query : str
        Value of the `query` parameter of the branch listing page.

    Returns
    -------
    str
    """
    repo_url = self.job.queue.remote_url
    # `str.strip('.git')` treats its argument as a set of characters and
    # removes them from both ends, which damages urls like `.../testing`;
    # only drop an actual `.git` suffix
    if repo_url.endswith('.git'):
        repo_url = repo_url[:-len('.git')]
    return '{}/branches/all?query={}'.format(repo_url, query)
def send(self, smtp_user, smtp_password, smtp_server, smtp_port):
    """
    Send the rendered email to the recipient over SMTP with SSL.

    Parameters
    ----------
    smtp_user : str
        User to log in with, also used as the envelope sender.
    smtp_password : str
        Password of `smtp_user`.
    smtp_server : str
        Host name of the SMTP server.
    smtp_port : int
        Port of the SMTP server.
    """
    import smtplib

    email = self.email()

    # the context manager terminates the session and closes the connection
    # even if the login or the delivery raises
    with smtplib.SMTP_SSL(smtp_server, smtp_port) as server:
        server.ehlo()
        server.login(smtp_user, smtp_password)
        server.sendmail(smtp_user, self.recipient_email, email)
def show(self):
    """
    Render the report as a markdown comment.

    The comment consists of the target revision, a link listing the
    submitted branches and a table with a status badge for each task,
    ordered by task name. Tasks running on a CI service without a badge
    template are reported as unsupported.
    """
    sha = self.job.target.head
    repo = self.crossbow_repo
    listing_url = 'https://github.com/{repo}/branches/all?query={branch}'

    # `{repo}` and `{branch}` are left as placeholders here, they get
    # substituted by the single `format` call on the assembled message
    pieces = [
        'Revision: {}\n\n'.format(sha),
        'Submitted crossbow builds: [{repo} @ {branch}]',
        '({})\n'.format(listing_url),
        '\n|Task|Status|\n|----|------|',
    ]

    by_name = operator.itemgetter(0)
    for task_name, task in sorted(self.job.tasks.items(), key=by_name):
        task_branch = task.branch
        try:
            badge = self.badges[task.ci].format(
                repo=repo,
                repo_dotted=repo.replace('/', '.'),
                branch=task_branch
            )
        except KeyError:
            badge = 'unsupported CI service `{}`'.format(task.ci)
        pieces.append('\n|{}|{}|'.format(task_name, badge))

    return ''.join(pieces).format(repo=repo, branch=self.job.branch)
+!Job +target: !Target + head: f766a1d615dd1b7ee706d05102e579195951a61c + email: unkown + branch: refs/pull/4435/merge + remote: https://github.com/apache/arrow + version: 0.13.0.dev306 + no_rc_version: 0.13.0.dev306 +tasks: + docker-cpp-cmake32: !Task + ci: circle + platform: linux + template: docker-tests/circle.linux.yml + artifacts: [] + params: + commands: + - docker-compose build cpp-cmake32 + - docker-compose run cpp-cmake32 + branch: ursabot-1-circle-docker-cpp-cmake32 + commit: a56b077c8d1b891a7935048e5672bf6fc07599ec + wheel-osx-cp37m: !Task + ci: travis + platform: osx + template: python-wheels/travis.osx.yml + artifacts: + - pyarrow-0.13.0.dev306-cp37-cp37m-macosx_10_6_intel.whl + params: + python_version: 3.7 + branch: ursabot-1-travis-wheel-osx-cp37m + commit: a56b077c8d1b891a7935048e5672bf6fc07599ec + wheel-osx-cp36m: !Task + ci: travis + platform: osx + template: python-wheels/travis.osx.yml + artifacts: + - pyarrow-0.13.0.dev306-cp36-cp36m-macosx_10_6_intel.whl + params: + python_version: 3.6 + branch: ursabot-1-travis-wheel-osx-cp36m + commit: a56b077c8d1b891a7935048e5672bf6fc07599ec + wheel-win-cp36m: !Task + ci: appveyor + platform: win + template: python-wheels/appveyor.yml + artifacts: + - pyarrow-0.13.0.dev306-cp36-cp36m-win_amd64.whl + params: + python_version: 3.6 + branch: ursabot-1-appveyor-wheel-win-cp36m + commit: a56b077c8d1b891a7935048e5672bf6fc07599ec +branch: ursabot-1 diff --git a/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md new file mode 100644 index 000000000..15825218c --- /dev/null +++ b/src/arrow/dev/archery/archery/crossbow/tests/fixtures/crossbow-success-message.md @@ -0,0 +1,10 @@ +Revision: {revision} + +Submitted crossbow builds: [{repo} @ {branch}](https://github.com/{repo}/branches/all?query={branch}) + +|Task|Status| +|----|------| 
+|docker-cpp-cmake32|[![CircleCI](https://img.shields.io/circleci/build/github/{repo}/{branch}-circle-docker-cpp-cmake32.svg)](https://circleci.com/gh/{repo}/tree/{branch}-circle-docker-cpp-cmake32)| +|wheel-osx-cp36m|[![TravisCI](https://img.shields.io/travis/{repo}/{branch}-travis-wheel-osx-cp36m.svg)](https://travis-ci.com/{repo}/branches)| +|wheel-osx-cp37m|[![TravisCI](https://img.shields.io/travis/{repo}/{branch}-travis-wheel-osx-cp37m.svg)](https://travis-ci.com/{repo}/branches)| +|wheel-win-cp36m|[![Appveyor](https://img.shields.io/appveyor/ci/{repo}/{branch}-appveyor-wheel-win-cp36m.svg)](https://ci.appveyor.com/project/{repo}/history)| diff --git a/src/arrow/dev/archery/archery/crossbow/tests/test_core.py b/src/arrow/dev/archery/archery/crossbow/tests/test_core.py new file mode 100644 index 000000000..518474236 --- /dev/null +++ b/src/arrow/dev/archery/archery/crossbow/tests/test_core.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
def test_config():
    """The tasks.yml located under the sources' dev directory validates."""
    tasks_yml = ArrowSources.find().dev / "tasks" / "tasks.yml"
    Config.load_yaml(tasks_yml).validate()
@pytest.mark.integration
def test_crossbow_submit(tmp_path):
    """Check the config and submit the `wheel` group without any pushing."""
    queue_path = str(tmp_path)
    runner = CliRunner()

    def invoke(*args):
        return runner.invoke(crossbow, ['--queue-path', queue_path, *args])

    # initialize an empty crossbow repository
    git.run_cmd("init", queue_path)
    git.run_cmd("-C", queue_path, "remote", "add", "origin",
                "https://github.com/dummy/repo")
    git.run_cmd("-C", queue_path, "commit", "-m", "initial",
                "--allow-empty")

    # both commands are expected to exit cleanly, in this order
    commands = [
        ('check-config',),
        ('submit', '--no-fetch', '--no-push', '-g', 'wheel'),
    ]
    for command in commands:
        result = invoke(*command)
        assert result.exit_code == 0
def test_crossbow_comment_formatter(load_fixture):
    """The comment rendered for the fixture job equals the stored message."""
    template = load_fixture('crossbow-success-message.md')
    job = load_fixture('crossbow-job.yaml', decoder=yaml.load)

    # fill in the placeholders of the stored message
    placeholders = dict(
        repo='ursa-labs/crossbow',
        branch='ursabot-1',
        revision='f766a1d615dd1b7ee706d05102e579195951a61c',
        status='has been succeeded.'
    )
    expected = textwrap.dedent(template.format(**placeholders)).strip()

    report = CommentReport(job, crossbow_repo='ursa-labs/crossbow')
    assert report.show() == expected
def flatten(node, parents=None):
    """
    Walk a nested structure of strings, lists and dicts depth first.

    Yields a `(name, parents)` pair for every string and every dict key
    found, where `parents` is the list of dict keys leading to the name.
    Lists do not add a level. Any other kind of node raises a TypeError
    once the iteration reaches it.
    """
    ancestors = list(parents or [])

    if isinstance(node, str):
        yield (node, ancestors)
        return

    if isinstance(node, list):
        for item in node:
            yield from flatten(item, parents=ancestors)
        return

    if not isinstance(node, dict):
        raise TypeError(node)

    for name, children in node.items():
        yield (name, ancestors)
        yield from flatten(children, parents=ancestors + [name])
def _read_config(self, config_path, compose_bin):
    """
    Validate and read the docker-compose.yml

    The `x-hierarchy` and `x-with-gpus` sections are cross-checked against
    the defined services, then the `config` subcommand of docker-compose
    is executed to validate and render the file. On success the path of
    the file and the rendered configuration are stored on the instance.

    Parameters
    ----------
    config_path : Path
        Location of the docker-compose.yml file.
    compose_bin : str
        The docker-compose executable used for the validation, falls back
        to `docker-compose` if empty.

    Raises
    ------
    ValueError
        Listing every inconsistency found and every line docker-compose
        has written to its standard error.
    """
    yaml = YAML()
    with config_path.open() as fp:
        config = yaml.load(fp)

    services = config['services'].keys()
    self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
    self.with_gpus = config.get('x-with-gpus', [])
    nodes = self.hierarchy.keys()
    errors = []

    for name in self.with_gpus:
        if name not in services:
            errors.append(
                'Service `{}` defined in `x-with-gpus` bot not in '
                '`services`'.format(name)
            )
    for name in nodes - services:
        errors.append(
            'Service `{}` is defined in `x-hierarchy` bot not in '
            '`services`'.format(name)
        )
    for name in services - nodes:
        errors.append(
            'Service `{}` is defined in `services` but not in '
            '`x-hierarchy`'.format(name)
        )

    # trigger docker-compose's own validation using the executable which
    # was passed in instead of a hard-coded one
    compose = Command(compose_bin or 'docker-compose')
    args = ['--file', str(config_path), 'config']
    result = compose.run(*args, env=self.env, check=False,
                         stderr=subprocess.PIPE, stdout=subprocess.PIPE)

    if result.returncode != 0:
        # report each line of docker-compose's error output separately
        errors += result.stderr.decode().splitlines()

    if errors:
        msg = '\n'.join([' - {}'.format(msg) for msg in errors])
        raise ValueError(
            'Found errors with docker-compose:\n{}'.format(msg)
        )

    rendered_config = StringIO(result.stdout.decode())
    self.path = config_path
    self.config = yaml.load(rendered_config)
template=' export {}={}' + ) + ) + ) + + def _execute_docker(self, *args, **kwargs): + # execute as a plain docker cli command + try: + result = Docker().run(*args, **kwargs) + result.check_returncode() + except subprocess.CalledProcessError as e: + raise RuntimeError( + "{} exited with non-zero exit code {}".format( + ' '.join(e.cmd), e.returncode + ) + ) + + def pull(self, service_name, pull_leaf=True, using_docker=False): + def _pull(service): + args = ['pull'] + if service['image'] in self.pull_memory: + return + + if using_docker: + try: + self._execute_docker(*args, service['image']) + except Exception as e: + # better --ignore-pull-failures handling + print(e) + else: + args.append('--ignore-pull-failures') + self._execute_compose(*args, service['name']) + + self.pull_memory.add(service['image']) + + service = self.config.get(service_name) + for ancestor in service['ancestors']: + _pull(self.config.get(ancestor)) + if pull_leaf: + _pull(service) + + def build(self, service_name, use_cache=True, use_leaf_cache=True, + using_docker=False, using_buildx=False): + def _build(service, use_cache): + if 'build' not in service: + # nothing to do + return + + args = [] + cache_from = list(service.get('build', {}).get('cache_from', [])) + if use_cache: + for image in cache_from: + if image not in self.pull_memory: + try: + self._execute_docker('pull', image) + except Exception as e: + print(e) + finally: + self.pull_memory.add(image) + else: + args.append('--no-cache') + + # turn on inline build cache, this is a docker buildx feature + # used to bundle the image build cache to the pushed image manifest + # so the build cache can be reused across hosts, documented at + # https://github.com/docker/buildx#--cache-tonametypetypekeyvalue + if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1': + args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1']) + + if using_buildx: + for k, v in service['build'].get('args', {}).items(): + args.extend(['--build-arg', '{}={}'.format(k, 
v)]) + + if use_cache: + cache_ref = '{}-cache'.format(service['image']) + cache_from = 'type=registry,ref={}'.format(cache_ref) + cache_to = ( + 'type=registry,ref={},mode=max'.format(cache_ref) + ) + args.extend([ + '--cache-from', cache_from, + '--cache-to', cache_to, + ]) + + args.extend([ + '--output', 'type=docker', + '-f', service['build']['dockerfile'], + '-t', service['image'], + service['build'].get('context', '.') + ]) + self._execute_docker("buildx", "build", *args) + elif using_docker: + # better for caching + for k, v in service['build'].get('args', {}).items(): + args.extend(['--build-arg', '{}={}'.format(k, v)]) + for img in cache_from: + args.append('--cache-from="{}"'.format(img)) + args.extend([ + '-f', service['build']['dockerfile'], + '-t', service['image'], + service['build'].get('context', '.') + ]) + self._execute_docker("build", *args) + else: + self._execute_compose("build", *args, service['name']) + + service = self.config.get(service_name) + # build ancestor services + for ancestor in service['ancestors']: + _build(self.config.get(ancestor), use_cache=use_cache) + # build the leaf/target service + _build(service, use_cache=use_cache and use_leaf_cache) + + def run(self, service_name, command=None, *, env=None, volumes=None, + user=None, using_docker=False): + service = self.config.get(service_name) + + args = [] + if user is not None: + args.extend(['-u', user]) + + if env is not None: + for k, v in env.items(): + args.extend(['-e', '{}={}'.format(k, v)]) + + if volumes is not None: + for volume in volumes: + args.extend(['--volume', volume]) + + if using_docker or service['need_gpu']: + # use gpus, requires docker>=19.03 + if service['need_gpu']: + args.extend(['--gpus', 'all']) + + if service.get('shm_size'): + args.extend(['--shm-size', service['shm_size']]) + + # append env variables from the compose conf + for k, v in service.get('environment', {}).items(): + args.extend(['-e', '{}={}'.format(k, v)]) + + # append volumes from the 
compose conf + for v in service.get('volumes', []): + if not isinstance(v, str): + # if not the compact string volume definition + v = "{}:{}".format(v['source'], v['target']) + args.extend(['-v', v]) + + # infer whether an interactive shell is desired or not + if command in ['cmd.exe', 'bash', 'sh', 'powershell']: + args.append('-it') + + # get the actual docker image name instead of the compose service + # name which we refer as image in general + args.append(service['image']) + + # add command from compose if it wasn't overridden + if command is not None: + args.append(command) + else: + # replace whitespaces from the preformatted compose command + cmd = _sanitize_command(service.get('command', '')) + if cmd: + args.append(cmd) + + # execute as a plain docker cli command + self._execute_docker('run', '--rm', *args) + else: + # execute as a docker-compose command + args.append(service_name) + if command is not None: + args.append(command) + self._execute_compose('run', '--rm', *args) + + def push(self, service_name, user=None, password=None, using_docker=False): + def _push(service): + if using_docker: + return self._execute_docker('push', service['image']) + else: + return self._execute_compose('push', service['name']) + + if user is not None: + try: + # TODO(kszucs): have an option for a prompt + self._execute_docker('login', '-u', user, '-p', password) + except subprocess.CalledProcessError: + # hide credentials + msg = ('Failed to push `{}`, check the passed credentials' + .format(service_name)) + raise RuntimeError(msg) from None + + service = self.config.get(service_name) + for ancestor in service['ancestors']: + _push(self.config.get(ancestor)) + _push(service) + + def images(self): + return sorted(self.config.hierarchy.keys()) diff --git a/src/arrow/dev/archery/archery/docker/__init__.py b/src/arrow/dev/archery/archery/docker/__init__.py new file mode 100644 index 000000000..6be29c916 --- /dev/null +++ b/src/arrow/dev/archery/archery/docker/__init__.py @@ 
-0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .core import DockerCompose, UndefinedImage # noqa diff --git a/src/arrow/dev/archery/archery/docker/cli.py b/src/arrow/dev/archery/archery/docker/cli.py new file mode 100644 index 000000000..c6b4a6473 --- /dev/null +++ b/src/arrow/dev/archery/archery/docker/cli.py @@ -0,0 +1,261 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import contextlib
import os

import click

from ..utils.cli import validate_arrow_sources
from .core import DockerCompose, UndefinedImage


def _mock_compose_calls(compose):
    """
    Replace the command executors of `compose` with echoing stand-ins.

    Used by `--dry-run`: instead of spawning docker / docker-compose the
    command line (prefixed with the active parameters) is printed.
    """
    from types import MethodType
    from subprocess import CompletedProcess

    def _mock(compose, executable):
        def _execute(self, *args, **kwargs):
            params = ['{}={}'.format(k, v)
                      for k, v in self.config.params.items()]
            command = ' '.join(params + [executable] + list(args))
            click.echo(command)
            return CompletedProcess([], 0)
        return MethodType(_execute, compose)

    compose._execute_docker = _mock(compose, executable='docker')
    compose._execute_compose = _mock(compose, executable='docker-compose')


@contextlib.contextmanager
def _click_errors():
    """
    Translate the errors raised by DockerCompose into click errors.

    Shared by the build, run and push commands so that all of them report
    failures the same way instead of printing a traceback.
    """
    try:
        yield
    except UndefinedImage as e:
        raise click.ClickException(
            "There is no service/image defined in docker-compose.yml with "
            "name: {}".format(str(e))
        )
    except (RuntimeError, ValueError) as e:
        # ValueError is raised by DockerCompose.run for an unknown
        # --resource-limit preset
        raise click.ClickException(str(e))


def _parse_env(entries):
    """
    Turn the repeated `--env KEY=VALUE` options into a dictionary.

    Raises click.BadParameter for an entry without a `=` separator rather
    than failing later with an opaque ValueError from dict().
    """
    env = {}
    for entry in entries:
        key, sep, value = entry.partition('=')
        if not sep:
            raise click.BadParameter(
                "expected KEY=VALUE, got `{}`".format(entry),
                param_hint='--env'
            )
        env[key] = value
    return env


@click.group()
@click.option("--src", metavar="<arrow_src>", default=None,
              callback=validate_arrow_sources,
              help="Specify Arrow source directory.")
@click.option('--dry-run/--execute', default=False,
              help="Display the docker-compose commands instead of executing "
                   "them.")
@click.pass_context
def docker(ctx, src, dry_run):
    """
    Interact with docker-compose based builds.
    """
    ctx.ensure_object(dict)

    config_path = src.path / 'docker-compose.yml'
    if not config_path.exists():
        raise click.ClickException(
            "Docker compose configuration cannot be found in directory {}, "
            "try to pass the arrow source directory explicitly.".format(src)
        )

    # take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the
    # environment variables to keep the usage similar to docker-compose
    compose = DockerCompose(config_path, params=os.environ)
    if dry_run:
        _mock_compose_calls(compose)
    ctx.obj['compose'] = compose


@docker.command("check-config")
@click.pass_obj
def check_config(obj):
    """
    Validate docker-compose configuration.
    """
    # executes the body of the docker function above which does the validation
    # during the configuration loading


@docker.command('build')
@click.argument('image')
@click.option('--force-pull/--no-pull', default=True,
              help="Whether to force pull the image and its ancestor images")
@click.option('--using-docker-cli', default=False, is_flag=True,
              envvar='ARCHERY_USE_DOCKER_CLI',
              help="Use docker CLI directly for building instead of calling "
                   "docker-compose. This may help to reuse cached layers.")
@click.option('--using-docker-buildx', default=False, is_flag=True,
              envvar='ARCHERY_USE_DOCKER_BUILDX',
              help="Use buildx with docker CLI directly for building instead "
                   "of calling docker-compose or the plain docker build "
                   "command. This option makes the build cache reusable "
                   "across hosts.")
@click.option('--use-cache/--no-cache', default=True,
              help="Whether to use cache when building the image and its "
                   "ancestor images")
@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
              help="Whether to use cache when building only the (leaf) image "
                   "passed as the argument. To disable caching for both the "
                   "image and its ancestors use --no-cache option.")
@click.pass_obj
def docker_build(obj, image, *, force_pull, using_docker_cli,
                 using_docker_buildx, use_cache, use_leaf_cache):
    """
    Execute docker-compose builds.
    """
    compose = obj['compose']

    # buildx is driven through the docker cli as well
    using_docker_cli |= using_docker_buildx
    with _click_errors():
        if force_pull:
            compose.pull(image, pull_leaf=use_leaf_cache,
                         using_docker=using_docker_cli)
        compose.build(image, use_cache=use_cache,
                      use_leaf_cache=use_leaf_cache,
                      using_docker=using_docker_cli,
                      using_buildx=using_docker_buildx,
                      pull_parents=force_pull)


@docker.command('run')
@click.argument('image')
@click.argument('command', required=False, default=None)
@click.option('--env', '-e', multiple=True,
              help="Set environment variable within the container")
@click.option('--user', '-u', default=None,
              help="Username or UID to run the container with")
@click.option('--force-pull/--no-pull', default=True,
              help="Whether to force pull the image and its ancestor images")
@click.option('--force-build/--no-build', default=True,
              help="Whether to force build the image and its ancestor images")
@click.option('--build-only', default=False, is_flag=True,
              help="Pull and/or build the image, but do not run it")
@click.option('--using-docker-cli', default=False, is_flag=True,
              envvar='ARCHERY_USE_DOCKER_CLI',
              help="Use docker CLI directly for building instead of calling "
                   "docker-compose. This may help to reuse cached layers.")
@click.option('--using-docker-buildx', default=False, is_flag=True,
              envvar='ARCHERY_USE_DOCKER_BUILDX',
              help="Use buildx with docker CLI directly for building instead "
                   "of calling docker-compose or the plain docker build "
                   "command. This option makes the build cache reusable "
                   "across hosts.")
@click.option('--use-cache/--no-cache', default=True,
              help="Whether to use cache when building the image and its "
                   "ancestor images")
@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
              help="Whether to use cache when building only the (leaf) image "
                   "passed as the argument. To disable caching for both the "
                   "image and its ancestors use --no-cache option.")
@click.option('--resource-limit', default=None,
              help="A CPU/memory limit preset to mimic CI environments like "
                   "GitHub Actions. Implies --using-docker-cli. Note that "
                   "exporting ARCHERY_DOCKER_BIN=\"sudo docker\" is likely "
                   "required, unless Docker is configured with cgroups v2 "
                   "(else Docker will silently ignore the limits).")
@click.option('--volume', '-v', multiple=True,
              help="Set volume within the container")
@click.pass_obj
def docker_run(obj, image, command, *, env, user, force_pull, force_build,
               build_only, using_docker_cli, using_docker_buildx, use_cache,
               use_leaf_cache, resource_limit, volume):
    """
    Execute docker-compose builds.

    To see the available builds run `archery docker images`.

    Examples:

    # execute a single build
    archery docker run conda-python

    # execute the builds without using the build cache
    archery docker run --no-cache conda-python

    # pass a docker-compose parameter, like the python version
    PYTHON=3.8 archery docker run conda-python

    # disable the cache only for the leaf image
    PANDAS=master archery docker run --no-leaf-cache conda-python-pandas

    # entirely skip building the image
    archery docker run --no-pull --no-build conda-python

    # pass runtime parameters via docker environment variables
    archery docker run -e CMAKE_BUILD_TYPE=release ubuntu-cpp

    # set a volume
    archery docker run -v $PWD/build:/build ubuntu-cpp

    # starting an interactive bash session for debugging
    archery docker run ubuntu-cpp bash
    """
    compose = obj['compose']
    # buildx is driven through the docker cli as well
    using_docker_cli |= using_docker_buildx

    env = _parse_env(env)
    with _click_errors():
        if force_pull:
            compose.pull(image, pull_leaf=use_leaf_cache,
                         using_docker=using_docker_cli)
        if force_build:
            compose.build(image, use_cache=use_cache,
                          use_leaf_cache=use_leaf_cache,
                          using_docker=using_docker_cli,
                          using_buildx=using_docker_buildx)
        if build_only:
            return
        compose.run(
            image,
            command=command,
            env=env,
            user=user,
            using_docker=using_docker_cli,
            resource_limit=resource_limit,
            volumes=volume
        )


@docker.command('push')
@click.argument('image')
@click.option('--user', '-u', required=False, envvar='ARCHERY_DOCKER_USER',
              help='Docker repository username')
@click.option('--password', '-p', required=False,
              envvar='ARCHERY_DOCKER_PASSWORD',
              help='Docker repository password')
@click.option('--using-docker-cli', default=False, is_flag=True,
              help="Use docker CLI directly for pushing instead of calling "
                   "docker-compose.")
@click.pass_obj
def docker_compose_push(obj, image, user, password, using_docker_cli):
    """Push the generated docker-compose image."""
    compose = obj['compose']
    # report unknown images and failed pushes like build/run do
    with _click_errors():
        compose.push(image, user=user, password=password,
                     using_docker=using_docker_cli)


@docker.command('images')
@click.pass_obj
def docker_compose_images(obj):
    """List the available docker-compose images."""
    compose = obj['compose']
    click.echo('Available images:')
    for image in compose.images():
        click.echo(f' - {image}')


# diff --git a/src/arrow/dev/archery/archery/docker/core.py b/src/arrow/dev/archery/archery/docker/core.py
# new file mode 100644
# index 000000000..aaf16bdfa
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/docker/core.py
# @@ -0,0 +1,417 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import re
import subprocess
from io import StringIO

from dotenv import dotenv_values
from ruamel.yaml import YAML

from ..utils.command import Command, default_bin
from ..compat import _ensure_path


def flatten(node, parents=None):
    """
    Walk a nested str/list/dict tree and yield `(name, parents)` pairs.

    `parents` is the list of dictionary keys leading to `name`. Strings are
    leaves, lists are traversed transparently and every dictionary key is
    yielded itself before its value is traversed with the key appended to
    the parents. Any other node type raises TypeError.
    """
    parents = list(parents or [])
    if isinstance(node, str):
        yield (node, parents)
    elif isinstance(node, list):
        for value in node:
            yield from flatten(value, parents=parents)
    elif isinstance(node, dict):
        for key, value in node.items():
            yield (key, parents)
            yield from flatten(value, parents=parents + [key])
    else:
        raise TypeError(node)


def _sanitize_command(cmd):
    """Join a list command and collapse whitespace runs to single spaces."""
    if isinstance(cmd, list):
        cmd = " ".join(cmd)
    return re.sub(r"\s+", " ", cmd)


class UndefinedImage(Exception):
    """Raised when a service name is not defined in the compose config."""
    pass


class ComposeConfig:
    """
    A validated docker-compose.yml together with the environment variables
    (process environment, .env defaults and explicit parameters) used to
    render it.
    """

    def __init__(self, config_path, dotenv_path, compose_bin, params=None):
        config_path = _ensure_path(config_path)
        if dotenv_path:
            dotenv_path = _ensure_path(dotenv_path)
        else:
            # default to the .env file next to the compose file
            dotenv_path = config_path.parent / '.env'
        self._read_env(dotenv_path, params)
        self._read_config(config_path, compose_bin)

    def _read_env(self, dotenv_path, params):
        """
        Read .env and merge it with explicitly passed parameters.
        """
        self.dotenv = dotenv_values(str(dotenv_path))
        if params is None:
            self.params = {}
        else:
            # only the variables declared in .env count as parameters
            self.params = {k: v for k, v in params.items() if k in self.dotenv}

        # forward the process' environment variables
        self.env = os.environ.copy()
        # set the defaults from the dotenv files
        self.env.update(self.dotenv)
        # override the defaults passed as parameters
        self.env.update(self.params)

        # translate docker's architecture notation to a more widely used one
        arch = self.env.get('ARCH', 'amd64')
        arch_aliases = {
            'amd64': 'x86_64',
            'arm64v8': 'aarch64',
            's390x': 's390x'
        }
        arch_short_aliases = {
            'amd64': 'x64',
            'arm64v8': 'arm64',
            's390x': 's390x'
        }
        self.env['ARCH_ALIAS'] = arch_aliases.get(arch, arch)
        self.env['ARCH_SHORT_ALIAS'] = arch_short_aliases.get(arch, arch)

    def _read_config(self, config_path, compose_bin):
        """
        Validate and read the docker-compose.yml

        Checks that `x-with-gpus` and `x-hierarchy` are consistent with
        `services`, then lets docker-compose validate and render the file.
        Raises ValueError listing every problem found.
        """
        yaml = YAML()
        with config_path.open() as fp:
            config = yaml.load(fp)

        services = config['services'].keys()
        self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
        self.limit_presets = config.get('x-limit-presets', {})
        self.with_gpus = config.get('x-with-gpus', [])
        nodes = self.hierarchy.keys()
        errors = []

        for name in self.with_gpus:
            if name not in services:
                errors.append(
                    'Service `{}` defined in `x-with-gpus` bot not in '
                    '`services`'.format(name)
                )
        for name in nodes - services:
            errors.append(
                'Service `{}` is defined in `x-hierarchy` bot not in '
                '`services`'.format(name)
            )
        for name in services - nodes:
            errors.append(
                'Service `{}` is defined in `services` but not in '
                '`x-hierarchy`'.format(name)
            )

        # trigger docker-compose's own validation with the binary the caller
        # selected, falling back to the default name when none was given
        compose = Command(compose_bin or 'docker-compose')
        args = ['--file', str(config_path), 'config']
        result = compose.run(*args, env=self.env, check=False,
                             stderr=subprocess.PIPE, stdout=subprocess.PIPE)

        if result.returncode != 0:
            # report each line of docker-compose's error output
            errors += result.stderr.decode().splitlines()

        if errors:
            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
            raise ValueError(
                'Found errors with docker-compose:\n{}'.format(msg)
            )

        # keep the configuration as rendered by docker-compose
        rendered_config = StringIO(result.stdout.decode())
        self.path = config_path
        self.config = yaml.load(rendered_config)

    def get(self, service_name):
        """
        Return the service definition annotated with `name`, `need_gpu` and
        `ancestors`. Raises UndefinedImage for an unknown service.
        """
        try:
            service = self.config['services'][service_name]
        except KeyError:
            raise UndefinedImage(service_name)
        service['name'] = service_name
        service['need_gpu'] = service_name in self.with_gpus
        service['ancestors'] = self.hierarchy[service_name]
        return service

    def __getitem__(self, service_name):
        return self.get(service_name)


class Docker(Command):
    """The plain docker command line client."""

    def __init__(self, docker_bin=None):
        self.bin = default_bin(docker_bin, "docker")


class DockerCompose(Command):
    """
    Pull, build, run and push the services of a docker-compose.yml, either
    through docker-compose or through the plain docker cli.
    """

    def __init__(self, config_path, dotenv_path=None, compose_bin=None,
                 params=None):
        compose_bin = default_bin(compose_bin, 'docker-compose')
        self.config = ComposeConfig(config_path, dotenv_path, compose_bin,
                                    params)
        self.bin = compose_bin
        # names of the images a pull was already attempted for
        self.pull_memory = set()

    def clear_pull_memory(self):
        """Forget which images have already been pulled."""
        self.pull_memory = set()

    def _execute_compose(self, *args, **kwargs):
        """
        Run a docker-compose subcommand against the loaded config; a failure
        is re-raised as RuntimeError describing the active parameters.
        """
        # execute as a docker compose command
        try:
            result = super().run('--file', str(self.config.path), *args,
                                 env=self.config.env, **kwargs)
            result.check_returncode()
        except subprocess.CalledProcessError as e:
            def formatdict(d, template):
                return '\n'.join(
                    template.format(k, v) for k, v in sorted(d.items())
                )
            msg = (
                "`{cmd}` exited with a non-zero exit code {code}, see the "
                "process log above.\n\nThe docker-compose command was "
                "invoked with the following parameters:\n\nDefaults defined "
                "in .env:\n{dotenv}\n\nArchery was called with:\n{params}"
            )
            raise RuntimeError(
                msg.format(
                    cmd=' '.join(e.cmd),
                    code=e.returncode,
                    dotenv=formatdict(self.config.dotenv, template=' {}: {}'),
                    params=formatdict(
                        self.config.params, template=' export {}={}'
                    )
                )
            )

    def _execute_docker(self, *args, **kwargs):
        """
        Run a plain docker subcommand; a failure is re-raised as
        RuntimeError. Note that the message contains the full command line.
        """
        # execute as a plain docker cli command
        try:
            result = Docker().run(*args, **kwargs)
            result.check_returncode()
        except subprocess.CalledProcessError as e:
            raise RuntimeError(
                "{} exited with non-zero exit code {}".format(
                    ' '.join(e.cmd), e.returncode
                )
            )

    def pull(self, service_name, pull_leaf=True, using_docker=False):
        """
        Pull the images of the ancestors of `service_name` and, when
        `pull_leaf` is set, of the service itself. Each image is attempted
        at most once until clear_pull_memory() is called.
        """
        def _pull(service):
            args = ['pull']
            if service['image'] in self.pull_memory:
                return

            if using_docker:
                try:
                    self._execute_docker(*args, service['image'])
                except Exception as e:
                    # better --ignore-pull-failures handling
                    print(e)
            else:
                args.append('--ignore-pull-failures')
                self._execute_compose(*args, service['name'])

            self.pull_memory.add(service['image'])

        service = self.config.get(service_name)
        for ancestor in service['ancestors']:
            _pull(self.config.get(ancestor))
        if pull_leaf:
            _pull(service)

    def build(self, service_name, use_cache=True, use_leaf_cache=True,
              using_docker=False, using_buildx=False, pull_parents=True):
        """
        Build the ancestors of `service_name`, then the service itself.

        `use_cache` applies to every image, `use_leaf_cache` additionally
        controls the target image only. `pull_parents` pulls the images
        listed under `cache_from` beforehand (failures are only printed).
        Services without a `build` section are skipped.
        """
        def _build(service, use_cache):
            if 'build' not in service:
                # nothing to do
                return

            args = []
            cache_from = list(service.get('build', {}).get('cache_from', []))
            if pull_parents:
                for image in cache_from:
                    if image not in self.pull_memory:
                        try:
                            self._execute_docker('pull', image)
                        except Exception as e:
                            print(e)
                        finally:
                            self.pull_memory.add(image)

            if not use_cache:
                args.append('--no-cache')

            # turn on inline build cache, this is a docker buildx feature
            # used to bundle the image build cache to the pushed image manifest
            # so the build cache can be reused across hosts, documented at
            # https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
            if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1':
                args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1'])

            if using_buildx:
                for k, v in service['build'].get('args', {}).items():
                    args.extend(['--build-arg', '{}={}'.format(k, v)])

                if use_cache:
                    cache_ref = '{}-cache'.format(service['image'])
                    cache_from = 'type=registry,ref={}'.format(cache_ref)
                    cache_to = (
                        'type=registry,ref={},mode=max'.format(cache_ref)
                    )
                    args.extend([
                        '--cache-from', cache_from,
                        '--cache-to', cache_to,
                    ])

                args.extend([
                    '--output', 'type=docker',
                    '-f', service['build']['dockerfile'],
                    '-t', service['image'],
                    service['build'].get('context', '.')
                ])
                self._execute_docker("buildx", "build", *args)
            elif using_docker:
                # better for caching
                for k, v in service['build'].get('args', {}).items():
                    args.extend(['--build-arg', '{}={}'.format(k, v)])
                for img in cache_from:
                    args.append('--cache-from="{}"'.format(img))
                args.extend([
                    '-f', service['build']['dockerfile'],
                    '-t', service['image'],
                    service['build'].get('context', '.')
                ])
                self._execute_docker("build", *args)
            else:
                self._execute_compose("build", *args, service['name'])

        service = self.config.get(service_name)
        # build ancestor services
        for ancestor in service['ancestors']:
            _build(self.config.get(ancestor), use_cache=use_cache)
        # build the leaf/target service
        _build(service, use_cache=use_cache and use_leaf_cache)

    def run(self, service_name, command=None, *, env=None, volumes=None,
            user=None, using_docker=False, resource_limit=None):
        """
        Run `service_name`, optionally overriding its command.

        The plain docker cli is used when `using_docker` is set, when the
        service is listed in `x-with-gpus` or when a `resource_limit` preset
        is requested; otherwise docker-compose runs the service. Raises
        ValueError for a preset missing from `x-limit-presets`.
        """
        service = self.config.get(service_name)

        args = []
        if user is not None:
            args.extend(['-u', user])

        if env is not None:
            for k, v in env.items():
                args.extend(['-e', '{}={}'.format(k, v)])

        if volumes is not None:
            for volume in volumes:
                args.extend(['--volume', volume])

        if using_docker or service['need_gpu'] or resource_limit:
            # use gpus, requires docker>=19.03
            if service['need_gpu']:
                args.extend(['--gpus', 'all'])

            if service.get('shm_size'):
                args.extend(['--shm-size', service['shm_size']])

            # append env variables from the compose conf
            for k, v in service.get('environment', {}).items():
                args.extend(['-e', '{}={}'.format(k, v)])

            # append volumes from the compose conf
            for v in service.get('volumes', []):
                if not isinstance(v, str):
                    # if not the compact string volume definition
                    v = "{}:{}".format(v['source'], v['target'])
                args.extend(['-v', v])

            # infer whether an interactive shell is desired or not
            if command in ['cmd.exe', 'bash', 'sh', 'powershell']:
                args.append('-it')

            if resource_limit:
                limits = self.config.limit_presets.get(resource_limit)
                if not limits:
                    raise ValueError(
                        f"Unknown resource limit preset '{resource_limit}'")
                cpuset = limits.get('cpuset_cpus', [])
                if cpuset:
                    args.append(f'--cpuset-cpus={",".join(map(str, cpuset))}')
                memory = limits.get('memory')
                if memory:
                    # the same value is passed for memory and memory-swap
                    args.append(f'--memory={memory}')
                    args.append(f'--memory-swap={memory}')

            # get the actual docker image name instead of the compose service
            # name which we refer as image in general
            args.append(service['image'])

            # add command from compose if it wasn't overridden
            if command is not None:
                args.append(command)
            else:
                # replace whitespaces from the preformatted compose command
                cmd = _sanitize_command(service.get('command', ''))
                if cmd:
                    args.append(cmd)

            # execute as a plain docker cli command
            self._execute_docker('run', '--rm', *args)
        else:
            # execute as a docker-compose command
            args.append(service_name)
            if command is not None:
                args.append(command)
            self._execute_compose('run', '--rm', *args)

    def push(self, service_name, user=None, password=None, using_docker=False):
        """
        Push the images of the ancestors of `service_name` and of the
        service itself, logging in first when `user` is given.

        Raises RuntimeError when the login or a push fails; the login error
        never contains the credentials.
        """
        def _push(service):
            if using_docker:
                return self._execute_docker('push', service['image'])
            else:
                return self._execute_compose('push', service['name'])

        if user is not None:
            try:
                # TODO(kszucs): have an option for a prompt
                self._execute_docker('login', '-u', user, '-p', password)
            except (subprocess.CalledProcessError, RuntimeError):
                # hide credentials: _execute_docker reports failures as a
                # RuntimeError whose message holds the whole command line,
                # including the password, so it must not propagate
                msg = ('Failed to push `{}`, check the passed credentials'
                       .format(service_name))
                raise RuntimeError(msg) from None

        service = self.config.get(service_name)
        for ancestor in service['ancestors']:
            _push(self.config.get(ancestor))
        _push(service)

    def images(self):
        """Return the sorted names of the services in `x-hierarchy`."""
        return sorted(self.config.hierarchy.keys())


# diff --git a/src/arrow/dev/archery/archery/docker/tests/test_docker.py b/src/arrow/dev/archery/archery/docker/tests/test_docker.py
# new file mode 100644
# index 000000000..982f3bfc1
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/docker/tests/test_docker.py
# @@ -0,0 +1,531 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +import collections +import os +import re +import subprocess +from unittest import mock + +import pytest + +from archery.docker import DockerCompose +from archery.testing import assert_subprocess_calls, override_env, PartialEnv + + +missing_service_compose_yml = """ +version: '3.5' + +x-hierarchy: + - foo: + - sub-foo: + - sub-sub-foo + - another-sub-sub-foo + - bar: + - sub-bar + - baz + +services: + foo: + image: org/foo + sub-sub-foo: + image: org/sub-sub-foo + another-sub-sub-foo: + image: org/another-sub-sub-foo + bar: + image: org/bar + sub-bar: + image: org/sub-bar + baz: + image: org/baz +""" + +missing_node_compose_yml = """ +version: '3.5' + +x-hierarchy: + - foo: + - sub-foo: + - sub-sub-foo + - another-sub-sub-foo + - bar + - baz + +services: + foo: + image: org/foo + sub-foo: + image: org/sub-foo + sub-sub-foo: + image: org/sub-foo-foo + another-sub-sub-foo: + image: org/another-sub-sub-foo + bar: + image: org/bar + sub-bar: + image: org/sub-bar + baz: + image: org/baz +""" + +ok_compose_yml = """ +version: '3.5' + +x-hierarchy: + - foo: + - sub-foo: + - sub-sub-foo + - another-sub-sub-foo + - bar: + - sub-bar + - baz + +services: + foo: + image: org/foo + sub-foo: + image: org/sub-foo + sub-sub-foo: + image: org/sub-sub-foo + another-sub-sub-foo: + image: org/another-sub-sub-foo + bar: + image: org/bar + sub-bar: + image: org/sub-bar + baz: + image: org/baz +""" + +arrow_compose_yml = """ +version: '3.5' + +x-with-gpus: + - ubuntu-cuda + +x-hierarchy: + - conda-cpp: + - conda-python: + - conda-python-pandas + - conda-python-dask + - ubuntu-cpp: + - ubuntu-cpp-cmake32 + - ubuntu-c-glib: + - ubuntu-ruby + - ubuntu-cuda + +x-limit-presets: + github: + cpuset_cpus: [0, 1] + memory: 7g + +services: + conda-cpp: + image: org/conda-cpp + build: + context: . + dockerfile: ci/docker/conda-cpp.dockerfile + conda-python: + image: org/conda-python + build: + context: . 
+ dockerfile: ci/docker/conda-cpp.dockerfile + args: + python: 3.6 + conda-python-pandas: + image: org/conda-python-pandas + build: + context: . + dockerfile: ci/docker/conda-python-pandas.dockerfile + conda-python-dask: + image: org/conda-python-dask + ubuntu-cpp: + image: org/ubuntu-cpp + build: + context: . + dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile + ubuntu-cpp-cmake32: + image: org/ubuntu-cpp-cmake32 + ubuntu-c-glib: + image: org/ubuntu-c-glib + ubuntu-ruby: + image: org/ubuntu-ruby + ubuntu-cuda: + image: org/ubuntu-cuda + environment: + CUDA_ENV: 1 + OTHER_ENV: 2 + volumes: + - /host:/container + command: /bin/bash -c "echo 1 > /tmp/dummy && cat /tmp/dummy" +""" + +arrow_compose_env = { + 'UBUNTU': '20.04', # overridden below + 'PYTHON': '3.6', + 'PANDAS': 'latest', + 'DASK': 'latest', # overridden below +} + + +def create_config(directory, yml_content, env_content=None): + env_path = directory / '.env' + config_path = directory / 'docker-compose.yml' + + with config_path.open('w') as fp: + fp.write(yml_content) + + if env_content is not None: + with env_path.open('w') as fp: + for k, v in env_content.items(): + fp.write("{}={}\n".format(k, v)) + + return config_path + + +def format_run(args): + cmd = ["run", "--rm"] + if isinstance(args, str): + return " ".join(cmd + [args]) + else: + return cmd + args + + +@pytest.fixture +def arrow_compose_path(tmpdir): + return create_config(tmpdir, arrow_compose_yml, arrow_compose_env) + + +def test_config_validation(tmpdir): + config_path = create_config(tmpdir, missing_service_compose_yml) + msg = "`sub-foo` is defined in `x-hierarchy` bot not in `services`" + with pytest.raises(ValueError, match=msg): + DockerCompose(config_path) + + config_path = create_config(tmpdir, missing_node_compose_yml) + msg = "`sub-bar` is defined in `services` but not in `x-hierarchy`" + with pytest.raises(ValueError, match=msg): + DockerCompose(config_path) + + config_path = create_config(tmpdir, ok_compose_yml) + 
DockerCompose(config_path) # no issue + + +def assert_docker_calls(compose, expected_args): + base_command = ['docker'] + expected_commands = [] + for args in expected_args: + if isinstance(args, str): + args = re.split(r"\s", args) + expected_commands.append(base_command + args) + return assert_subprocess_calls(expected_commands, check=True) + + +def assert_compose_calls(compose, expected_args, env=mock.ANY): + base_command = ['docker-compose', '--file', str(compose.config.path)] + expected_commands = [] + for args in expected_args: + if isinstance(args, str): + args = re.split(r"\s", args) + expected_commands.append(base_command + args) + return assert_subprocess_calls(expected_commands, check=True, env=env) + + +def test_arrow_example_validation_passes(arrow_compose_path): + DockerCompose(arrow_compose_path) + + +def test_compose_default_params_and_env(arrow_compose_path): + compose = DockerCompose(arrow_compose_path, params=dict( + UBUNTU='18.04', + DASK='master' + )) + assert compose.config.dotenv == arrow_compose_env + assert compose.config.params == { + 'UBUNTU': '18.04', + 'DASK': 'master', + } + + +def test_forwarding_env_variables(arrow_compose_path): + expected_calls = [ + "pull --ignore-pull-failures conda-cpp", + "build conda-cpp", + ] + expected_env = PartialEnv( + MY_CUSTOM_VAR_A='a', + MY_CUSTOM_VAR_B='b' + ) + with override_env({'MY_CUSTOM_VAR_A': 'a', 'MY_CUSTOM_VAR_B': 'b'}): + compose = DockerCompose(arrow_compose_path) + with assert_compose_calls(compose, expected_calls, env=expected_env): + assert os.environ['MY_CUSTOM_VAR_A'] == 'a' + assert os.environ['MY_CUSTOM_VAR_B'] == 'b' + compose.pull('conda-cpp') + compose.build('conda-cpp') + + +def test_compose_pull(arrow_compose_path): + compose = DockerCompose(arrow_compose_path) + + expected_calls = [ + "pull --ignore-pull-failures conda-cpp", + ] + with assert_compose_calls(compose, expected_calls): + compose.clear_pull_memory() + compose.pull('conda-cpp') + + expected_calls = [ + "pull 
--ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-python", + "pull --ignore-pull-failures conda-python-pandas" + ] + with assert_compose_calls(compose, expected_calls): + compose.clear_pull_memory() + compose.pull('conda-python-pandas') + + expected_calls = [ + "pull --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-python", + ] + with assert_compose_calls(compose, expected_calls): + compose.clear_pull_memory() + compose.pull('conda-python-pandas', pull_leaf=False) + + +def test_compose_pull_params(arrow_compose_path): + expected_calls = [ + "pull --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-python", + ] + compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04')) + expected_env = PartialEnv(PYTHON='3.6', PANDAS='latest') + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.clear_pull_memory() + compose.pull('conda-python-pandas', pull_leaf=False) + + +def test_compose_build(arrow_compose_path): + compose = DockerCompose(arrow_compose_path) + + expected_calls = [ + "build conda-cpp", + ] + with assert_compose_calls(compose, expected_calls): + compose.build('conda-cpp') + + expected_calls = [ + "build --no-cache conda-cpp" + ] + with assert_compose_calls(compose, expected_calls): + compose.build('conda-cpp', use_cache=False) + + expected_calls = [ + "build conda-cpp", + "build conda-python", + "build conda-python-pandas" + ] + with assert_compose_calls(compose, expected_calls): + compose.build('conda-python-pandas') + + expected_calls = [ + "build --no-cache conda-cpp", + "build --no-cache conda-python", + "build --no-cache conda-python-pandas", + ] + with assert_compose_calls(compose, expected_calls): + compose.build('conda-python-pandas', use_cache=False) + + expected_calls = [ + "build conda-cpp", + "build conda-python", + "build --no-cache conda-python-pandas", + ] + with assert_compose_calls(compose, expected_calls): + 
compose.build('conda-python-pandas', use_cache=True, + use_leaf_cache=False) + + +@mock.patch.dict(os.environ, {"BUILDKIT_INLINE_CACHE": "1"}) +def test_compose_buildkit_inline_cache(arrow_compose_path): + compose = DockerCompose(arrow_compose_path) + + expected_calls = [ + "build --build-arg BUILDKIT_INLINE_CACHE=1 conda-cpp", + ] + with assert_compose_calls(compose, expected_calls): + compose.build('conda-cpp') + + +def test_compose_build_params(arrow_compose_path): + expected_calls = [ + "build ubuntu-cpp", + ] + + compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04')) + expected_env = PartialEnv(UBUNTU="18.04") + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.build('ubuntu-cpp') + + compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='16.04')) + expected_env = PartialEnv(UBUNTU="16.04") + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.build('ubuntu-cpp') + + expected_calls = [ + "build --no-cache conda-cpp", + "build --no-cache conda-python", + "build --no-cache conda-python-pandas", + ] + compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04')) + expected_env = PartialEnv(PYTHON='3.6', PANDAS='latest') + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.build('conda-python-pandas', use_cache=False) + + +def test_compose_run(arrow_compose_path): + expected_calls = [ + format_run("conda-cpp"), + ] + compose = DockerCompose(arrow_compose_path) + with assert_compose_calls(compose, expected_calls): + compose.run('conda-cpp') + + expected_calls = [ + format_run("conda-python") + ] + expected_env = PartialEnv(PYTHON='3.6') + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.run('conda-python') + + compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8')) + expected_env = PartialEnv(PYTHON='3.8') + with assert_compose_calls(compose, expected_calls, env=expected_env): + 
compose.run('conda-python') + + compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8')) + for command in ["bash", "echo 1"]: + expected_calls = [ + format_run(["conda-python", command]), + ] + expected_env = PartialEnv(PYTHON='3.8') + with assert_compose_calls(compose, expected_calls, env=expected_env): + compose.run('conda-python', command) + + expected_calls = [ + ( + format_run("-e CONTAINER_ENV_VAR_A=a -e CONTAINER_ENV_VAR_B=b " + "conda-python") + ) + ] + compose = DockerCompose(arrow_compose_path) + expected_env = PartialEnv(PYTHON='3.6') + with assert_compose_calls(compose, expected_calls, env=expected_env): + env = collections.OrderedDict([ + ("CONTAINER_ENV_VAR_A", "a"), + ("CONTAINER_ENV_VAR_B", "b") + ]) + compose.run('conda-python', env=env) + + expected_calls = [ + ( + format_run("--volume /host/build:/build --volume " + "/host/ccache:/ccache:delegated conda-python") + ) + ] + compose = DockerCompose(arrow_compose_path) + with assert_compose_calls(compose, expected_calls): + volumes = ("/host/build:/build", "/host/ccache:/ccache:delegated") + compose.run('conda-python', volumes=volumes) + + +def test_compose_run_with_resource_limits(arrow_compose_path): + expected_calls = [ + format_run([ + "--cpuset-cpus=0,1", + "--memory=7g", + "--memory-swap=7g", + "org/conda-cpp" + ]), + ] + compose = DockerCompose(arrow_compose_path) + with assert_docker_calls(compose, expected_calls): + compose.run('conda-cpp', resource_limit="github") + + +def test_compose_push(arrow_compose_path): + compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8')) + expected_env = PartialEnv(PYTHON="3.8") + expected_calls = [ + mock.call(["docker", "login", "-u", "user", "-p", "pass"], check=True), + ] + for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: + expected_calls.append( + mock.call(["docker-compose", "--file", str(compose.config.path), + "push", image], check=True, env=expected_env) + ) + with assert_subprocess_calls(expected_calls): 
+ compose.push('conda-python-pandas', user='user', password='pass') + + +def test_compose_error(arrow_compose_path): + compose = DockerCompose(arrow_compose_path, params=dict( + PYTHON='3.8', + PANDAS='master' + )) + + error = subprocess.CalledProcessError(99, []) + with mock.patch('subprocess.run', side_effect=error): + with pytest.raises(RuntimeError) as exc: + compose.run('conda-cpp') + + exception_message = str(exc.value) + assert "exited with a non-zero exit code 99" in exception_message + assert "PANDAS: latest" in exception_message + assert "export PANDAS=master" in exception_message + + +def test_image_with_gpu(arrow_compose_path): + compose = DockerCompose(arrow_compose_path) + + expected_calls = [ + [ + "run", "--rm", "--gpus", "all", + "-e", "CUDA_ENV=1", + "-e", "OTHER_ENV=2", + "-v", "/host:/container:rw", + "org/ubuntu-cuda", + '/bin/bash -c "echo 1 > /tmp/dummy && cat /tmp/dummy"' + ] + ] + with assert_docker_calls(compose, expected_calls): + compose.run('ubuntu-cuda') + + +def test_listing_images(arrow_compose_path): + compose = DockerCompose(arrow_compose_path) + assert sorted(compose.images()) == [ + 'conda-cpp', + 'conda-python', + 'conda-python-dask', + 'conda-python-pandas', + 'ubuntu-c-glib', + 'ubuntu-cpp', + 'ubuntu-cpp-cmake32', + 'ubuntu-cuda', + 'ubuntu-ruby', + ] diff --git a/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py b/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py new file mode 100644 index 000000000..ab39c7b9d --- /dev/null +++ b/src/arrow/dev/archery/archery/docker/tests/test_docker_cli.py @@ -0,0 +1,201 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from unittest.mock import patch + +from click.testing import CliRunner + +from archery.docker import DockerCompose +from archery.docker.cli import docker + + +@patch.object(DockerCompose, "pull") +@patch.object(DockerCompose, "build") +@patch.object(DockerCompose, "run") +def test_docker_run_with_custom_command(run, build, pull): + # with custom command + args = ["run", "ubuntu-cpp", "bash"] + result = CliRunner().invoke(docker, args) + + assert result.exit_code == 0 + pull.assert_called_once_with( + "ubuntu-cpp", pull_leaf=True, using_docker=False + ) + build.assert_called_once_with( + "ubuntu-cpp", + use_cache=True, + use_leaf_cache=True, + using_docker=False, + using_buildx=False + ) + run.assert_called_once_with( + "ubuntu-cpp", + command="bash", + env={}, + resource_limit=None, + user=None, + using_docker=False, + volumes=(), + ) + + +@patch.object(DockerCompose, "pull") +@patch.object(DockerCompose, "build") +@patch.object(DockerCompose, "run") +def test_docker_run_options(run, build, pull): + # environment variables and volumes + args = [ + "run", + "-e", + "ARROW_GANDIVA=OFF", + "-e", + "ARROW_FLIGHT=ON", + "--volume", + "./build:/build", + "-v", + "./ccache:/ccache:delegated", + "-u", + "root", + "ubuntu-cpp", + ] + result = CliRunner().invoke(docker, args) + assert result.exit_code == 0 + pull.assert_called_once_with( + "ubuntu-cpp", pull_leaf=True, using_docker=False + ) + build.assert_called_once_with( + "ubuntu-cpp", + use_cache=True, + use_leaf_cache=True, + using_docker=False, + using_buildx=False + ) + run.assert_called_once_with( + 
"ubuntu-cpp", + command=None, + env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"}, + resource_limit=None, + user="root", + using_docker=False, + volumes=( + "./build:/build", + "./ccache:/ccache:delegated", + ), + ) + + +@patch.object(DockerCompose, "run") +def test_docker_limit_options(run): + # environment variables and volumes + args = [ + "run", + "-e", + "ARROW_GANDIVA=OFF", + "-e", + "ARROW_FLIGHT=ON", + "--volume", + "./build:/build", + "-v", + "./ccache:/ccache:delegated", + "-u", + "root", + "--resource-limit=github", + "--no-build", + "--no-pull", + "ubuntu-cpp", + ] + result = CliRunner().invoke(docker, args) + assert result.exit_code == 0 + run.assert_called_once_with( + "ubuntu-cpp", + command=None, + env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"}, + resource_limit="github", + user="root", + using_docker=False, + volumes=( + "./build:/build", + "./ccache:/ccache:delegated", + ), + ) + + +@patch.object(DockerCompose, "run") +def test_docker_run_without_pulling_or_building(run): + args = ["run", "--no-pull", "--no-build", "ubuntu-cpp"] + result = CliRunner().invoke(docker, args) + assert result.exit_code == 0 + run.assert_called_once_with( + "ubuntu-cpp", + command=None, + env={}, + resource_limit=None, + user=None, + using_docker=False, + volumes=(), + ) + + +@patch.object(DockerCompose, "pull") +@patch.object(DockerCompose, "build") +def test_docker_run_only_pulling_and_building(build, pull): + args = ["run", "ubuntu-cpp", "--build-only"] + result = CliRunner().invoke(docker, args) + assert result.exit_code == 0 + pull.assert_called_once_with( + "ubuntu-cpp", pull_leaf=True, using_docker=False + ) + build.assert_called_once_with( + "ubuntu-cpp", + use_cache=True, + use_leaf_cache=True, + using_docker=False, + using_buildx=False + ) + + +@patch.object(DockerCompose, "build") +@patch.object(DockerCompose, "run") +def test_docker_run_without_build_cache(run, build): + args = [ + "run", + "--no-pull", + "--force-build", + "--user", + "me", + 
"--no-cache", + "--no-leaf-cache", + "ubuntu-cpp", + ] + result = CliRunner().invoke(docker, args) + assert result.exit_code == 0 + build.assert_called_once_with( + "ubuntu-cpp", + use_cache=False, + use_leaf_cache=False, + using_docker=False, + using_buildx=False + ) + run.assert_called_once_with( + "ubuntu-cpp", + command=None, + env={}, + resource_limit=None, + user="me", + using_docker=False, + volumes=(), + ) diff --git a/src/arrow/dev/archery/archery/integration/__init__.py b/src/arrow/dev/archery/archery/integration/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/src/arrow/dev/archery/archery/integration/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/src/arrow/dev/archery/archery/integration/datagen.py b/src/arrow/dev/archery/archery/integration/datagen.py new file mode 100644 index 000000000..b764982bd --- /dev/null +++ b/src/arrow/dev/archery/archery/integration/datagen.py @@ -0,0 +1,1662 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections import namedtuple, OrderedDict +import binascii +import json +import os +import random +import tempfile + +import numpy as np + +from .util import frombytes, tobytes, random_bytes, random_utf8 + + +def metadata_key_values(pairs): + return [{'key': k, 'value': v} for k, v in pairs] + + +class Field(object): + + def __init__(self, name, *, nullable=True, metadata=None): + self.name = name + self.nullable = nullable + self.metadata = metadata or [] + + def get_json(self): + entries = [ + ('name', self.name), + ('type', self._get_type()), + ('nullable', self.nullable), + ('children', self._get_children()), + ] + + dct = self._get_dictionary() + if dct: + entries.append(('dictionary', dct)) + + if self.metadata is not None and len(self.metadata) > 0: + entries.append(('metadata', metadata_key_values(self.metadata))) + + return OrderedDict(entries) + + def _get_dictionary(self): + return None + + def _make_is_valid(self, size, null_probability=0.4): + if self.nullable: + return (np.random.random_sample(size) > null_probability + ).astype(np.int8) + else: + return np.ones(size, dtype=np.int8) + + +class Column(object): + + def __init__(self, name, count): + self.name = name + self.count = count + + def __len__(self): + return self.count + + def _get_children(self): + return [] + + def _get_buffers(self): + return [] + + def get_json(self): + entries = [ + ('name', self.name), + 
('count', self.count) + ] + + buffers = self._get_buffers() + entries.extend(buffers) + + children = self._get_children() + if len(children) > 0: + entries.append(('children', children)) + + return OrderedDict(entries) + + +class PrimitiveField(Field): + + def _get_children(self): + return [] + + +class PrimitiveColumn(Column): + + def __init__(self, name, count, is_valid, values): + super().__init__(name, count) + self.is_valid = is_valid + self.values = values + + def _encode_value(self, x): + return x + + def _get_buffers(self): + return [ + ('VALIDITY', [int(v) for v in self.is_valid]), + ('DATA', list([self._encode_value(x) for x in self.values])) + ] + + +class NullColumn(Column): + # This subclass is for readability only + pass + + +class NullField(PrimitiveField): + + def __init__(self, name, metadata=None): + super().__init__(name, nullable=True, + metadata=metadata) + + def _get_type(self): + return OrderedDict([('name', 'null')]) + + def generate_column(self, size, name=None): + return NullColumn(name or self.name, size) + + +TEST_INT_MAX = 2 ** 31 - 1 +TEST_INT_MIN = ~TEST_INT_MAX + + +class IntegerField(PrimitiveField): + + def __init__(self, name, is_signed, bit_width, *, nullable=True, + metadata=None, + min_value=TEST_INT_MIN, + max_value=TEST_INT_MAX): + super().__init__(name, nullable=nullable, + metadata=metadata) + self.is_signed = is_signed + self.bit_width = bit_width + self.min_value = min_value + self.max_value = max_value + + def _get_generated_data_bounds(self): + if self.is_signed: + signed_iinfo = np.iinfo('int' + str(self.bit_width)) + min_value, max_value = signed_iinfo.min, signed_iinfo.max + else: + unsigned_iinfo = np.iinfo('uint' + str(self.bit_width)) + min_value, max_value = 0, unsigned_iinfo.max + + lower_bound = max(min_value, self.min_value) + upper_bound = min(max_value, self.max_value) + return lower_bound, upper_bound + + def _get_type(self): + return OrderedDict([ + ('name', 'int'), + ('isSigned', self.is_signed), + 
('bitWidth', self.bit_width) + ]) + + def generate_column(self, size, name=None): + lower_bound, upper_bound = self._get_generated_data_bounds() + return self.generate_range(size, lower_bound, upper_bound, + name=name, include_extremes=True) + + def generate_range(self, size, lower, upper, name=None, + include_extremes=False): + values = np.random.randint(lower, upper, size=size, dtype=np.int64) + if include_extremes and size >= 2: + values[:2] = [lower, upper] + values = list(map(int if self.bit_width < 64 else str, values)) + + is_valid = self._make_is_valid(size) + + if name is None: + name = self.name + return PrimitiveColumn(name, size, is_valid, values) + + +class DateField(IntegerField): + + DAY = 0 + MILLISECOND = 1 + + # 1/1/1 to 12/31/9999 + _ranges = { + DAY: [-719162, 2932896], + MILLISECOND: [-62135596800000, 253402214400000] + } + + def __init__(self, name, unit, *, nullable=True, metadata=None): + bit_width = 32 if unit == self.DAY else 64 + + min_value, max_value = self._ranges[unit] + super().__init__( + name, True, bit_width, + nullable=nullable, metadata=metadata, + min_value=min_value, max_value=max_value + ) + self.unit = unit + + def _get_type(self): + return OrderedDict([ + ('name', 'date'), + ('unit', 'DAY' if self.unit == self.DAY else 'MILLISECOND') + ]) + + +TIMEUNIT_NAMES = { + 's': 'SECOND', + 'ms': 'MILLISECOND', + 'us': 'MICROSECOND', + 'ns': 'NANOSECOND' +} + + +class TimeField(IntegerField): + + BIT_WIDTHS = { + 's': 32, + 'ms': 32, + 'us': 64, + 'ns': 64 + } + + _ranges = { + 's': [0, 86400], + 'ms': [0, 86400000], + 'us': [0, 86400000000], + 'ns': [0, 86400000000000] + } + + def __init__(self, name, unit='s', *, nullable=True, + metadata=None): + min_val, max_val = self._ranges[unit] + super().__init__(name, True, self.BIT_WIDTHS[unit], + nullable=nullable, metadata=metadata, + min_value=min_val, max_value=max_val) + self.unit = unit + + def _get_type(self): + return OrderedDict([ + ('name', 'time'), + ('unit', 
TIMEUNIT_NAMES[self.unit]), + ('bitWidth', self.bit_width) + ]) + + +class TimestampField(IntegerField): + + # 1/1/1 to 12/31/9999 + _ranges = { + 's': [-62135596800, 253402214400], + 'ms': [-62135596800000, 253402214400000], + 'us': [-62135596800000000, 253402214400000000], + + # Physical range for int64, ~584 years and change + 'ns': [np.iinfo('int64').min, np.iinfo('int64').max] + } + + def __init__(self, name, unit='s', tz=None, *, nullable=True, + metadata=None): + min_val, max_val = self._ranges[unit] + super().__init__(name, True, 64, + nullable=nullable, + metadata=metadata, + min_value=min_val, + max_value=max_val) + self.unit = unit + self.tz = tz + + def _get_type(self): + fields = [ + ('name', 'timestamp'), + ('unit', TIMEUNIT_NAMES[self.unit]) + ] + + if self.tz is not None: + fields.append(('timezone', self.tz)) + + return OrderedDict(fields) + + +class DurationIntervalField(IntegerField): + + def __init__(self, name, unit='s', *, nullable=True, + metadata=None): + min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max, + super().__init__( + name, True, 64, + nullable=nullable, metadata=metadata, + min_value=min_val, max_value=max_val) + self.unit = unit + + def _get_type(self): + fields = [ + ('name', 'duration'), + ('unit', TIMEUNIT_NAMES[self.unit]) + ] + + return OrderedDict(fields) + + +class YearMonthIntervalField(IntegerField): + def __init__(self, name, *, nullable=True, metadata=None): + min_val, max_val = [-10000*12, 10000*12] # +/- 10000 years. 
+ super().__init__( + name, True, 32, + nullable=nullable, metadata=metadata, + min_value=min_val, max_value=max_val) + + def _get_type(self): + fields = [ + ('name', 'interval'), + ('unit', 'YEAR_MONTH'), + ] + + return OrderedDict(fields) + + +class DayTimeIntervalField(PrimitiveField): + def __init__(self, name, *, nullable=True, metadata=None): + super().__init__(name, + nullable=True, + metadata=metadata) + + @property + def numpy_type(self): + return object + + def _get_type(self): + + return OrderedDict([ + ('name', 'interval'), + ('unit', 'DAY_TIME'), + ]) + + def generate_column(self, size, name=None): + min_day_value, max_day_value = -10000*366, 10000*366 + values = [{'days': random.randint(min_day_value, max_day_value), + 'milliseconds': random.randint(-86400000, +86400000)} + for _ in range(size)] + + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return PrimitiveColumn(name, size, is_valid, values) + + +class MonthDayNanoIntervalField(PrimitiveField): + def __init__(self, name, *, nullable=True, metadata=None): + super().__init__(name, + nullable=True, + metadata=metadata) + + @property + def numpy_type(self): + return object + + def _get_type(self): + + return OrderedDict([ + ('name', 'interval'), + ('unit', 'MONTH_DAY_NANO'), + ]) + + def generate_column(self, size, name=None): + I32 = 'int32' + min_int_value, max_int_value = np.iinfo(I32).min, np.iinfo(I32).max + I64 = 'int64' + min_nano_val, max_nano_val = np.iinfo(I64).min, np.iinfo(I64).max, + values = [{'months': random.randint(min_int_value, max_int_value), + 'days': random.randint(min_int_value, max_int_value), + 'nanoseconds': random.randint(min_nano_val, max_nano_val)} + for _ in range(size)] + + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return PrimitiveColumn(name, size, is_valid, values) + + +class FloatingPointField(PrimitiveField): + + def __init__(self, name, bit_width, *, nullable=True, + metadata=None): + 
super().__init__(name, + nullable=nullable, + metadata=metadata) + + self.bit_width = bit_width + self.precision = { + 16: 'HALF', + 32: 'SINGLE', + 64: 'DOUBLE' + }[self.bit_width] + + @property + def numpy_type(self): + return 'float' + str(self.bit_width) + + def _get_type(self): + return OrderedDict([ + ('name', 'floatingpoint'), + ('precision', self.precision) + ]) + + def generate_column(self, size, name=None): + values = np.random.randn(size) * 1000 + values = np.round(values, 3) + + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return PrimitiveColumn(name, size, is_valid, values) + + +DECIMAL_PRECISION_TO_VALUE = { + key: (1 << (8 * i - 1)) - 1 for i, key in enumerate( + [1, 3, 5, 7, 10, 12, 15, 17, 19, 22, 24, 27, 29, 32, 34, 36, + 40, 42, 44, 50, 60, 70], + start=1, + ) +} + + +def decimal_range_from_precision(precision): + assert 1 <= precision <= 76 + try: + max_value = DECIMAL_PRECISION_TO_VALUE[precision] + except KeyError: + return decimal_range_from_precision(precision - 1) + else: + return ~max_value, max_value + + +class DecimalField(PrimitiveField): + def __init__(self, name, precision, scale, bit_width, *, + nullable=True, metadata=None): + super().__init__(name, nullable=True, + metadata=metadata) + self.precision = precision + self.scale = scale + self.bit_width = bit_width + + @property + def numpy_type(self): + return object + + def _get_type(self): + return OrderedDict([ + ('name', 'decimal'), + ('precision', self.precision), + ('scale', self.scale), + ('bitWidth', self.bit_width), + ]) + + def generate_column(self, size, name=None): + min_value, max_value = decimal_range_from_precision(self.precision) + values = [random.randint(min_value, max_value) for _ in range(size)] + + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return DecimalColumn(name, size, is_valid, values, self.bit_width) + + +class DecimalColumn(PrimitiveColumn): + + def __init__(self, name, count, is_valid, 
values, bit_width): + super().__init__(name, count, is_valid, values) + self.bit_width = bit_width + + def _encode_value(self, x): + return str(x) + + +class BooleanField(PrimitiveField): + bit_width = 1 + + def _get_type(self): + return OrderedDict([('name', 'bool')]) + + @property + def numpy_type(self): + return 'bool' + + def generate_column(self, size, name=None): + values = list(map(bool, np.random.randint(0, 2, size=size))) + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return PrimitiveColumn(name, size, is_valid, values) + + +class FixedSizeBinaryField(PrimitiveField): + + def __init__(self, name, byte_width, *, nullable=True, + metadata=None): + super().__init__(name, nullable=nullable, + metadata=metadata) + self.byte_width = byte_width + + @property + def numpy_type(self): + return object + + @property + def column_class(self): + return FixedSizeBinaryColumn + + def _get_type(self): + return OrderedDict([('name', 'fixedsizebinary'), + ('byteWidth', self.byte_width)]) + + def generate_column(self, size, name=None): + is_valid = self._make_is_valid(size) + values = [] + + for i in range(size): + values.append(random_bytes(self.byte_width)) + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, values) + + +class BinaryField(PrimitiveField): + + @property + def numpy_type(self): + return object + + @property + def column_class(self): + return BinaryColumn + + def _get_type(self): + return OrderedDict([('name', 'binary')]) + + def _random_sizes(self, size): + return np.random.exponential(scale=4, size=size).astype(np.int32) + + def generate_column(self, size, name=None): + is_valid = self._make_is_valid(size) + values = [] + + sizes = self._random_sizes(size) + + for i, nbytes in enumerate(sizes): + if is_valid[i]: + values.append(random_bytes(nbytes)) + else: + values.append(b"") + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, values) + + +class 
StringField(BinaryField): + + @property + def column_class(self): + return StringColumn + + def _get_type(self): + return OrderedDict([('name', 'utf8')]) + + def generate_column(self, size, name=None): + K = 7 + is_valid = self._make_is_valid(size) + values = [] + + for i in range(size): + if is_valid[i]: + values.append(tobytes(random_utf8(K))) + else: + values.append(b"") + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, values) + + +class LargeBinaryField(BinaryField): + + @property + def column_class(self): + return LargeBinaryColumn + + def _get_type(self): + return OrderedDict([('name', 'largebinary')]) + + +class LargeStringField(StringField): + + @property + def column_class(self): + return LargeStringColumn + + def _get_type(self): + return OrderedDict([('name', 'largeutf8')]) + + +class Schema(object): + + def __init__(self, fields, metadata=None): + self.fields = fields + self.metadata = metadata + + def get_json(self): + entries = [ + ('fields', [field.get_json() for field in self.fields]) + ] + + if self.metadata is not None and len(self.metadata) > 0: + entries.append(('metadata', metadata_key_values(self.metadata))) + + return OrderedDict(entries) + + +class _NarrowOffsetsMixin: + + def _encode_offsets(self, offsets): + return list(map(int, offsets)) + + +class _LargeOffsetsMixin: + + def _encode_offsets(self, offsets): + # 64-bit offsets have to be represented as strings to roundtrip + # through JSON. 
class _BaseBinaryColumn(PrimitiveColumn):
    """Shared buffer layout for variable-length binary-like columns.

    Concrete subclasses are expected to supply ``_encode_offsets`` (the
    narrow/large offsets mixins in this file do so).
    """

    def _encode_value(self, x):
        # Bytes are written as an upper-case hex string.
        hex_bytes = binascii.hexlify(x).upper()
        return frombytes(hex_bytes)

    def _get_buffers(self):
        """Return the VALIDITY, OFFSET and DATA buffers, in that order."""
        running_total = 0
        offsets = [running_total]
        encoded = []

        for index, raw in enumerate(self.values):
            # Invalid slots contribute an empty payload, so the offset
            # does not advance for them.
            payload = raw if self.is_valid[index] else b""
            running_total += len(payload)
            offsets.append(running_total)
            encoded.append(self._encode_value(payload))

        validity = [int(flag) for flag in self.is_valid]
        return [
            ('VALIDITY', validity),
            ('OFFSET', self._encode_offsets(offsets)),
            ('DATA', encoded)
        ]
class _BaseListColumn(Column):
    """List column: validity flags, offsets and a single child column.

    Concrete subclasses are expected to supply ``_encode_offsets`` (the
    narrow/large offsets mixins in this file do so).
    """

    def __init__(self, name, count, is_valid, offsets, values):
        super().__init__(name, count)
        self.values = values
        self.offsets = offsets
        self.is_valid = is_valid

    def _get_buffers(self):
        """Return the VALIDITY and OFFSET buffers, in that order."""
        validity = [int(flag) for flag in self.is_valid]
        encoded_offsets = self._encode_offsets(self.offsets)
        return [
            ('VALIDITY', validity),
            ('OFFSET', encoded_offsets)
        ]

    def _get_children(self):
        # The only child is the flattened values column.
        child_json = self.values.get_json()
        return [child_json]
class FixedSizeListField(Field):
    """List field in which every slot holds exactly ``list_size`` values."""

    def __init__(self, name, value_field, list_size, *, nullable=True,
                 metadata=None):
        super().__init__(name, nullable=nullable, metadata=metadata)
        self.list_size = list_size
        self.value_field = value_field

    def _get_type(self):
        type_json = OrderedDict()
        type_json['name'] = 'fixedsizelist'
        type_json['listSize'] = self.list_size
        return type_json

    def _get_children(self):
        child_json = self.value_field.get_json()
        return [child_json]

    def generate_column(self, size, name=None):
        """Generate ``size`` list slots backed by ``size * list_size`` values.

        ``name`` defaults to the field's own name.
        """
        validity = self._make_is_valid(size)
        child_count = size * self.list_size
        child_column = self.value_field.generate_column(child_count)

        column_name = self.name if name is None else name
        return FixedSizeListColumn(column_name, size, validity, child_column)
class _BaseUnionField(Field):
    """Common base of the sparse and dense union fields.

    Subclasses provide the ``mode`` class attribute that is written into
    the JSON type description.

    Parameters
    ----------
    name : field name, forwarded to ``Field``.
    fields : sequence of child fields, one per union member.
    type_ids : optional sequence of non-negative logical type ids, one per
        child. Defaults to ``0 .. len(fields) - 1``.
    nullable, metadata : forwarded to ``Field``.
    """

    def __init__(self, name, fields, type_ids=None, *, nullable=True,
                 metadata=None):
        super().__init__(name, nullable=nullable, metadata=metadata)
        if type_ids is None:
            # One id per child. range() needs the child count, not the
            # list of children itself (which would raise TypeError).
            type_ids = list(range(len(fields)))
        else:
            assert len(fields) == len(type_ids)
        self.fields = fields
        self.type_ids = type_ids
        assert all(x >= 0 for x in self.type_ids)

    def _get_type(self):
        return OrderedDict([
            ('name', 'union'),
            ('mode', self.mode),
            ('typeIds', self.type_ids),
        ])

    def _get_children(self):
        return [field.get_json() for field in self.fields]

    def _make_type_ids(self, size):
        """Pick ``size`` random logical type ids from ``self.type_ids``."""
        return np.random.choice(self.type_ids, size)
class DictionaryField(Field):
    """Field whose values are integer indices into a ``Dictionary``.

    The JSON type and children are those of the dictionary's value field;
    the index type is reported separately through ``_get_dictionary``.
    """

    def __init__(self, name, index_field, dictionary, *, nullable=True,
                 metadata=None):
        super().__init__(name, nullable=nullable, metadata=metadata)

        # The index field must be an unnamed integer field, and the
        # dictionary must be a Dictionary instance.
        assert index_field.name == ''
        assert isinstance(index_field, IntegerField)
        assert isinstance(dictionary, Dictionary)

        self.dictionary = dictionary
        self.index_field = index_field

    def _get_type(self):
        value_field = self.dictionary.field
        return value_field._get_type()

    def _get_children(self):
        value_field = self.dictionary.field
        return value_field._get_children()

    def _get_dictionary(self):
        encoding = OrderedDict()
        encoding['id'] = self.dictionary.id_
        encoding['indexType'] = self.index_field._get_type()
        encoding['isOrdered'] = self.dictionary.ordered
        return encoding

    def generate_column(self, size, name=None):
        """Generate ``size`` indices via ``index_field.generate_range``.

        The range arguments passed are 0 and ``len(self.dictionary)``;
        ``name`` defaults to the field's own name.
        """
        column_name = self.name if name is None else name
        upper = len(self.dictionary)
        return self.index_field.generate_range(size, 0, upper,
                                               name=column_name)
class RecordBatch(object):
    """A row count together with the columns that make up one batch."""

    def __init__(self, count, columns):
        self.columns = columns
        self.count = count

    def get_json(self):
        """Return an OrderedDict with 'count' followed by 'columns'."""
        batch_json = OrderedDict()
        batch_json['count'] = self.count
        batch_json['columns'] = [column.get_json() for column in self.columns]
        return batch_json
def get_field(name, type_, **kwargs):
    """Create the field object matching the type name ``type_``.

    The names 'binary', 'utf8', 'largebinary', 'largeutf8' and
    'fixedsizebinary_<N>' are handled explicitly. Anything else is parsed
    as a NumPy dtype and must be an integer, floating-point or boolean
    type. Extra keyword arguments are forwarded to the field constructor.

    Raises TypeError for a dtype of any other kind.
    """
    if type_ == 'binary':
        return BinaryField(name, **kwargs)
    if type_ == 'utf8':
        return StringField(name, **kwargs)
    if type_ == 'largebinary':
        return LargeBinaryField(name, **kwargs)
    if type_ == 'largeutf8':
        return LargeStringField(name, **kwargs)
    if type_.startswith('fixedsizebinary_'):
        # The byte width is encoded after the first underscore.
        width_text = type_.split('_')[1]
        return FixedSizeBinaryField(name, byte_width=int(width_text),
                                    **kwargs)

    dtype = np.dtype(type_)
    kind = dtype.kind
    bits = dtype.itemsize * 8

    if kind in ('i', 'u'):
        is_signed = kind == 'i'
        return IntegerField(name, is_signed, bits, **kwargs)
    if kind == 'f':
        return FloatingPointField(name, bits, **kwargs)
    if kind == 'b':
        return BooleanField(name, **kwargs)
    raise TypeError(dtype)
def generate_primitive_case(batch_sizes, name='primitive'):
    """Build a file holding a nullable and a non-nullable field per type.

    For every primitive type name the '<type>_nullable' field comes first,
    followed by '<type>_nonnullable'.
    """
    type_names = ('bool', 'int8', 'int16', 'int32', 'int64',
                  'uint8', 'uint16', 'uint32', 'uint64',
                  'float32', 'float64', 'binary', 'utf8',
                  'fixedsizebinary_19', 'fixedsizebinary_120')
    variants = (("_nullable", True), ("_nonnullable", False))

    fields = []
    for type_name in type_names:
        for suffix, is_nullable in variants:
            fields.append(
                get_field(type_name + suffix, type_name,
                          nullable=is_nullable))

    return _generate_file(name, fields, batch_sizes)
def generate_decimal128_case():
    """Build the 128-bit decimal file: one field per precision in 3..38.

    One batch is generated per field, with sizes alternating 7, 10, 7, ...
    """
    fields = []
    for index, precision in enumerate(range(3, 39)):
        fields.append(
            DecimalField(name='f{}'.format(index), precision=precision,
                         scale=2, bit_width=128))

    alternating_sizes = (7, 10)
    batch_sizes = [alternating_sizes[index % 2]
                   for index, _ in enumerate(fields)]

    # 'decimal' is the original name of this test. It has to match the
    # provided "gold" files that test backwards compatibility, so that
    # they can be appropriately skipped.
    return _generate_file('decimal', fields, batch_sizes)
def generate_recursive_nested_case():
    """Build a file with a list of lists and a list of structs."""
    # list<list<int16>>
    inner_list = ListField('inner_list', get_field('item', 'int16'))
    lists_list = ListField('lists_list', inner_list)

    # list<struct<f1: int32, f2: utf8>>
    struct_members = [get_field('f1', 'int32'), get_field('f2', 'utf8')]
    inner_struct = StructField('inner_struct', struct_members)
    structs_list = ListField('structs_list', inner_struct)

    return _generate_file("recursive_nested", [lists_list, structs_list],
                          [7, 10])
def generate_dictionary_case():
    """Build a file with three dictionary-encoded fields.

    Two dictionaries hold strings (10 and 5 entries) and one holds int64
    values (50 entries); they are indexed with int8, int32 and int16
    respectively.
    """
    # NOTE(review): dict0's value field is named 'dictionary1', the same as
    # dict1's -- looks like a copy/paste slip, kept as-is; confirm whether
    # 'dictionary0' was intended.
    string_dict_a = Dictionary(0, StringField('dictionary1'), size=10,
                               name='DICT0')
    string_dict_b = Dictionary(1, StringField('dictionary1'), size=5,
                               name='DICT1')
    int_dict = Dictionary(2, get_field('dictionary2', 'int64'),
                          size=50, name='DICT2')
    dictionaries = [string_dict_a, string_dict_b, int_dict]

    # (field name, index type) for each dictionary, in the same order.
    index_specs = [('dict0', 'int8'), ('dict1', 'int32'), ('dict2', 'int16')]
    fields = []
    for (field_name, index_type), dictionary in zip(index_specs,
                                                    dictionaries):
        fields.append(
            DictionaryField(field_name, get_field('', index_type),
                            dictionary))

    return _generate_file("dictionary", fields, [7, 10],
                          dictionaries=dictionaries)
def get_generated_json_files(tempdir=None):
    """Generate every integration test case and write it out as JSON.

    Each case is written to ``<tempdir>/generated_<case name>.json``. When
    ``tempdir`` is not given, a fresh temporary directory with the prefix
    'arrow-integration-' is created.

    Returns the list of generated file objects (not path strings); each
    one has been written through its ``write`` method. The chained
    ``skip_category`` calls mark the implementations a case is skipped for.
    """
    tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')

    file_objs = [
        generate_primitive_case([], name='primitive_no_batches'),
        generate_primitive_case([17, 20], name='primitive'),
        generate_primitive_case([0, 0, 0], name='primitive_zerolength'),

        generate_primitive_large_offsets_case([17, 20])
        .skip_category('C#')
        .skip_category('Go')
        .skip_category('JS'),

        generate_null_case([10, 0])
        .skip_category('C#')
        .skip_category('JS'),   # TODO(ARROW-7900)

        generate_null_trivial_case([0, 0])
        .skip_category('C#')
        .skip_category('JS'),   # TODO(ARROW-7900)

        generate_decimal128_case()
        .skip_category('Rust'),

        generate_decimal256_case()
        .skip_category('Go')  # TODO(ARROW-7948): Decimal + Go
        .skip_category('JS')
        .skip_category('Rust'),

        generate_datetime_case()
        .skip_category('C#'),

        generate_interval_case()
        .skip_category('C#')
        .skip_category('JS')  # TODO(ARROW-5239): Intervals + JS
        .skip_category('Rust'),

        generate_month_day_nano_interval_case()
        .skip_category('C#')
        .skip_category('JS')
        .skip_category('Rust'),

        generate_map_case()
        .skip_category('C#')
        .skip_category('Rust'),

        generate_non_canonical_map_case()
        .skip_category('C#')
        .skip_category('Java')   # TODO(ARROW-8715)
        .skip_category('JS')     # TODO(ARROW-8716)
        .skip_category('Rust'),

        generate_nested_case()
        .skip_category('C#'),

        generate_recursive_nested_case()
        .skip_category('C#'),

        generate_nested_large_offsets_case()
        .skip_category('C#')
        .skip_category('Go')
        .skip_category('JS')
        .skip_category('Rust'),

        generate_unions_case()
        .skip_category('C#')
        .skip_category('Go')
        .skip_category('JS')
        .skip_category('Rust'),

        generate_custom_metadata_case()
        .skip_category('C#')
        .skip_category('JS'),

        generate_duplicate_fieldnames_case()
        .skip_category('C#')
        .skip_category('Go')
        .skip_category('JS'),

        # TODO(ARROW-3039, ARROW-5267): Dictionaries in GO
        generate_dictionary_case()
        .skip_category('C#')
        .skip_category('Go'),

        generate_dictionary_unsigned_case()
        .skip_category('C#')
        .skip_category('Go')     # TODO(ARROW-9378)
        .skip_category('Java'),  # TODO(ARROW-9377)

        generate_nested_dictionary_case()
        .skip_category('C#')
        .skip_category('Go')
        .skip_category('Java')  # TODO(ARROW-7779)
        .skip_category('JS')
        .skip_category('Rust'),

        generate_extension_case()
        .skip_category('C#')
        .skip_category('Go')  # TODO(ARROW-3039): requires dictionaries
        .skip_category('JS')
        .skip_category('Rust'),
    ]

    generated_files = []
    for file_obj in file_objs:
        out_path = os.path.join(tempdir,
                                'generated_' + file_obj.name + '.json')
        file_obj.write(out_path)
        generated_files.append(file_obj)

    return generated_files
class Outcome:
    """Result of running one test case.

    A fresh outcome has no failure recorded and is not marked as skipped.
    """

    def __init__(self):
        # `failure` stays None unless the case fails; `skipped` is set to
        # True when the case is not run.
        self.failure, self.skipped = None, False
+ """ + for producer, consumer in itertools.product( + filter(lambda t: t.PRODUCER, self.testers), + filter(lambda t: t.CONSUMER, self.testers)): + self._compare_implementations( + producer, consumer, self._produce_consume, + self.json_files) + if self.gold_dirs: + for gold_dir, consumer in itertools.product( + self.gold_dirs, + filter(lambda t: t.CONSUMER, self.testers)): + log('\n\n\n\n') + log('******************************************************') + log('Tests against golden files in {}'.format(gold_dir)) + log('******************************************************') + + def run_gold(producer, consumer, outcome, test_case): + self._run_gold(gold_dir, producer, consumer, outcome, + test_case) + self._compare_implementations( + consumer, consumer, run_gold, + self._gold_tests(gold_dir)) + + def run_flight(self): + """ + Run Arrow Flight integration tests for the matrix of enabled + implementations. + """ + servers = filter(lambda t: t.FLIGHT_SERVER, self.testers) + clients = filter(lambda t: (t.FLIGHT_CLIENT and t.CONSUMER), + self.testers) + for server, client in itertools.product(servers, clients): + self._compare_flight_implementations(server, client) + + def _gold_tests(self, gold_dir): + prefix = os.path.basename(os.path.normpath(gold_dir)) + SUFFIX = ".json.gz" + golds = [jf for jf in os.listdir(gold_dir) if jf.endswith(SUFFIX)] + for json_path in golds: + name = json_path[json_path.index('_')+1: -len(SUFFIX)] + base_name = prefix + "_" + name + ".gold.json" + out_path = os.path.join(self.temp_dir, base_name) + with gzip.open(os.path.join(gold_dir, json_path)) as i: + with open(out_path, "wb") as out: + out.write(i.read()) + + try: + skip = next(f for f in self.json_files + if f.name == name).skip + except StopIteration: + skip = set() + if name == 'union' and prefix == '0.17.1': + skip.add("Java") + if prefix == '1.0.0-bigendian' or prefix == '1.0.0-littleendian': + skip.add("C#") + skip.add("Go") + skip.add("Java") + skip.add("JS") + skip.add("Rust") + 
if prefix == '2.0.0-compression': + skip.add("C#") + skip.add("JS") + skip.add("Rust") + + # See https://github.com/apache/arrow/pull/9822 for how to + # disable specific compression type tests. + + if prefix == '4.0.0-shareddict': + skip.add("C#") + skip.add("Go") + + yield datagen.File(name, None, None, skip=skip, path=out_path) + + def _run_test_cases(self, producer, consumer, case_runner, + test_cases): + def case_wrapper(test_case): + with printer.cork(): + return case_runner(test_case) + + if self.failures and self.stop_on_error: + return + + if self.serial: + for outcome in map(case_wrapper, test_cases): + if outcome.failure is not None: + self.failures.append(outcome.failure) + if self.stop_on_error: + break + + else: + with ThreadPoolExecutor() as executor: + for outcome in executor.map(case_wrapper, test_cases): + if outcome.failure is not None: + self.failures.append(outcome.failure) + if self.stop_on_error: + break + + def _compare_implementations( + self, producer, consumer, run_binaries, test_cases): + """ + Compare Arrow IPC for two implementations (one producer, one consumer). + """ + log('##########################################################') + log('IPC: {0} producing, {1} consuming' + .format(producer.name, consumer.name)) + log('##########################################################') + + case_runner = partial(self._run_ipc_test_case, + producer, consumer, run_binaries) + self._run_test_cases(producer, consumer, case_runner, test_cases) + + def _run_ipc_test_case(self, producer, consumer, run_binaries, test_case): + """ + Run one IPC test case. 
def _produce_consume(self, producer, consumer, outcome, test_case):
    """
    Round-trip one JSON test case from *producer* to *consumer*.

    The producer turns the JSON into an Arrow file, which the consumer
    validates. The producer then converts that file to a stream, the
    consumer converts the stream back to a file, and that file is validated
    against the JSON as well. All intermediate files are written to
    ``self.temp_dir`` under a random 8-character prefix. ``outcome`` is
    accepted but not used here.
    """
    source_json = test_case.path
    tag = guid()[:8]
    stem = os.path.splitext(os.path.basename(source_json))[0]

    def scratch(suffix):
        # Unique path in the temp dir for one intermediate artefact.
        return os.path.join(self.temp_dir, tag + '_' + stem + suffix)

    # Make the random access file
    from_json = scratch('.json_as_file')
    as_stream = scratch('.producer_file_as_stream')
    back_to_file = scratch('.consumer_stream_as_file')

    log('-- Creating binary inputs')
    producer.json_to_file(source_json, from_json)

    # Validate the file
    log('-- Validating file')
    consumer.validate(source_json, from_json)

    log('-- Validating stream')
    producer.file_to_stream(from_json, as_stream)
    consumer.stream_to_file(as_stream, back_to_file)
    consumer.validate(source_json, back_to_file)
def _compare_flight_implementations(self, producer, consumer):
    """
    Run every Flight test case with *producer* serving and *consumer*
    requesting.

    The cases are ``self.json_files`` followed by ``self.flight_scenarios``;
    each one is executed through ``_run_flight_test_case``.
    """
    rule = '##########################################################'
    headline = 'Flight: {0} serving, {1} requesting'.format(
        producer.name, consumer.name)
    for line in (rule, headline, rule):
        log(line)

    all_cases = self.json_files + self.flight_scenarios
    run_one = partial(self._run_flight_test_case, producer, consumer)
    self._run_test_cases(producer, consumer, run_one, all_cases)
def get_static_json_files():
    """
    Return a ``datagen.File`` for each ``*.json`` file found under
    ``<ARROW_ROOT_DEFAULT>/integration/data``.

    Every entry is named after the file's base name, points at the file on
    disk, starts with an empty skip set and carries no schema or batches.
    """
    data_dir = os.path.join(ARROW_ROOT_DEFAULT, 'integration', 'data')
    static_files = []
    for json_path in glob.glob(os.path.join(data_dir, '*.json')):
        static_files.append(
            datagen.File(name=os.path.basename(json_path), path=json_path,
                         skip=set(), schema=None, batches=None))
    return static_files
def write_js_test_json(directory):
    """
    Generate the JSON test cases listed below and write each one into
    *directory* under a fixed file name.
    """
    def emit(case, filename):
        # Persist one generated case next to the others.
        case.write(os.path.join(directory, filename))

    emit(datagen.generate_map_case(), 'map.json')
    emit(datagen.generate_nested_case(), 'nested.json')
    emit(datagen.generate_decimal128_case(), 'decimal.json')
    emit(datagen.generate_decimal256_case(), 'decimal256.json')
    emit(datagen.generate_datetime_case(), 'datetime.json')
    emit(datagen.generate_dictionary_case(), 'dictionary.json')
    emit(datagen.generate_dictionary_unsigned_case(),
         'dictionary_unsigned.json')
    emit(datagen.generate_primitive_case([]), 'primitive_no_batches.json')
    emit(datagen.generate_primitive_case([7, 10]), 'primitive.json')
    emit(datagen.generate_primitive_case([0, 0, 0]), 'primitive-empty.json')
class Scenario:
    """
    A named Arrow Flight integration scenario.

    Unlike the IPC test cases, a scenario is not backed by a JSON file; it
    only has a name, a human-readable description and a set of skip markers.
    """

    def __init__(self, name, description, skip=None):
        # A missing (or empty) skip collection is replaced by a fresh set so
        # that callers can always do membership tests on ``self.skip``.
        if not skip:
            skip = set()
        self.name = name
        self.description = description
        self.skip = skip
class Tester(object):
    """
    Base class for the language-specific integration test harnesses.

    Subclasses switch on the capability flags they support and override the
    matching methods; every operation here raises ``NotImplementedError``.
    """

    # Capability flags, all off by default.
    PRODUCER = False
    CONSUMER = False
    FLIGHT_SERVER = False
    FLIGHT_CLIENT = False

    def __init__(self, debug=False, **args):
        self.debug = debug
        self.args = args

    def run_shell_command(self, cmd):
        """Join *cmd* with spaces and run the result through the shell."""
        command_line = ' '.join(cmd)
        if self.debug:
            log(command_line)
        subprocess.check_call(command_line, shell=True)

    def json_to_file(self, json_path, arrow_path):
        raise NotImplementedError

    def stream_to_file(self, stream_path, file_path):
        raise NotImplementedError

    def file_to_stream(self, file_path, stream_path):
        raise NotImplementedError

    def validate(self, json_path, arrow_path):
        raise NotImplementedError

    def flight_server(self, scenario_name=None):
        """Start the Flight server on a free port.

        This should be a context manager that returns the port as the
        managed object, and cleans up the server on exit.
        """
        raise NotImplementedError

    def flight_request(self, port, json_path=None, scenario_name=None):
        raise NotImplementedError
class CPPTester(Tester):
    """
    Integration harness driving the C++ command-line tools.

    The executables are looked up in ``ARROW_CPP_EXE_PATH`` when that
    environment variable is set, otherwise in ``cpp/build/debug`` under
    ``ARROW_ROOT_DEFAULT``.
    """

    PRODUCER = True
    CONSUMER = True
    FLIGHT_SERVER = True
    FLIGHT_CLIENT = True

    EXE_PATH = os.environ.get(
        'ARROW_CPP_EXE_PATH',
        os.path.join(ARROW_ROOT_DEFAULT, 'cpp/build/debug'))

    CPP_INTEGRATION_EXE = os.path.join(EXE_PATH, 'arrow-json-integration-test')
    STREAM_TO_FILE = os.path.join(EXE_PATH, 'arrow-stream-to-file')
    FILE_TO_STREAM = os.path.join(EXE_PATH, 'arrow-file-to-stream')

    FLIGHT_SERVER_CMD = [
        os.path.join(EXE_PATH, 'flight-test-integration-server')]
    FLIGHT_CLIENT_CMD = [
        os.path.join(EXE_PATH, 'flight-test-integration-client'),
        "-host", "localhost"]

    name = 'C++'

    def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
        """Invoke the JSON integration executable in the given mode."""
        argv = [self.CPP_INTEGRATION_EXE, '--integration']
        for flag, value in (('--arrow=', arrow_path), ('--json=', json_path)):
            if value is not None:
                argv.append(flag + value)
        argv.append('--mode=' + command)

        if self.debug:
            log(' '.join(argv))

        run_cmd(argv)

    def validate(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'VALIDATE')

    def json_to_file(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'JSON_TO_ARROW')

    def stream_to_file(self, stream_path, file_path):
        # Uses shell redirection, hence run_shell_command.
        self.run_shell_command(
            [self.STREAM_TO_FILE, '<', stream_path, '>', file_path])

    def file_to_stream(self, file_path, stream_path):
        self.run_shell_command(
            [self.FILE_TO_STREAM, file_path, '>', stream_path])

    @contextlib.contextmanager
    def flight_server(self, scenario_name=None):
        """
        Start the Flight test server and yield the port it reports.

        The port is parsed from the first line the server prints on stdout.
        If that line does not start with the expected banner, the server is
        killed and a ``RuntimeError`` carrying its output is raised. The
        process is always killed when the context exits.
        """
        argv = self.FLIGHT_SERVER_CMD + ['-port=0']
        if scenario_name:
            argv.extend(["-scenario", scenario_name])
        if self.debug:
            log(' '.join(argv))
        proc = subprocess.Popen(argv,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        try:
            banner = proc.stdout.readline().decode()
            if not banner.startswith("Server listening on localhost:"):
                proc.kill()
                rest_out, rest_err = proc.communicate()
                raise RuntimeError(
                    "Flight-C++ server did not start properly, "
                    "stdout:\n{}\n\nstderr:\n{}\n"
                    .format(banner + rest_out.decode(), rest_err.decode()))
            yield int(banner.split(":")[1])
        finally:
            proc.kill()
            proc.wait(5)

    def flight_request(self, port, json_path=None, scenario_name=None):
        """Run the Flight test client against *port*.

        Raises ``TypeError`` when neither *json_path* nor *scenario_name*
        is given.
        """
        if json_path:
            selector = ('-path', json_path)
        elif scenario_name:
            selector = ('-scenario', scenario_name)
        else:
            raise TypeError("Must provide one of json_path or scenario_name")

        argv = self.FLIGHT_CLIENT_CMD + ['-port=' + str(port)]
        argv.extend(selector)

        if self.debug:
            log(' '.join(argv))
        run_cmd(argv)
class CSharpTester(Tester):
    """
    Integration harness driving the C# ``Apache.Arrow.IntegrationTest``
    executable (IPC producer and consumer only).
    """

    PRODUCER = True
    CONSUMER = True

    EXE_PATH = os.path.join(
        ARROW_ROOT_DEFAULT,
        'csharp/artifacts/Apache.Arrow.IntegrationTest',
        'Debug/netcoreapp3.1/Apache.Arrow.IntegrationTest')

    name = 'C#'

    def _run(self, json_path=None, arrow_path=None, command='validate'):
        """Invoke the integration executable in the given mode."""
        argv = [self.EXE_PATH, '--mode', command]
        for flag, value in (('-j', json_path), ('-a', arrow_path)):
            if value is not None:
                argv.extend([flag, value])

        if self.debug:
            log(' '.join(argv))

        run_cmd(argv)

    def validate(self, json_path, arrow_path):
        return self._run(json_path, arrow_path, 'validate')

    def json_to_file(self, json_path, arrow_path):
        return self._run(json_path, arrow_path, 'json-to-arrow')

    def stream_to_file(self, stream_path, file_path):
        # The stream is fed on stdin through shell redirection.
        argv = [self.EXE_PATH,
                '--mode', 'stream-to-file', '-a', file_path,
                '<', stream_path]
        self.run_shell_command(argv)

    def file_to_stream(self, file_path, stream_path):
        # The stream is captured from stdout through shell redirection.
        argv = [self.EXE_PATH,
                '--mode', 'file-to-stream',
                '-a', file_path, '>', stream_path]
        self.run_shell_command(argv)
class GoTester(Tester):
    """
    Integration harness driving the Go command-line tools.

    The executables are looked up in ``GOBIN``, which defaults to
    ``$GOPATH/bin``, which in turn defaults to ``$HOME/go/bin``.
    """

    PRODUCER = True
    CONSUMER = True
    FLIGHT_SERVER = True
    FLIGHT_CLIENT = True

    # FIXME(sbinet): revisit for Go modules
    # Fall back to the expanded home directory: a literal '~' is only
    # expanded by a shell, and run_cmd() executes without one.
    HOME = os.getenv('HOME', os.path.expanduser('~'))
    GOPATH = os.getenv('GOPATH', os.path.join(HOME, 'go'))
    GOBIN = os.environ.get('GOBIN', os.path.join(GOPATH, 'bin'))

    GO_INTEGRATION_EXE = os.path.join(GOBIN, 'arrow-json-integration-test')
    STREAM_TO_FILE = os.path.join(GOBIN, 'arrow-stream-to-file')
    FILE_TO_STREAM = os.path.join(GOBIN, 'arrow-file-to-stream')

    FLIGHT_SERVER_CMD = [
        os.path.join(GOBIN, 'arrow-flight-integration-server')]
    FLIGHT_CLIENT_CMD = [
        os.path.join(GOBIN, 'arrow-flight-integration-client'),
        '-host', 'localhost']

    name = 'Go'

    def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
        """Invoke the JSON integration executable in the given mode."""
        cmd = [self.GO_INTEGRATION_EXE]

        if arrow_path is not None:
            cmd.extend(['-arrow', arrow_path])

        if json_path is not None:
            cmd.extend(['-json', json_path])

        cmd.extend(['-mode', command])

        if self.debug:
            log(' '.join(cmd))

        run_cmd(cmd)

    def validate(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'VALIDATE')

    def json_to_file(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'JSON_TO_ARROW')

    def stream_to_file(self, stream_path, file_path):
        # Uses shell redirection, hence run_shell_command.
        cmd = [self.STREAM_TO_FILE, '<', stream_path, '>', file_path]
        self.run_shell_command(cmd)

    def file_to_stream(self, file_path, stream_path):
        cmd = [self.FILE_TO_STREAM, file_path, '>', stream_path]
        self.run_shell_command(cmd)

    @contextlib.contextmanager
    def flight_server(self, scenario_name=None):
        """
        Start the Flight test server and yield the port it reports.

        The port is parsed from the first line the server prints on stdout.
        If that line does not start with the expected banner, the server is
        killed and a ``RuntimeError`` carrying its output is raised. The
        process is always killed when the context exits.
        """
        cmd = self.FLIGHT_SERVER_CMD + ['-port=0']
        if scenario_name:
            cmd = cmd + ['-scenario', scenario_name]
        if self.debug:
            log(' '.join(cmd))
        server = subprocess.Popen(cmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

        try:
            output = server.stdout.readline().decode()
            if not output.startswith("Server listening on localhost:"):
                server.kill()
                out, err = server.communicate()
                raise RuntimeError(
                    "Flight-Go server did not start properly, "
                    "stdout: \n{}\n\nstderr:\n{}\n"
                    .format(output + out.decode(), err.decode())
                )
            port = int(output.split(":")[1])
            yield port
        finally:
            server.kill()
            server.wait(5)

    def flight_request(self, port, json_path=None, scenario_name=None):
        """Run the Flight test client against *port*.

        Raises ``TypeError`` when neither *json_path* nor *scenario_name*
        is given.
        """
        cmd = self.FLIGHT_CLIENT_CMD + [
            '-port=' + str(port),
        ]
        if json_path:
            cmd.extend(('-path', json_path))
        elif scenario_name:
            cmd.extend(('-scenario', scenario_name))
        else:
            raise TypeError("Must provide one of json_path or scenario_name")

        if self.debug:
            log(' '.join(cmd))
        run_cmd(cmd)
def load_version_from_pom():
    """Return the text of the first top-level ``<version>`` element of
    ``java/pom.xml`` under ``ARROW_ROOT_DEFAULT``."""
    import xml.etree.ElementTree as ET
    pom_path = os.path.join(ARROW_ROOT_DEFAULT, 'java', 'pom.xml')
    root = ET.parse(pom_path).getroot()
    matches = root.findall('{http://maven.apache.org/POM/4.0.0}version')
    return matches[0].text


class JavaTester(Tester):
    """
    Integration harness driving the Java tools through the ``java`` command.

    The jar locations can be overridden with ``ARROW_JAVA_INTEGRATION_JAR``
    and ``ARROW_FLIGHT_JAVA_INTEGRATION_JAR``; otherwise they are derived
    from the version found in the Java ``pom.xml``.
    """

    PRODUCER = True
    CONSUMER = True
    FLIGHT_SERVER = True
    FLIGHT_CLIENT = True

    JAVA_OPTS = ['-Dio.netty.tryReflectionSetAccessible=true',
                 '-Darrow.struct.conflict.policy=CONFLICT_APPEND']

    _arrow_version = load_version_from_pom()
    ARROW_TOOLS_JAR = os.environ.get(
        'ARROW_JAVA_INTEGRATION_JAR',
        os.path.join(ARROW_ROOT_DEFAULT,
                     'java/tools/target/arrow-tools-{}-'
                     'jar-with-dependencies.jar'.format(_arrow_version)))
    ARROW_FLIGHT_JAR = os.environ.get(
        'ARROW_FLIGHT_JAVA_INTEGRATION_JAR',
        os.path.join(ARROW_ROOT_DEFAULT,
                     'java/flight/flight-core/target/flight-core-{}-'
                     'jar-with-dependencies.jar'.format(_arrow_version)))
    ARROW_FLIGHT_SERVER = ('org.apache.arrow.flight.example.integration.'
                           'IntegrationTestServer')
    ARROW_FLIGHT_CLIENT = ('org.apache.arrow.flight.example.integration.'
                           'IntegrationTestClient')

    name = 'Java'

    def _java_cmd(self, jar, main_class, *extra):
        # Common prefix of every invocation: JVM options, classpath, class.
        return (['java'] + self.JAVA_OPTS + ['-cp', jar, main_class]
                + list(extra))

    def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
        """Invoke the ``Integration`` tool with the given command."""
        argv = self._java_cmd(self.ARROW_TOOLS_JAR,
                              'org.apache.arrow.tools.Integration')
        for flag, value in (('-a', arrow_path), ('-j', json_path)):
            if value is not None:
                argv.extend([flag, value])
        argv.extend(['-c', command])

        if self.debug:
            log(' '.join(argv))

        run_cmd(argv)

    def validate(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'VALIDATE')

    def json_to_file(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'JSON_TO_ARROW')

    def stream_to_file(self, stream_path, file_path):
        argv = self._java_cmd(self.ARROW_TOOLS_JAR,
                              'org.apache.arrow.tools.StreamToFile',
                              stream_path, file_path)
        if self.debug:
            log(' '.join(argv))
        run_cmd(argv)

    def file_to_stream(self, file_path, stream_path):
        argv = self._java_cmd(self.ARROW_TOOLS_JAR,
                              'org.apache.arrow.tools.FileToStream',
                              file_path, stream_path)
        if self.debug:
            log(' '.join(argv))
        run_cmd(argv)

    def flight_request(self, port, json_path=None, scenario_name=None):
        """Run the Flight test client against *port*.

        Raises ``TypeError`` when neither *json_path* nor *scenario_name*
        is given.
        """
        if json_path:
            selector = ('-j', json_path)
        elif scenario_name:
            selector = ('-scenario', scenario_name)
        else:
            raise TypeError("Must provide one of json_path or scenario_name")

        argv = self._java_cmd(self.ARROW_FLIGHT_JAR, self.ARROW_FLIGHT_CLIENT,
                              '-port', str(port))
        argv.extend(selector)

        if self.debug:
            log(' '.join(argv))
        run_cmd(argv)

    @contextlib.contextmanager
    def flight_server(self, scenario_name=None):
        """
        Start the Flight test server and yield the port it reports.

        The port is parsed from the first line the server prints on stdout.
        If that line does not start with the expected banner, the server is
        killed and a ``RuntimeError`` carrying its output is raised. The
        process is always killed when the context exits.
        """
        argv = self._java_cmd(self.ARROW_FLIGHT_JAR, self.ARROW_FLIGHT_SERVER,
                              '-port', '0')
        if scenario_name:
            argv.extend(('-scenario', scenario_name))
        if self.debug:
            log(' '.join(argv))
        proc = subprocess.Popen(argv,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        try:
            banner = proc.stdout.readline().decode()
            if not banner.startswith("Server listening on localhost:"):
                proc.kill()
                rest_out, rest_err = proc.communicate()
                raise RuntimeError(
                    "Flight-Java server did not start properly, "
                    "stdout:\n{}\n\nstderr:\n{}\n"
                    .format(banner + rest_out.decode(), rest_err.decode()))
            yield int(banner.split(":")[1])
        finally:
            proc.kill()
            proc.wait(5)
class JSTester(Tester):
    """
    Integration harness driving the JavaScript scripts in ``js/bin``
    (IPC producer and consumer only).
    """

    PRODUCER = True
    CONSUMER = True

    EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, 'js/bin')
    VALIDATE = os.path.join(EXE_PATH, 'integration.js')
    JSON_TO_ARROW = os.path.join(EXE_PATH, 'json-to-arrow.js')
    STREAM_TO_FILE = os.path.join(EXE_PATH, 'stream-to-file.js')
    FILE_TO_STREAM = os.path.join(EXE_PATH, 'file-to-stream.js')

    name = 'JS'

    def _run(self, exe_cmd, arrow_path=None, json_path=None,
             command='VALIDATE'):
        """Invoke *exe_cmd* directly with the given paths and mode."""
        argv = [exe_cmd]
        for flag, value in (('-a', arrow_path), ('-j', json_path)):
            if value is not None:
                argv.extend([flag, value])
        argv.extend(['--mode', command])

        if self.debug:
            log(' '.join(argv))

        run_cmd(argv)

    def validate(self, json_path, arrow_path):
        return self._run(self.VALIDATE, arrow_path, json_path, 'VALIDATE')

    def json_to_file(self, json_path, arrow_path):
        argv = ['node', '--no-warnings', self.JSON_TO_ARROW]
        argv += ['-a', arrow_path]
        argv += ['-j', json_path]
        self.run_shell_command(argv)

    def stream_to_file(self, stream_path, file_path):
        # Input and output both go through shell redirection.
        argv = ['node', '--no-warnings', self.STREAM_TO_FILE]
        argv += ['<', stream_path]
        argv += ['>', file_path]
        self.run_shell_command(argv)

    def file_to_stream(self, file_path, stream_path):
        # Input and output both go through shell redirection.
        argv = ['node', '--no-warnings', self.FILE_TO_STREAM]
        argv += ['<', file_path]
        argv += ['>', stream_path]
        self.run_shell_command(argv)
class RustTester(Tester):
    """
    Integration harness driving the Rust command-line tools found in
    ``rust/target/debug`` under ``ARROW_ROOT_DEFAULT``.
    """

    PRODUCER = True
    CONSUMER = True
    FLIGHT_SERVER = True
    FLIGHT_CLIENT = True

    EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, 'rust/target/debug')

    RUST_INTEGRATION_EXE = os.path.join(EXE_PATH,
                                        'arrow-json-integration-test')
    STREAM_TO_FILE = os.path.join(EXE_PATH, 'arrow-stream-to-file')
    FILE_TO_STREAM = os.path.join(EXE_PATH, 'arrow-file-to-stream')

    FLIGHT_SERVER_CMD = [
        os.path.join(EXE_PATH, 'flight-test-integration-server')]
    FLIGHT_CLIENT_CMD = [
        os.path.join(EXE_PATH, 'flight-test-integration-client'),
        "--host", "localhost"]

    name = 'Rust'

    def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
        """Invoke the JSON integration executable in the given mode."""
        argv = [self.RUST_INTEGRATION_EXE, '--integration']
        for flag, value in (('--arrow=', arrow_path), ('--json=', json_path)):
            if value is not None:
                argv.append(flag + value)
        argv.append('--mode=' + command)

        if self.debug:
            log(' '.join(argv))

        run_cmd(argv)

    def validate(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'VALIDATE')

    def json_to_file(self, json_path, arrow_path):
        return self._run(arrow_path, json_path, 'JSON_TO_ARROW')

    def stream_to_file(self, stream_path, file_path):
        # Uses shell redirection, hence run_shell_command.
        self.run_shell_command(
            [self.STREAM_TO_FILE, '<', stream_path, '>', file_path])

    def file_to_stream(self, file_path, stream_path):
        self.run_shell_command(
            [self.FILE_TO_STREAM, file_path, '>', stream_path])

    @contextlib.contextmanager
    def flight_server(self, scenario_name=None):
        """
        Start the Flight test server and yield the port it reports.

        The port is parsed from the first line the server prints on stdout.
        If that line does not start with the expected banner, the server is
        killed and a ``RuntimeError`` carrying its output is raised. The
        process is always killed when the context exits.
        """
        argv = self.FLIGHT_SERVER_CMD + ['--port=0']
        if scenario_name:
            argv.extend(["--scenario", scenario_name])
        if self.debug:
            log(' '.join(argv))
        proc = subprocess.Popen(argv,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        try:
            banner = proc.stdout.readline().decode()
            if not banner.startswith("Server listening on localhost:"):
                proc.kill()
                rest_out, rest_err = proc.communicate()
                raise RuntimeError(
                    "Flight-Rust server did not start properly, "
                    "stdout:\n{}\n\nstderr:\n{}\n"
                    .format(banner + rest_out.decode(), rest_err.decode()))
            yield int(banner.split(":")[1])
        finally:
            proc.kill()
            proc.wait(5)

    def flight_request(self, port, json_path=None, scenario_name=None):
        """Run the Flight test client against *port*.

        Raises ``TypeError`` when neither *json_path* nor *scenario_name*
        is given.
        """
        if json_path:
            selector = ('--path', json_path)
        elif scenario_name:
            selector = ('--scenario', scenario_name)
        else:
            raise TypeError("Must provide one of json_path or scenario_name")

        argv = self.FLIGHT_CLIENT_CMD + ['--port=' + str(port)]
        argv.extend(selector)

        if self.debug:
            log(' '.join(argv))
        run_cmd(argv)
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import contextlib
import io
import os
import random
import socket
import subprocess
import sys
import threading
import uuid

import numpy as np


def guid():
    """Return a random UUID4 as a hexadecimal string."""
    return uuid.uuid4().hex


# SKIP categories
SKIP_ARROW = 'arrow'
SKIP_FLIGHT = 'flight'

# The ARROW_ROOT environment variable when set; otherwise this file's
# absolute path with its last five "/"-separated components removed.
ARROW_ROOT_DEFAULT = os.environ.get(
    'ARROW_ROOT',
    os.path.abspath(__file__).rsplit("/", 5)[0]
)


class _Printer:
    """
    A print()-providing object that can override the stream output on
    a per-thread basis.
    """

    def __init__(self):
        self._tls = threading.local()

    def _get_stdout(self):
        # Lazily initialise the per-thread state on first use from a thread.
        try:
            return self._tls.stdout
        except AttributeError:
            self._tls.stdout = sys.stdout
            self._tls.corked = False
            return self._tls.stdout

    def print(self, *args, **kwargs):
        """
        A variant of print() that writes to a thread-local stream.
        """
        print(*args, file=self._get_stdout(), **kwargs)

    @property
    def stdout(self):
        """
        A thread-local stdout wrapper that may be temporarily buffered
        using `cork()`.
        """
        return self._get_stdout()

    @contextlib.contextmanager
    def cork(self):
        """
        Temporarily buffer this thread's stream and write out its contents
        at the end of the context manager.  Useful to avoid interleaved
        output when multiple threads output progress information.
        """
        outer_stdout = self._get_stdout()
        assert not self._tls.corked, "reentrant call"
        inner_stdout = self._tls.stdout = io.StringIO()
        self._tls.corked = True
        try:
            yield
        finally:
            # Restore the real stream first, then flush what was buffered.
            self._tls.stdout = outer_stdout
            self._tls.corked = False
            outer_stdout.write(inner_stdout.getvalue())
            outer_stdout.flush()


printer = _Printer()
log = printer.print


_RAND_CHARS = np.array(list("abcdefghijklmnop123456Ârrôwµ£°€矢"), dtype="U")


def random_utf8(nchars):
    """
    Generate one random UTF8 string.
    """
    return ''.join(np.random.choice(_RAND_CHARS, nchars))


def random_bytes(nbytes):
    """
    Generate one random binary string.
    """
    # NOTE getrandbits(0) fails
    if nbytes > 0:
        return random.getrandbits(nbytes * 8).to_bytes(nbytes,
                                                       byteorder='little')
    else:
        return b""


def tobytes(o):
    """Encode `o` as UTF-8 if it is a str; return anything else unchanged."""
    if isinstance(o, str):
        return o.encode('utf8')
    return o


def frombytes(o):
    """Decode `o` as UTF-8 if it is bytes; return anything else unchanged."""
    if isinstance(o, bytes):
        return o.decode('utf8')
    return o


def run_cmd(cmd):
    """
    Run a command and return its combined stdout/stderr decoded as UTF-8.

    Parameters
    ----------
    cmd : str or sequence
        The command line.  A string is split on single spaces; no shell-style
        quoting is interpreted.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero status.  The message contains
        the command line and everything the process printed.
    """
    if isinstance(cmd, str):
        cmd = cmd.split(' ')

    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # this avoids hiding the stdout / stderr of failed processes
        captured = e.output
        if isinstance(captured, bytes):
            # Decode leniently: a strict decode failing here would replace the
            # report about the failed command with a UnicodeDecodeError.
            captured = captured.decode('utf8', errors='replace')
        sio = io.StringIO()
        # str() each item so a non-str argument cannot break the report.
        print('Command failed:', " ".join(map(str, cmd)), file=sio)
        print('With output:', file=sio)
        print('--------------', file=sio)
        print(captured, file=sio)
        print('--------------', file=sio)
        raise RuntimeError(sio.getvalue()) from e

    return frombytes(output)


# Adapted from CPython
def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM):
    """Returns an unused port that should be suitable for binding.  This is
    achieved by creating a temporary socket with the given family and type
    (default is AF_INET, SOCK_STREAM), and binding it to the wildcard
    address ('') with the port set to 0, eliciting an unused ephemeral port
    from the OS.  The temporary socket is closed before the port is returned.
    """
    with socket.socket(family, socktype) as tempsock:
        tempsock.bind(('', 0))
        return tempsock.getsockname()[1]


# diff --git a/src/arrow/dev/archery/archery/lang/__init__.py b/src/arrow/dev/archery/archery/lang/__init__.py
# new file mode 100644
# index 000000000..13a83393a
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/lang/__init__.py
# @@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# diff --git a/src/arrow/dev/archery/archery/lang/cpp.py b/src/arrow/dev/archery/archery/lang/cpp.py
# new file mode 100644
# index 000000000..c2b1ca680
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/lang/cpp.py
# @@ -0,0 +1,296 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os

from ..utils.cmake import CMakeDefinition


def truthifier(value):
    """Translate a truthy/falsy value into the strings "ON" / "OFF"."""
    if value:
        return "ON"
    return "OFF"


def or_else(value, default):
    """Return `value` unless it is falsy, in which case return `default`."""
    return value or default


def coalesce(value, fallback):
    """Return `value` unless it is None, in which case return `fallback`."""
    if value is None:
        return fallback
    return value


LLVM_VERSION = 7


class CppConfiguration:
    """
    Collection of C++ build options that can be rendered as ``-D`` CMake
    definitions (`definitions`) and an environment mapping (`environment`).
    """

    def __init__(self,

                 # toolchain
                 cc=None, cxx=None, cxx_flags=None,
                 build_type=None, warn_level=None,
                 cpp_package_prefix=None, install_prefix=None, use_conda=None,
                 build_static=False, build_shared=True, build_unity=True,
                 # tests & examples
                 with_tests=None, with_benchmarks=None, with_examples=None,
                 with_integration=None,
                 # static checks
                 use_asan=None, use_tsan=None, use_ubsan=None,
                 with_fuzzing=None,
                 # Components
                 with_compute=None, with_csv=None, with_cuda=None,
                 with_dataset=None, with_filesystem=None, with_flight=None,
                 with_gandiva=None, with_hdfs=None, with_hiveserver2=None,
                 with_ipc=True, with_json=None, with_jni=None,
                 with_mimalloc=None,
                 with_parquet=None, with_plasma=None, with_python=True,
                 with_r=None, with_s3=None,
                 # Compressions
                 with_brotli=None, with_bz2=None, with_lz4=None,
                 with_snappy=None, with_zlib=None, with_zstd=None,
                 # extras
                 with_lint_only=False,
                 use_gold_linker=True,
                 simd_level="SSE4_2",
                 cmake_extras=None):
        # Toolchain; underscore-prefixed values are exposed through
        # properties that fall back to computed defaults.
        self._cc = cc
        self._cxx = cxx
        self.cxx_flags = cxx_flags
        self._build_type = build_type
        self.warn_level = warn_level
        self._install_prefix = install_prefix
        self._package_prefix = cpp_package_prefix
        self._use_conda = use_conda
        self.build_static = build_static
        self.build_shared = build_shared
        self.build_unity = build_unity

        # Tests & examples
        self.with_tests = with_tests
        self.with_benchmarks = with_benchmarks
        self.with_examples = with_examples
        self.with_integration = with_integration

        # Static checks
        self.use_asan = use_asan
        self.use_tsan = use_tsan
        self.use_ubsan = use_ubsan
        self.with_fuzzing = with_fuzzing

        # Components
        self.with_compute = with_compute
        self.with_csv = with_csv
        self.with_cuda = with_cuda
        self.with_dataset = with_dataset
        self.with_filesystem = with_filesystem
        self.with_flight = with_flight
        self.with_gandiva = with_gandiva
        self.with_hdfs = with_hdfs
        self.with_hiveserver2 = with_hiveserver2
        self.with_ipc = with_ipc
        self.with_json = with_json
        self.with_jni = with_jni
        self.with_mimalloc = with_mimalloc
        self.with_parquet = with_parquet
        self.with_plasma = with_plasma
        self.with_python = with_python
        self.with_r = with_r
        self.with_s3 = with_s3

        # Compressions
        self.with_brotli = with_brotli
        self.with_bz2 = with_bz2
        self.with_lz4 = with_lz4
        self.with_snappy = with_snappy
        self.with_zlib = with_zlib
        self.with_zstd = with_zstd

        # Extras
        self.with_lint_only = with_lint_only
        self.use_gold_linker = use_gold_linker
        self.simd_level = simd_level
        self.cmake_extras = cmake_extras

        # Dependency fixups: enable what a selected component needs, but only
        # for options the caller left as None.  Each fixup starts from the
        # argument as passed, not from an earlier fixup's result.
        if self.with_r:
            self.with_csv = coalesce(with_csv, True)
            self.with_dataset = coalesce(with_dataset, True)
            self.with_filesystem = coalesce(with_filesystem, True)
            self.with_ipc = coalesce(with_ipc, True)
            self.with_json = coalesce(with_json, True)
            self.with_parquet = coalesce(with_parquet, True)

        if self.with_python:
            self.with_zlib = coalesce(with_zlib, True)
            self.with_lz4 = coalesce(with_lz4, True)

        if self.with_dataset:
            self.with_filesystem = coalesce(with_filesystem, True)
            self.with_parquet = coalesce(with_parquet, True)

        if self.with_parquet:
            self.with_snappy = coalesce(with_snappy, True)

    @property
    def build_type(self):
        """Explicit build type, else "relwithdebinfo" when fuzzing, else
        "release"."""
        fallback = "relwithdebinfo" if self.with_fuzzing else "release"
        return self._build_type or fallback

    @property
    def cc(self):
        """Explicit C compiler, else versioned clang when fuzzing, else
        None."""
        if self._cc:
            return self._cc
        return "clang-{}".format(LLVM_VERSION) if self.with_fuzzing else None

    @property
    def cxx(self):
        """Explicit C++ compiler, else versioned clang++ when fuzzing, else
        None."""
        if self._cxx:
            return self._cxx
        return "clang++-{}".format(LLVM_VERSION) if self.with_fuzzing else None

    def _gen_defs(self):
        """Yield ``(name, value)`` pairs for the CMake definitions."""
        if self.cxx_flags:
            yield ("ARROW_CXXFLAGS", self.cxx_flags)

        yield ("CMAKE_EXPORT_COMPILE_COMMANDS", truthifier(True))
        yield ("CMAKE_BUILD_TYPE", self.build_type)

        if not self.with_lint_only:
            yield ("BUILD_WARNING_LEVEL",
                   or_else(self.warn_level, "production"))

        prefix = self.install_prefix
        if prefix:
            yield ("CMAKE_INSTALL_PREFIX", prefix)

        if self._package_prefix is not None:
            yield ("ARROW_DEPENDENCY_SOURCE", "SYSTEM")
            yield ("ARROW_PACKAGE_PREFIX", self._package_prefix)

        # ON/OFF switches, in emission order: (CMake name, attribute name).
        switches = (
            ("ARROW_BUILD_STATIC", "build_static"),
            ("ARROW_BUILD_SHARED", "build_shared"),
            ("CMAKE_UNITY_BUILD", "build_unity"),
            # Tests and benchmarks
            ("ARROW_BUILD_TESTS", "with_tests"),
            ("ARROW_BUILD_BENCHMARKS", "with_benchmarks"),
            ("ARROW_BUILD_EXAMPLES", "with_examples"),
            ("ARROW_BUILD_INTEGRATION", "with_integration"),
            # Static checks
            ("ARROW_USE_ASAN", "use_asan"),
            ("ARROW_USE_TSAN", "use_tsan"),
            ("ARROW_USE_UBSAN", "use_ubsan"),
            ("ARROW_FUZZING", "with_fuzzing"),
            # Components
            ("ARROW_COMPUTE", "with_compute"),
            ("ARROW_CSV", "with_csv"),
            ("ARROW_CUDA", "with_cuda"),
            ("ARROW_DATASET", "with_dataset"),
            ("ARROW_FILESYSTEM", "with_filesystem"),
            ("ARROW_FLIGHT", "with_flight"),
            ("ARROW_GANDIVA", "with_gandiva"),
            ("ARROW_PARQUET", "with_parquet"),
            ("ARROW_HDFS", "with_hdfs"),
            ("ARROW_HIVESERVER2", "with_hiveserver2"),
            ("ARROW_IPC", "with_ipc"),
            ("ARROW_JSON", "with_json"),
            ("ARROW_JNI", "with_jni"),
            ("ARROW_MIMALLOC", "with_mimalloc"),
            ("ARROW_PLASMA", "with_plasma"),
            ("ARROW_PYTHON", "with_python"),
            ("ARROW_S3", "with_s3"),
            # Compressions
            ("ARROW_WITH_BROTLI", "with_brotli"),
            ("ARROW_WITH_BZ2", "with_bz2"),
            ("ARROW_WITH_LZ4", "with_lz4"),
            ("ARROW_WITH_SNAPPY", "with_snappy"),
            ("ARROW_WITH_ZLIB", "with_zlib"),
            ("ARROW_WITH_ZSTD", "with_zstd"),
            ("ARROW_LINT_ONLY", "with_lint_only"),
        )
        for cmake_name, attr in switches:
            yield (cmake_name, truthifier(getattr(self, attr)))

        # Some configurations don't like gnu gold linker.
        gold_unsupported = self.with_fuzzing or self.with_gandiva
        if self.use_gold_linker and not gold_unsupported:
            yield ("ARROW_USE_LD_GOLD", truthifier(self.use_gold_linker))
        yield ("ARROW_SIMD_LEVEL", or_else(self.simd_level, "SSE4_2"))

        # Detect custom conda toolchain
        if self.use_conda:
            for cmake_name, env_name in (('CMAKE_AR', 'AR'),
                                         ('CMAKE_RANLIB', 'RANLIB')):
                tool = os.environ.get(env_name)
                if tool:
                    yield (cmake_name, tool)

    @property
    def install_prefix(self):
        """Explicit install prefix, else $CONDA_PREFIX when conda is in use,
        else None."""
        if self._install_prefix:
            return self._install_prefix
        return os.environ.get("CONDA_PREFIX") if self.use_conda else None

    @property
    def use_conda(self):
        """Explicit preference, or whether CONDA_PREFIX is set when the
        caller gave none."""
        if self._use_conda is not None:
            return self._use_conda
        # If the user didn't specify a preference, guess via environment
        return os.environ.get("CONDA_PREFIX") is not None

    @property
    def definitions(self):
        """List of ``-DNAME=VALUE`` strings followed by `cmake_extras`."""
        trailing = list(self.cmake_extras) if self.cmake_extras else []
        rendered = ["-D{}={}".format(key, value)
                    for key, value in self._gen_defs()]
        return rendered + trailing

    @property
    def environment(self):
        """Copy of os.environ with CC / CXX set when a compiler is known."""
        env = os.environ.copy()
        c_compiler = self.cc
        if c_compiler:
            env["CC"] = c_compiler
        cxx_compiler = self.cxx
        if cxx_compiler:
            env["CXX"] = cxx_compiler
        return env


class CppCMakeDefinition(CMakeDefinition):
    """CMakeDefinition populated from a CppConfiguration."""

    def __init__(self, source, conf, **kwargs):
        self.configuration = conf
        super().__init__(
            source,
            **kwargs,
            definitions=conf.definitions,
            env=conf.environment,
            build_type=conf.build_type,
        )


# diff --git a/src/arrow/dev/archery/archery/lang/java.py b/src/arrow/dev/archery/archery/lang/java.py
# new file mode 100644
# index 000000000..bc169adf6
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/lang/java.py
# @@ -0,0 +1,77 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os

from ..utils.command import Command, CommandStackMixin, default_bin
from ..utils.maven import MavenDefinition


class Java(Command):
    """Command whose binary is resolved via default_bin(java_bin, "java")."""

    def __init__(self, java_bin=None):
        self.bin = default_bin(java_bin, "java")


class Jar(CommandStackMixin, Java):
    """Java command with ``-jar <jar>`` stored as its `argv`."""

    def __init__(self, jar, *args, **kwargs):
        self.jar = jar
        self.argv = ("-jar", jar)
        Java.__init__(self, *args, **kwargs)


class JavaConfiguration:
    """Java toolchain settings plus extra build / benchmark definitions."""

    def __init__(self,

                 # toolchain
                 java_home=None, java_options=None,
                 # build & benchmark
                 build_extras=None, benchmark_extras=None):
        self.java_home = java_home
        self.java_options = java_options

        # Always store fresh lists; a falsy argument becomes an empty list.
        self.build_extras = list(build_extras or [])
        self.benchmark_extras = list(benchmark_extras or [])

    @property
    def build_definitions(self):
        """The extra build definitions."""
        return self.build_extras

    @property
    def benchmark_definitions(self):
        """The extra benchmark definitions."""
        return self.benchmark_extras

    @property
    def environment(self):
        """Copy of os.environ with JAVA_HOME / JAVA_OPTIONS set when
        configured."""
        env = os.environ.copy()
        overrides = (
            ("JAVA_HOME", self.java_home),
            ("JAVA_OPTIONS", self.java_options),
        )
        for variable, value in overrides:
            if value:
                env[variable] = value
        return env


class JavaMavenDefinition(MavenDefinition):
    """MavenDefinition populated from a JavaConfiguration."""

    def __init__(self, source, conf, **kwargs):
        self.configuration = conf
        super().__init__(
            source,
            **kwargs,
            build_definitions=conf.build_definitions,
            benchmark_definitions=conf.benchmark_definitions,
            env=conf.environment,
        )


# diff --git a/src/arrow/dev/archery/archery/lang/python.py b/src/arrow/dev/archery/archery/lang/python.py
# new file mode 100644
# index 000000000..c6ebbe650
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/lang/python.py
# @@ -0,0 +1,223 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import inspect
import tokenize
from contextlib import contextmanager

try:
    from numpydoc.validate import Docstring, validate
except ImportError:
    have_numpydoc = False
else:
    have_numpydoc = True

from ..utils.logger import logger
from ..utils.command import Command, capture_stdout, default_bin


class Flake8(Command):
    """Command whose binary is resolved via default_bin(..., "flake8")."""

    def __init__(self, flake8_bin=None):
        self.bin = default_bin(flake8_bin, "flake8")


class Autopep8(Command):
    """Command whose binary is resolved via default_bin(..., "autopep8")."""

    def __init__(self, autopep8_bin=None):
        self.bin = default_bin(autopep8_bin, "autopep8")

    @capture_stdout()
    def run_captured(self, *args, **kwargs):
        """Call `run` under the `capture_stdout` decorator."""
        return self.run(*args, **kwargs)


def _tokenize_signature(s):
    """Tokenize the ASCII-only string `s` with the `tokenize` module."""
    lines = s.encode('ascii').splitlines()
    generator = iter(lines).__next__
    return tokenize.tokenize(generator)


def _convert_typehint(tokens):
    """
    Yield ``(type, string)`` token pairs with cython typehints removed.

    Everything before the first opening bracket is dropped.  Of two
    consecutive NAME tokens only the second one is kept.

    Raises
    ------
    ValueError
        If more than two NAME tokens follow each other.
    """
    names = []
    opening_bracket_reached = False
    for token in tokens:
        # omit the tokens before the opening bracket
        if not opening_bracket_reached:
            if token.string == '(':
                opening_bracket_reached = True
            else:
                continue

        if token.type == tokenize.NAME:
            names.append(token)
        else:
            if len(names) == 1:
                yield (names[0].type, names[0].string)
            elif len(names) == 2:
                # two "NAME" tokens follow each other which means a cython
                # typehint like `bool argument`, so remove the typehint
                # note that we could convert it to python typehints, but hints
                # are not supported by _signature_fromstr
                yield (names[1].type, names[1].string)
            elif len(names) > 2:
                raise ValueError('More than two NAME tokens follow each other')
            names = []
            yield (token.type, token.string)


def inspect_signature(obj):
    """
    Custom signature inspection primarily for cython generated callables.

    Cython puts the signatures to the first line of the docstrings, which we
    can reuse to parse the python signature from, but some gymnastics are
    required, like removing the cython typehints.

    It converts the cython signature:
        array(obj, type=None, mask=None, size=None, from_pandas=None,
              bool safe=True, MemoryPool memory_pool=None)
    To the Signature:
        (obj, type=None, mask=None, size=None, from_pandas=None,
         safe=True, memory_pool=None)
    """
    cython_signature = obj.__doc__.splitlines()[0]
    cython_tokens = _tokenize_signature(cython_signature)
    python_tokens = _convert_typehint(cython_tokens)
    python_signature = tokenize.untokenize(python_tokens)
    return inspect._signature_fromstr(inspect.Signature, obj, python_signature)


class NumpyDoc:
    """Run numpydoc validation over the public members of some symbols."""

    def __init__(self, symbols=None):
        if not have_numpydoc:
            raise RuntimeError(
                'Numpydoc is not available, install the development version '
                'with command: pip install numpydoc==1.1.0'
            )
        self.symbols = set(symbols or {'pyarrow'})

    def traverse(self, fn, obj, from_package):
        """Apply a function on publicly exposed API components.

        Recursively iterates over the members of the passed object. It omits
        any '_' prefixed members and members whose `__module__` does not
        start with `from_package`.

        Parameters
        ----------
        fn : callable
            Called once with every visited object.
        obj : Any
            Root object of the traversal; visited objects must be hashable.
        from_package : string
            Predicate to only consider objects from this package.
        """
        todo = [obj]
        seen = set()

        while todo:
            obj = todo.pop()
            if obj in seen:
                continue
            seen.add(obj)

            fn(obj)

            for name in dir(obj):
                if name.startswith('_'):
                    continue

                member = getattr(obj, name)
                module = getattr(member, '__module__', None)
                if not (module and module.startswith(from_package)):
                    continue

                todo.append(member)

    @contextmanager
    def _apply_patches(self):
        """
        Patch Docstring class to bypass loading already loaded python objects.

        `Docstring._load_obj` and `inspect.signature` are replaced for the
        duration of the context and put back afterwards.
        """
        # Keep the attribute exactly as stored on the class so it can be put
        # back unchanged.  Plain class access unwraps a staticmethod to its
        # function; assigning that function back would turn it into an
        # ordinary method that receives `self` on instance access.
        stored_load_obj = inspect.getattr_static(Docstring, '_load_obj')
        orig_load_obj = Docstring._load_obj
        orig_signature = inspect.signature

        @staticmethod
        def _load_obj(obj):
            # By default it expects a qualname and import the object, but we
            # have already loaded object after the API traversal.
            if isinstance(obj, str):
                return orig_load_obj(obj)
            else:
                return obj

        def signature(obj):
            # inspect.signature tries to parse __text_signature__ if other
            # properties like __signature__ doesn't exists, but cython
            # doesn't set that property despite that embedsignature cython
            # directive is set. The only way to inspect a cython compiled
            # callable's signature to parse it from __doc__ while
            # embedsignature directive is set during the build phase.
            # So patch inspect.signature function to attempt to parse the
            # first line of callable.__doc__ as a signature.
            try:
                return orig_signature(obj)
            except Exception as orig_error:
                try:
                    return inspect_signature(obj)
                except Exception:
                    # The fallback failed too: report the original problem.
                    raise orig_error

        try:
            Docstring._load_obj = _load_obj
            inspect.signature = signature
            yield
        finally:
            Docstring._load_obj = stored_load_obj
            inspect.signature = orig_signature

    def validate(self, from_package='', allow_rules=None,
                 disallow_rules=None):
        """
        Validate the docstrings reachable from `self.symbols`.

        Parameters
        ----------
        from_package : string, default ''
            Passed on to `traverse`.
        allow_rules : collection, optional
            When non-empty, only errors with these codes are kept.
        disallow_rules : collection, optional
            When non-empty, errors with these codes are dropped.

        Returns
        -------
        list of (object, result) tuples
            One entry per object with at least one remaining error;
            `result['errors']` holds only the remaining errors.
        """
        results = []

        def callback(obj):
            try:
                result = validate(obj)
            except OSError as e:
                symbol = f"{obj.__module__}.{obj.__name__}"
                logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
                return

            errors = []
            for errcode, errmsg in result.get('errors', []):
                if allow_rules and errcode not in allow_rules:
                    continue
                if disallow_rules and errcode in disallow_rules:
                    continue
                errors.append((errcode, errmsg))

            if errors:
                result['errors'] = errors
                results.append((obj, result))

        with self._apply_patches():
            for symbol in self.symbols:
                try:
                    obj = Docstring._load_obj(symbol)
                except (ImportError, AttributeError):
                    print('{} is not available for import'.format(symbol))
                else:
                    self.traverse(callback, obj, from_package=from_package)

        return results


# diff --git a/src/arrow/dev/archery/archery/linking.py b/src/arrow/dev/archery/archery/linking.py
# new file mode 100644
# index 000000000..c2e6f1772
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/linking.py
# @@
# -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import platform
import subprocess

from .utils.command import Command


_ldd = Command("ldd")
_otool = Command("otool")


class DependencyError(Exception):
    """Raised when a library links an unexpected or disallowed dependency."""


class DynamicLibrary:
    """A shared library on disk whose dependencies can be listed."""

    def __init__(self, path):
        self.path = path

    def list_dependencies(self):
        """
        List the full name of the library dependencies.

        Runs ``ldd`` on Linux and ``otool -L`` on Darwin and returns the
        first whitespace-separated field of every non-blank output line.

        Raises
        ------
        ValueError
            If the current system is neither Linux nor Darwin.
        """
        system = platform.system()
        if system == "Linux":
            result = _ldd.run(self.path, stdout=subprocess.PIPE)
        elif system == "Darwin":
            result = _otool.run("-L", self.path, stdout=subprocess.PIPE)
        else:
            # Report the detected system name, not the `platform` module.
            raise ValueError(f"{system} is not supported")

        # Skip blank lines: splitting one yields no fields to index into.
        return [
            line.split(None, 1)[0].decode()
            for line in result.stdout.splitlines()
            if line.strip()
        ]

    def list_dependency_names(self):
        """
        List the truncated names of the dynamic library dependencies.

        Each name is the last path component cut at its first dot.
        """
        names = []
        for dependency in self.list_dependencies():
            *_, library = dependency.rsplit("/", 1)
            name, *_ = library.split(".", 1)
            names.append(name)
        return names


def check_dynamic_library_dependencies(path, allowed, disallowed):
    """
    Check the dependency names of the library at `path`.

    Parameters
    ----------
    path : path to the dynamic library
    allowed : collection
        When non-empty, every dependency name must be in it.
    disallowed : collection
        When non-empty, no dependency name may be in it.

    Raises
    ------
    DependencyError
        On the first dependency that violates either rule.
    """
    dylib = DynamicLibrary(path)
    for dep in dylib.list_dependency_names():
        if allowed and dep not in allowed:
            raise DependencyError(
                f"Unexpected shared dependency found in {dylib.path}: `{dep}`"
            )
        if disallowed and dep in disallowed:
            raise DependencyError(
                f"Disallowed shared dependency found in {dylib.path}: `{dep}`"
            )


# diff --git a/src/arrow/dev/archery/archery/release.py b/src/arrow/dev/archery/archery/release.py
# new file mode 100644
# index 000000000..6baeabc9d
# --- /dev/null
# +++ b/src/arrow/dev/archery/archery/release.py
# @@ -0,0 +1,535 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
+ +from collections import defaultdict +import functools +import os +import re +import pathlib +import shelve +import warnings + +from git import Repo +from jira import JIRA +from semver import VersionInfo as SemVer + +from .utils.source import ArrowSources +from .utils.report import JinjaReport + + +def cached_property(fn): + return property(functools.lru_cache(maxsize=1)(fn)) + + +class Version(SemVer): + + __slots__ = ('released', 'release_date') + + def __init__(self, released=False, release_date=None, **kwargs): + super().__init__(**kwargs) + self.released = released + self.release_date = release_date + + @classmethod + def parse(cls, version, **kwargs): + return cls(**SemVer.parse(version).to_dict(), **kwargs) + + @classmethod + def from_jira(cls, jira_version): + return cls.parse( + jira_version.name, + released=jira_version.released, + release_date=getattr(jira_version, 'releaseDate', None) + ) + + +class Issue: + + def __init__(self, key, type, summary): + self.key = key + self.type = type + self.summary = summary + + @classmethod + def from_jira(cls, jira_issue): + return cls( + key=jira_issue.key, + type=jira_issue.fields.issuetype.name, + summary=jira_issue.fields.summary + ) + + @property + def project(self): + return self.key.split('-')[0] + + @property + def number(self): + return int(self.key.split('-')[1]) + + +class Jira(JIRA): + + def __init__(self, user=None, password=None, + url='https://issues.apache.org/jira'): + user = user or os.environ.get('APACHE_JIRA_USER') + password = password or os.environ.get('APACHE_JIRA_PASSWORD') + super().__init__(url, basic_auth=(user, password)) + + def project_version(self, version_string, project='ARROW'): + # query version from jira to populated with additional metadata + versions = {str(v): v for v in self.project_versions(project)} + return versions[version_string] + + def project_versions(self, project): + versions = [] + for v in super().project_versions(project): + try: + 
versions.append(Version.from_jira(v)) + except ValueError: + # ignore invalid semantic versions like JS-0.4.0 + continue + return sorted(versions, reverse=True) + + def issue(self, key): + return Issue.from_jira(super().issue(key)) + + def project_issues(self, version, project='ARROW'): + query = "project={} AND fixVersion={}".format(project, version) + issues = super().search_issues(query, maxResults=False) + return list(map(Issue.from_jira, issues)) + + +class CachedJira: + + def __init__(self, cache_path, jira=None): + self.jira = jira or Jira() + self.cache_path = cache_path + + def __getattr__(self, name): + attr = getattr(self.jira, name) + return self._cached(name, attr) if callable(attr) else attr + + def _cached(self, name, method): + def wrapper(*args, **kwargs): + key = str((name, args, kwargs)) + with shelve.open(self.cache_path) as cache: + try: + result = cache[key] + except KeyError: + cache[key] = result = method(*args, **kwargs) + return result + return wrapper + + +_TITLE_REGEX = re.compile( + r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*" + r"(?P<components>\[.*\])?\s*(?P<summary>.*)" +) +_COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]") + + +class CommitTitle: + + def __init__(self, summary, project=None, issue=None, components=None): + self.project = project + self.issue = issue + self.components = components or [] + self.summary = summary + + def __str__(self): + out = "" + if self.issue: + out += "{}: ".format(self.issue) + if self.components: + for component in self.components: + out += "[{}]".format(component) + out += " " + out += self.summary + return out + + def __eq__(self, other): + return ( + self.summary == other.summary and + self.project == other.project and + self.issue == other.issue and + self.components == other.components + ) + + def __hash__(self): + return hash( + (self.summary, self.project, self.issue, tuple(self.components)) + ) + + @classmethod + def parse(cls, headline): + matches = _TITLE_REGEX.match(headline) 
+ if matches is None: + warnings.warn( + "Unable to parse commit message `{}`".format(headline) + ) + return CommitTitle(headline) + + values = matches.groupdict() + components = values.get('components') or '' + components = _COMPONENT_REGEX.findall(components) + + return CommitTitle( + values['summary'], + project=values.get('project'), + issue=values.get('issue'), + components=components + ) + + +class Commit: + + def __init__(self, wrapped): + self._title = CommitTitle.parse(wrapped.summary) + self._wrapped = wrapped + + def __getattr__(self, attr): + if hasattr(self._title, attr): + return getattr(self._title, attr) + else: + return getattr(self._wrapped, attr) + + def __repr__(self): + template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>' + return template.format(self.hexsha, self.issue, self.components, + self.summary) + + @property + def url(self): + return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha) + + @property + def title(self): + return self._title + + +class ReleaseCuration(JinjaReport): + templates = { + 'console': 'release_curation.txt.j2' + } + fields = [ + 'release', + 'within', + 'outside', + 'nojira', + 'parquet', + 'nopatch' + ] + + +class JiraChangelog(JinjaReport): + templates = { + 'markdown': 'release_changelog.md.j2', + 'html': 'release_changelog.html.j2' + } + fields = [ + 'release', + 'categories' + ] + + +class Release: + + def __init__(self): + raise TypeError("Do not initialize Release class directly, use " + "Release.from_jira(version) instead.") + + def __repr__(self): + if self.version.released: + status = "released_at={!r}".format(self.version.release_date) + else: + status = "pending" + return "<{} {!r} {}>".format(self.__class__.__name__, + str(self.version), status) + + @staticmethod + def from_jira(version, jira=None, repo=None): + if jira is None: + jira = Jira() + elif isinstance(jira, str): + jira = Jira(jira) + elif not isinstance(jira, (Jira, CachedJira)): + raise TypeError("`jira` 
argument must be a server url or a valid " + "Jira instance") + + if repo is None: + arrow = ArrowSources.find() + repo = Repo(arrow.path) + elif isinstance(repo, (str, pathlib.Path)): + repo = Repo(repo) + elif not isinstance(repo, Repo): + raise TypeError("`repo` argument must be a path or a valid Repo " + "instance") + + if isinstance(version, str): + version = jira.project_version(version, project='ARROW') + elif not isinstance(version, Version): + raise TypeError(version) + + # decide the type of the release based on the version number + if version.patch == 0: + if version.minor == 0: + klass = MajorRelease + elif version.major == 0: + # handle minor releases before 1.0 as major releases + klass = MajorRelease + else: + klass = MinorRelease + else: + klass = PatchRelease + + # prevent instantiating release object directly + obj = klass.__new__(klass) + obj.version = version + obj.jira = jira + obj.repo = repo + + return obj + + @property + def is_released(self): + return self.version.released + + @property + def tag(self): + return "apache-arrow-{}".format(str(self.version)) + + @property + def branch(self): + raise NotImplementedError() + + @property + def siblings(self): + """ + Releases to consider when calculating previous and next releases. 
+ """ + raise NotImplementedError() + + @cached_property + def previous(self): + # select all non-patch releases + position = self.siblings.index(self.version) + try: + previous = self.siblings[position + 1] + except IndexError: + # first release doesn't have a previous one + return None + else: + return Release.from_jira(previous, jira=self.jira, repo=self.repo) + + @cached_property + def next(self): + # select all non-patch releases + position = self.siblings.index(self.version) + if position <= 0: + raise ValueError("There is no upcoming release set in JIRA after " + "version {}".format(self.version)) + upcoming = self.siblings[position - 1] + return Release.from_jira(upcoming, jira=self.jira, repo=self.repo) + + @cached_property + def issues(self): + issues = self.jira.project_issues(self.version, project='ARROW') + return {i.key: i for i in issues} + + @cached_property + def commits(self): + """ + All commits applied between two versions. + """ + if self.previous is None: + # first release + lower = '' + else: + lower = self.repo.tags[self.previous.tag] + + if self.version.released: + upper = self.repo.tags[self.tag] + else: + try: + upper = self.repo.branches[self.branch] + except IndexError: + warnings.warn("Release branch `{}` doesn't exist." 
+ .format(self.branch)) + return [] + + commit_range = "{}..{}".format(lower, upper) + return list(map(Commit, self.repo.iter_commits(commit_range))) + + def curate(self): + # handle commits with parquet issue key specially and query them from + # jira and add it to the issues + release_issues = self.issues + + within, outside, nojira, parquet = [], [], [], [] + for c in self.commits: + if c.issue is None: + nojira.append(c) + elif c.issue in release_issues: + within.append((release_issues[c.issue], c)) + elif c.project == 'PARQUET': + parquet.append((self.jira.issue(c.issue), c)) + else: + outside.append((self.jira.issue(c.issue), c)) + + # remaining jira tickets + within_keys = {i.key for i, c in within} + nopatch = [issue for key, issue in release_issues.items() + if key not in within_keys] + + return ReleaseCuration(release=self, within=within, outside=outside, + nojira=nojira, parquet=parquet, nopatch=nopatch) + + def changelog(self): + release_issues = [] + + # get organized report for the release + curation = self.curate() + + # jira tickets having patches in the release + for issue, _ in curation.within: + release_issues.append(issue) + + # jira tickets without patches + for issue in curation.nopatch: + release_issues.append(issue) + + # parquet patches in the release + for issue, _ in curation.parquet: + release_issues.append(issue) + + # organize issues into categories + issue_types = { + 'Bug': 'Bug Fixes', + 'Improvement': 'New Features and Improvements', + 'New Feature': 'New Features and Improvements', + 'Sub-task': 'New Features and Improvements', + 'Task': 'New Features and Improvements', + 'Test': 'Bug Fixes', + 'Wish': 'New Features and Improvements', + } + categories = defaultdict(list) + for issue in release_issues: + categories[issue_types[issue.type]].append(issue) + + # sort issues by the issue key in ascending order + for name, issues in categories.items(): + issues.sort(key=lambda issue: (issue.project, issue.number)) + + return 
class MaintenanceMixin:
    """
    Cherry-picking helpers for releases that are cut from a maintenance
    branch instead of directly from the main branch.
    """

    def commits_to_pick(self, exclude_already_applied=True):
        """
        Select the main branch commits that belong to this release.

        Parameters
        ----------
        exclude_already_applied : bool, default True
            Leave out commits whose title matches a commit that is already
            part of this release.

        Returns
        -------
        iterator of Commit
            The selected commits in the reverse of the order in which
            ``repo.iter_commits`` yields them.
        """
        # the maintenance branch is rooted at the previous major release;
        # minor releases preceding 1.0.0 are treated as major releases
        version = self.version
        if version.major == 0:
            template, number = "apache-arrow-0.{}.0..master", version.minor
        else:
            template, number = "apache-arrow-{}.0.0..master", version.major
        main_commits = map(
            Commit, self.repo.iter_commits(template.format(number))
        )

        # the sha changes after a cherry-pick, so already applied patches
        # can only be recognized by their commit title
        skip_titles = (
            {applied.title for applied in self.commits}
            if exclude_already_applied else set()
        )

        # keep only the commits whose issue is assigned to this jira release
        selected = []
        for candidate in main_commits:
            if candidate.issue not in self.issues:
                continue
            if candidate.title in skip_titles:
                continue
            selected.append(candidate)

        # restoring the original order of the commits helps to minimize the
        # merge conflicts during the cherry-picks
        return reversed(selected)

    def cherry_pick_commits(self, recreate_branch=True):
        """
        Check out the maintenance branch and cherry-pick the release commits.

        Parameters
        ----------
        recreate_branch : bool, default True
            Delete the maintenance branch if it exists and recreate it from
            the tag of the previous release; otherwise just check out the
            already existing branch.
        """
        git = self.repo.git
        if not recreate_branch:
            # reuse the maintenance branch as it is
            git.checkout(self.branch)
        else:
            if self.branch in self.repo.branches:
                git.branch('-D', self.branch)
            # branch off of the previous release's tag
            git.checkout(self.previous.tag, b=self.branch)

        for picked in self.commits_to_pick():
            git.cherry_pick(picked.hexsha)
+ @cached_property + def siblings(self): + """ + Filter only the major releases. + """ + # handle minor releases before 1.0 as major releases + return [v for v in self.jira.project_versions('ARROW') + if v.patch == 0 and (v.major == 0 or v.minor == 0)] + + +class MinorRelease(Release, MaintenanceMixin): + + @property + def branch(self): + return "maint-{}.x.x".format(self.version.major) + + @cached_property + def siblings(self): + """ + Filter the major and minor releases. + """ + return [v for v in self.jira.project_versions('ARROW') if v.patch == 0] + + +class PatchRelease(Release, MaintenanceMixin): + + @property + def branch(self): + return "maint-{}.{}.x".format(self.version.major, self.version.minor) + + @cached_property + def siblings(self): + """ + No filtering, consider all releases. + """ + return self.jira.project_versions('ARROW') diff --git a/src/arrow/dev/archery/archery/templates/release_changelog.md.j2 b/src/arrow/dev/archery/archery/templates/release_changelog.md.j2 new file mode 100644 index 000000000..c0406ddf4 --- /dev/null +++ b/src/arrow/dev/archery/archery/templates/release_changelog.md.j2 @@ -0,0 +1,29 @@ +{# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#} +# Apache Arrow {{ release.version }} ({{ release.version.release_date or today() }}) + +{% for category, issues in categories.items() -%} + +## {{ category }} + +{% for issue in issues -%} +* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ issue.summary | md }} +{% endfor %} + +{% endfor %} diff --git a/src/arrow/dev/archery/archery/templates/release_curation.txt.j2 b/src/arrow/dev/archery/archery/templates/release_curation.txt.j2 new file mode 100644 index 000000000..a5d11e9d4 --- /dev/null +++ b/src/arrow/dev/archery/archery/templates/release_curation.txt.j2 @@ -0,0 +1,41 @@ +{# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#} +Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }} + +Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }} + +Patches with assigned issue in version {{ release.version }}: +{% for issue, commit in within -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +Patches with assigned issue outside of version {{ release.version }}: +{% for issue, commit in outside -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +Patches in version {{ release.version }} without a linked issue: +{% for commit in nojira -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +JIRA issues in version {{ release.version }} without a linked patch: +{% for issue in nopatch -%} + - https://issues.apache.org/jira/browse/{{ issue.key }} +{% endfor %} diff --git a/src/arrow/dev/archery/archery/testing.py b/src/arrow/dev/archery/archery/testing.py new file mode 100644 index 000000000..471a54d4c --- /dev/null +++ b/src/arrow/dev/archery/archery/testing.py @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from contextlib import contextmanager
import os
from unittest import mock
import re


class DotDict(dict):
    """Dictionary whose keys can also be read as attributes."""

    def __getattr__(self, key):
        try:
            item = self[key]
        except KeyError:
            # the missing key is reported as a missing attribute; the
            # KeyError itself adds nothing to the traceback
            raise AttributeError(key) from None
        if isinstance(item, dict):
            # wrap nested dictionaries so that attribute access can be chained
            return DotDict(item)
        else:
            return item


class PartialEnv(dict):
    """
    Dictionary which compares equal to any mapping containing all of its
    items; the other mapping may hold additional items.
    """

    def __eq__(self, other):
        try:
            return self.items() <= other.items()
        except AttributeError:
            # not a mapping, let python fall back to the default comparison
            # instead of failing in the middle of a mock assertion
            return NotImplemented

    def __ne__(self, other):
        # dict.__ne__ would compare the full content, keep != consistent
        # with the subset based ==
        result = self.__eq__(other)
        return result if result is NotImplemented else not result


_mock_call_type = type(mock.call())


def _ensure_mock_call_object(obj, **kwargs):
    """
    Convert an expected command to a ``mock.call`` object.

    Parameters
    ----------
    obj : mock.call, str or list
        Call objects are returned unchanged, strings are split on
        whitespace to an argument list, lists are used as the argument list.
    **kwargs
        Keyword arguments expected in the call.

    Raises
    ------
    TypeError
        If ``obj`` is none of the supported types.
    """
    if isinstance(obj, _mock_call_type):
        return obj
    elif isinstance(obj, str):
        # str.split() ignores leading and trailing whitespace, splitting
        # with the r"\s+" pattern would produce empty arguments for them
        cmd = obj.split()
        return mock.call(cmd, **kwargs)
    elif isinstance(obj, list):
        return mock.call(obj, **kwargs)
    else:
        raise TypeError(obj)


class SuccessfulSubprocessResult:
    """Stand-in for the result of a subprocess which has not failed."""

    def check_returncode(self):
        return


@contextmanager
def assert_subprocess_calls(expected_commands_or_calls, **kwargs):
    """
    Patch ``subprocess.run`` and assert the expected calls on exit.

    Parameters
    ----------
    expected_commands_or_calls : iterable of mock.call, str or list
        The expected calls, see ``_ensure_mock_call_object``.
    **kwargs
        Keyword arguments expected in each call given as a str or a list.

    Yields
    ------
    The mock object replacing ``subprocess.run``.
    """
    calls = [
        _ensure_mock_call_object(obj, **kwargs)
        for obj in expected_commands_or_calls
    ]
    with mock.patch('subprocess.run', autospec=True) as run:
        run.return_value = SuccessfulSubprocessResult()
        yield run
        run.assert_has_calls(calls)


@contextmanager
def override_env(mapping):
    """
    Temporarily add or overwrite environment variables.

    The real ``os.environ`` object is patched in place rather than replaced
    by a plain dictionary, so every reference to it observes the overridden
    values. The previous content is restored on exit.

    Parameters
    ----------
    mapping : dict
        Variable names and values to set; both must be strings.

    Yields
    ------
    ``os.environ`` with the overrides applied.
    """
    with mock.patch.dict(os.environ, mapping):
        yield os.environ
0.0025108399115900733, "regression": false, "baseline": 15181891659.539782, "contender": 15220010959.05199, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"} + +{"benchmark": "RegressionSumKernel/32768/50", "change": 0.00346735806287155, "regression": false, "baseline": 11471825667.817123, "contender": 11511602595.042286, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"} + +{"benchmark": "RegressionSumKernel/32768/0", "change": 0.010140954727954987, "regression": false, "baseline": 18316987019.994465, "contender": 18502738756.116768, "unit": "bytes_per_second", "less_is_better": false, "suite": "arrow-compute-aggregate-benchmark"} diff --git a/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl new file mode 100644 index 000000000..1e25810d7 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/archery-benchmark-diff.jsonl @@ -0,0 +1,4 @@ +{"benchmark":"RegressionSumKernel/32768/50","change":-0.001550846227215492,"regression":false,"baseline":19241207435.428757,"contender":19211367281.47045,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"} +{"benchmark":"RegressionSumKernel/32768/1","change":0.0020681767923465765,"regression":true,"baseline":24823170673.777943,"contender":24771831968.277977,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"} +{"benchmark":"RegressionSumKernel/32768/10","change":0.0033323376378746905,"regression":false,"baseline":21902707565.968014,"contender":21975694782.76145,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"} 
+{"benchmark":"RegressionSumKernel/32768/0","change":-0.004918126090954414,"regression":true,"baseline":27685006611.446762,"contender":27821164964.790764,"unit":"bytes_per_second","less_is_better":false,"suite":"arrow-compute-aggregate-benchmark"} diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json new file mode 100644 index 000000000..d591105f0 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-build-command.json @@ -0,0 +1,212 @@ +{ + "action": "created", + "comment": { + "author_association": "MEMBER", + "body": "@ursabot build", + "created_at": "2019-04-05T11:55:43Z", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726", + "id": 480248726, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==", + "updated_at": "2019-04-05T11:55:43Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + 
"type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "", + "closed_at": null, + "comments": 3, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "created_at": "2019-04-05T11:22:15Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "pull_request": { + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Unittests for GithubHook", + "updated_at": "2019-04-05T11:55:43Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": 
"https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": 
"master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + 
"open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": 
"2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + "sender": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json new file mode 100644 index 000000000..5a8f3461c --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-non-authorized-user.json @@ -0,0 +1,212 @@ +{ + "action": "created", + "comment": { + "author_association": "NONE", + "body": "Unknown command \"\"", + "created_at": "2019-04-05T11:35:47Z", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815", + "id": 480243815, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==", + "updated_at": "2019-04-05T11:35:47Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815", + "user": { + "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4", + "events_url": "https://api.github.com/users/ursabot/events{/privacy}", + "followers_url": "https://api.github.com/users/ursabot/followers", + "following_url": "https://api.github.com/users/ursabot/following{/other_user}", + "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursabot", + "id": 49275095, + "login": "someone", + "node_id": "MDQ6VXNlcjQ5Mjc1MDk1", + "organizations_url": "https://api.github.com/users/ursabot/orgs", + "received_events_url": "https://api.github.com/users/ursabot/received_events", + "repos_url": "https://api.github.com/users/ursabot/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions", + "type": "User", + "url": "https://api.github.com/users/ursabot" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "NONE", + "body": "", + 
"closed_at": null, + "comments": 2, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "created_at": "2019-04-05T11:22:15Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "pull_request": { + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Unittests for GithubHook", + "updated_at": "2019-04-05T11:35:47Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": 
"https://api.github.com/users/kszucs" + } + }, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": 
"https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "someone", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": 
"https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + 
"sender": { + "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4", + "events_url": "https://api.github.com/users/ursabot/events{/privacy}", + "followers_url": "https://api.github.com/users/ursabot/followers", + "following_url": "https://api.github.com/users/ursabot/following{/other_user}", + "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursabot", + "id": 49275095, + "login": "someone", + "node_id": "MDQ6VXNlcjQ5Mjc1MDk1", + "organizations_url": "https://api.github.com/users/ursabot/orgs", + "received_events_url": "https://api.github.com/users/ursabot/received_events", + "repos_url": "https://api.github.com/users/ursabot/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions", + "type": "User", + "url": "https://api.github.com/users/ursabot" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json new file mode 100644 index 000000000..bfb7210df --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-by-ursabot.json @@ -0,0 +1,212 @@ +{ + "action": "created", + "comment": { + "author_association": "NONE", + "body": "Unknown command \"\"", + "created_at": "2019-04-05T11:35:47Z", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815", + "id": 480243815, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==", + "updated_at": "2019-04-05T11:35:47Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815", + "user": { + "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4", + "events_url": "https://api.github.com/users/ursabot/events{/privacy}", + "followers_url": "https://api.github.com/users/ursabot/followers", + "following_url": "https://api.github.com/users/ursabot/following{/other_user}", + "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursabot", + "id": 49275095, + "login": "ursabot", + "node_id": "MDQ6VXNlcjQ5Mjc1MDk1", + "organizations_url": "https://api.github.com/users/ursabot/orgs", + "received_events_url": "https://api.github.com/users/ursabot/received_events", + "repos_url": "https://api.github.com/users/ursabot/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions", + "type": "User", + "url": "https://api.github.com/users/ursabot" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "", + "closed_at": null, + "comments": 
2, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "created_at": "2019-04-05T11:22:15Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "pull_request": { + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Unittests for GithubHook", + "updated_at": "2019-04-05T11:35:47Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + 
}, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", 
+ "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": 
"https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + "sender": { + "avatar_url": "https://avatars2.githubusercontent.com/u/49275095?v=4", + 
"events_url": "https://api.github.com/users/ursabot/events{/privacy}", + "followers_url": "https://api.github.com/users/ursabot/followers", + "following_url": "https://api.github.com/users/ursabot/following{/other_user}", + "gists_url": "https://api.github.com/users/ursabot/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursabot", + "id": 49275095, + "login": "ursabot", + "node_id": "MDQ6VXNlcjQ5Mjc1MDk1", + "organizations_url": "https://api.github.com/users/ursabot/orgs", + "received_events_url": "https://api.github.com/users/ursabot/received_events", + "repos_url": "https://api.github.com/users/ursabot/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursabot/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursabot/subscriptions", + "type": "User", + "url": "https://api.github.com/users/ursabot" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json new file mode 100644 index 000000000..a3d450078 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-not-mentioning-ursabot.json @@ -0,0 +1,212 @@ +{ + "action": "created", + "comment": { + "author_association": "MEMBER", + "body": "bear is no game", + "created_at": "2019-04-05T11:26:56Z", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727", + "id": 480241727, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==", + "updated_at": "2019-04-05T11:26:56Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "", + "closed_at": 
null, + "comments": 0, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "created_at": "2019-04-05T11:22:15Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "pull_request": { + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Unittests for GithubHook", + "updated_at": "2019-04-05T11:26:56Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": 
"https://api.github.com/users/kszucs" + } + }, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": 
"https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": 
"https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + 
"sender": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json new file mode 100644 index 000000000..c88197c8e --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-with-empty-command.json @@ -0,0 +1,217 @@ +{ + "action": "created", + "comment": { + "author_association": "MEMBER", + "body": "@ursabot ", + "body_html": "", + "body_text": "", + "created_at": "2019-04-05T11:35:46Z", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811", + "id": 480243811, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==", + "updated_at": "2019-04-05T11:35:46Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "", 
+ "body_html": "", + "body_text": "", + "closed_at": null, + "closed_by": null, + "comments": 1, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "created_at": "2019-04-05T11:22:15Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "pull_request": { + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Unittests for GithubHook", + "updated_at": "2019-04-05T11:35:46Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": 
"https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": "https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", 
+ "description": null, + "disabled": false, + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": 
"https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": 
"https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + "sender": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json new file mode 100644 index 000000000..9e362fc0e --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-issue-comment-without-pull-request.json @@ -0,0 +1,206 @@ +{ + "action": "created", + "comment": { + "author_association": "MEMBER", + "body": "@ursabot build", + "created_at": "2019-04-05T13:07:57Z", + "html_url": "https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480268708", + "id": 480268708, + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19", + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI2ODcwOA==", + "updated_at": "2019-04-05T13:07:57Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480268708", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "issue": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "", + "closed_at": null, + 
"comments": 5, + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments", + "created_at": "2019-04-02T09:56:41Z", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/events", + "html_url": "https://github.com/ursa-labs/ursabot/issues/19", + "id": 428131685, + "labels": [], + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDU6SXNzdWU0MjgxMzE2ODU=", + "number": 19, + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "state": "open", + "title": "Build ursabot itself via ursabot", + "updated_at": "2019-04-05T13:07:57Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19", + "user": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } + }, + "organization": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "description": "Innovation lab for open source data science tools, powered by Apache Arrow", + "events_url": "https://api.github.com/orgs/ursa-labs/events", + "hooks_url": 
"https://api.github.com/orgs/ursa-labs/hooks", + "id": 46514972, + "issues_url": "https://api.github.com/orgs/ursa-labs/issues", + "login": "ursa-labs", + "members_url": "https://api.github.com/orgs/ursa-labs/members{/member}", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "public_members_url": "https://api.github.com/orgs/ursa-labs/public_members{/member}", + "repos_url": "https://api.github.com/orgs/ursa-labs/repos", + "url": "https://api.github.com/orgs/ursa-labs" + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": 
"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": 
"https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T12:01:40Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 898, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + "sender": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": 
"https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json b/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json new file mode 100644 index 000000000..9cf5c0dda --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/event-pull-request-opened.json @@ -0,0 +1,445 @@ +{ + "action": "opened", + "number": 26, + "pull_request": { + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26", + "id": 267785552, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "number": 26, + "state": "open", + "locked": false, + "title": "Unittests for GithubHook", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "body": "", + "created_at": "2019-04-05T11:22:15Z", + "updated_at": "2019-04-05T12:01:40Z", + "closed_at": 
null, + "merged_at": null, + "merge_commit_sha": "cc5dc3606988b3824be54df779ed2028776113cb", + "assignee": null, + "assignees": [], + "requested_reviewers": [], + "requested_teams": [], + "labels": [], + "milestone": null, + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits", + "review_comments_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments", + "review_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d", + "head": { + "label": "ursa-labs:test-hook", + "ref": "test-hook", + "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d", + "user": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "repo": { + "id": 169101701, + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "name": "ursabot", + "full_name": "ursa-labs/ursabot", + 
"private": false, + "owner": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ursa-labs/ursabot", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "tags_url": 
"https://api.github.com/repos/ursa-labs/ursabot/tags", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "notifications_url": 
"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "created_at": "2019-02-04T15:40:31Z", + "updated_at": "2019-04-04T17:49:10Z", + "pushed_at": "2019-04-05T12:01:40Z", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "svn_url": "https://github.com/ursa-labs/ursabot", + "homepage": null, + "size": 898, + "stargazers_count": 1, + "watchers_count": 1, + "language": "Jupyter Notebook", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 19, + "license": null, + "forks": 0, + "open_issues": 19, + "watchers": 1, + "default_branch": "master" + } + }, + "base": { + "label": "ursa-labs:master", + "ref": "master", + "sha": "a162ad254b589b924db47e057791191b39613fd5", + "user": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + 
"repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "repo": { + "id": 169101701, + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "name": "ursabot", + "full_name": "ursa-labs/ursabot", + "private": false, + "owner": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ursa-labs/ursabot", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "hooks_url": 
"https://api.github.com/repos/ursa-labs/ursabot/hooks", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + 
"downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "created_at": "2019-02-04T15:40:31Z", + "updated_at": "2019-04-04T17:49:10Z", + "pushed_at": "2019-04-05T12:01:40Z", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "svn_url": "https://github.com/ursa-labs/ursabot", + "homepage": null, + "size": 898, + "stargazers_count": 1, + "watchers_count": 1, + "language": "Jupyter Notebook", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 19, + "license": null, + "forks": 0, + "open_issues": 19, + "watchers": 1, + "default_branch": "master" + } + }, + "_links": { + "self": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "html": { + "href": "https://github.com/ursa-labs/ursabot/pull/26" + }, + "issue": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26" + }, + "comments": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments" + }, + "review_comments": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments" + }, + "review_comment": { + "href": 
"https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}" + }, + "commits": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits" + }, + "statuses": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d" + } + }, + "author_association": "MEMBER", + "merged": false, + "mergeable": true, + "rebaseable": true, + "mergeable_state": "unstable", + "merged_by": null, + "comments": 5, + "review_comments": 0, + "maintainer_can_modify": false, + "commits": 2, + "additions": 1124, + "deletions": 0, + "changed_files": 7 + }, + "repository": { + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "archived": false, + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "created_at": "2019-02-04T15:40:31Z", + "default_branch": "master", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "description": null, + "disabled": false, + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "fork": false, + "forks": 0, + "forks_count": 0, + "forks_url": 
"https://api.github.com/repos/ursa-labs/ursabot/forks", + "full_name": "ursa-labs/ursabot", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "has_downloads": true, + "has_issues": true, + "has_pages": false, + "has_projects": true, + "has_wiki": true, + "homepage": null, + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "html_url": "https://github.com/ursa-labs/ursabot", + "id": 169101701, + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "language": "Jupyter Notebook", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "license": null, + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "mirror_url": null, + "name": "ursabot", + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "open_issues": 19, + "open_issues_count": 19, + "owner": { + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": 
"https://api.github.com/users/ursa-labs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/ursa-labs", + "id": 46514972, + "login": "ursa-labs", + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "type": "Organization", + "url": "https://api.github.com/users/ursa-labs" + }, + "private": false, + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "pushed_at": "2019-04-05T11:22:16Z", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "size": 892, + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "stargazers_count": 1, + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "svn_url": "https://github.com/ursa-labs/ursabot", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "updated_at": "2019-04-04T17:49:10Z", + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "watchers": 1, + "watchers_count": 1 + }, + "sender": { + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "followers_url": "https://api.github.com/users/kszucs/followers", + 
"following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/kszucs", + "id": 961747, + "login": "kszucs", + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "repos_url": "https://api.github.com/users/kszucs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/kszucs" + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json new file mode 100644 index 000000000..1e4939776 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-19.json @@ -0,0 +1,64 @@ +{ + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19", + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/19/events", + "html_url": "https://github.com/ursa-labs/ursabot/issues/19", + "id": 428131685, + "node_id": "MDU6SXNzdWU0MjgxMzE2ODU=", + "number": 19, + "title": "Build ursabot itself via ursabot", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "labels": [], + "state": "closed", + "locked": false, + "assignee": null, + "assignees": [], + "milestone": null, + "comments": 8, + 
"created_at": "2019-04-02T09:56:41Z", + "updated_at": "2019-04-05T13:30:49Z", + "closed_at": "2019-04-05T13:30:49Z", + "author_association": "MEMBER", + "body": "", + "closed_by": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json new file mode 100644 index 000000000..44c4d3bed --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-26.json @@ -0,0 +1,70 @@ +{ + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "repository_url": "https://api.github.com/repos/ursa-labs/ursabot", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/events", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "id": 429706959, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "number": 26, + "title": "Unittests for GithubHook + native asyncio syntax", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "labels": [], + "state": "closed", + "locked": false, + "assignee": null, + "assignees": [], + "milestone": null, 
+ "comments": 9, + "created_at": "2019-04-05T11:22:15Z", + "updated_at": "2019-08-28T00:34:19Z", + "closed_at": "2019-04-05T13:54:34Z", + "author_association": "MEMBER", + "pull_request": { + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch" + }, + "body": "Resolves:\r\n- #26 Unittests for GithubHook + native asyncio syntax\r\n- #27 Use native async/await keywords instead of @inlineCallbacks and yield\r\n", + "closed_by": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + } +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json new file mode 100644 index 000000000..93ee4b13c --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480243811.json @@ -0,0 +1,31 @@ +{ + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/479081273", + "html_url": "https://github.com/ursa-labs/ursabot/pull/21#issuecomment-479081273", + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/21", + "id": 480243811, + "node_id": "MDEyOklzc3VlQ29tbWVudDQ3OTA4MTI3Mw==", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "created_at": "2019-04-02T16:29:46Z", + "updated_at": "2019-04-02T16:29:46Z", + "author_association": "MEMBER", + "body": "@ursabot" +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json new file mode 100644 index 000000000..f3cd34083 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/issue-comment-480248726.json @@ -0,0 +1,31 @@ +{ + "url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726", + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "id": 480248726, + "node_id": "MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "created_at": "2019-04-05T11:55:43Z", + "updated_at": "2019-04-05T11:55:43Z", + "author_association": "MEMBER", + "body": "@ursabot build" +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json new file mode 100644 index 000000000..ffc48943a --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-commit.json @@ -0,0 +1,158 @@ +{ + "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d", + "node_id": "MDY6Q29tbWl0MTY5MTAxNzAxOjI3MDVkYTJiNjE2Yjk4ZmE2MDEwYTI1ODEzYzVhN2EyNzQ1NmY3MWQ=", + "commit": { + "author": { + "name": "Krisztián Szűcs", + "email": "szucs.krisztian@gmail.com", + "date": "2019-04-05T12:01:31Z" + }, + "committer": { + "name": "Krisztián Szűcs", + "email": "szucs.krisztian@gmail.com", + "date": "2019-04-05T12:01:31Z" + }, + "message": "add recorded event requests", + "tree": { + "sha": "16a7bb186833a67e9c2d84a58393503b85500ceb", + "url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees/16a7bb186833a67e9c2d84a58393503b85500ceb" + }, + "url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits/2705da2b616b98fa6010a25813c5a7a27456f71d", + "comment_count": 0, + "verification": { + "verified": true, + "reason": "valid", + "signature": "-----BEGIN PGP SIGNATURE-----\n\niQFOBAABCAA4FiEEOOW2r8dr6sA77zHlgjqBKYe1QKUFAlynQ58aHHN6dWNzLmty\naXN6dGlhbkBnbWFpbC5jb20ACgkQgjqBKYe1QKUYKwf6AiXDMaLqNLNSjRY7lIXX\nudioewz0hSb4bgIXBv30nswu9CoOA0+mHCokEVtZhYbXzXDsZ1KJrilSC4j+Ws4q\nkRGA6iEmrne2HcSKNZXzcVnwV9zpwKxlVh2QCTNb1PuOYFBLH0kwE704uWIWMGDN\nbo8cjQPwegePCRguCvPh/5wa5J3uiq5gmJLG6bC/d1XYE+FJVtlnyzqzLMIryGKe\ntIciw+wwkF413Q/YVbZ49vLUeCX9H8PHC4mZYGDWuvjFW1WTfkjK5bAH+oaTVM6h\n350I5ZFloHmMA/QeRge5qFxXoEBMDGiXHHktzYZDXnliFOQNxzqwirA5lQQ6LRSS\naQ==\n=7rqi\n-----END PGP SIGNATURE-----", + "payload": "tree 16a7bb186833a67e9c2d84a58393503b85500ceb\nparent 446ae69b9385e8d0f40aa9595f723d34383af2f7\nauthor Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\ncommitter Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\n\nadd recorded 
event requests\n" + } + }, + "url": "https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d", + "html_url": "https://github.com/ursa-labs/ursabot/commit/2705da2b616b98fa6010a25813c5a7a27456f71d", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d/comments", + "author": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "committer": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + 
"organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "parents": [ + { + "sha": "446ae69b9385e8d0f40aa9595f723d34383af2f7", + "url": "https://api.github.com/repos/ursa-labs/ursabot/commits/446ae69b9385e8d0f40aa9595f723d34383af2f7", + "html_url": "https://github.com/ursa-labs/ursabot/commit/446ae69b9385e8d0f40aa9595f723d34383af2f7" + } + ], + "stats": { + "total": 1062, + "additions": 1058, + "deletions": 4 + }, + "files": [ + { + "sha": "dfae6eeaef384ae6180c6302a58b49e39982dc33", + "filename": "ursabot/tests/fixtures/issue-comment-build-command.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"I've successfully started builds for this PR\",\n+ \"created_at\": \"2019-04-05T11:55:44Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248730\",\n+ \"id\": 480248730,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODczMA==\",\n+ \"updated_at\": \"2019-04-05T11:55:44Z\",\n+ \"url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248730\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 4,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": 
\"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:55:44Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ 
\"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": 
\"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ 
\"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}" + }, + { + "sha": "7ef554e333327f0e62aa1fd76b4b17844a39adeb", + "filename": "ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Unknown command \\\"\\\"\",\n+ \"created_at\": \"2019-04-05T11:35:47Z\",\n+ \"html_url\": 
\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\",\n+ \"id\": 480243815,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 2,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science 
tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": 
\"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ 
\"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}" + }, + { + "sha": "a8082dbc91fdfe815b795e49ec10e49000771ef5", + "filename": "ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "contents_url": 
"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"bear is no game\",\n+ \"created_at\": \"2019-04-05T11:26:56Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\",\n+ \"id\": 480241727,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 0,\n+ \"comments_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ 
\"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ 
\"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": "2770e29ba9086394455315e590c0b433d08e437e", + "filename": "ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": 
"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot \",\n+ \"created_at\": \"2019-04-05T11:35:46Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\",\n+ \"id\": 480243811,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": 
\"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 1,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ 
\"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ 
\"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": 
\"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": 
"80ff46510a2f39ae60f7c3a98e5fdaef8e688784", + "filename": "ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "status": "added", + "additions": 206, + "deletions": 0, + "changes": 206, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -0,0 +1,206 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Ursabot only listens to pull request comments!\",\n+ \"created_at\": \"2019-04-05T11:53:43Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480248217\",\n+ \"id\": 480248217,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODIxNw==\",\n+ \"updated_at\": \"2019-04-05T11:53:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248217\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": 
\"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 4,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\",\n+ \"created_at\": \"2019-04-02T09:56:41Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19\",\n+ \"id\": 428131685,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDU6SXNzdWU0MjgxMzE2ODU=\",\n+ \"number\": 19,\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Build ursabot itself via ursabot\",\n+ \"updated_at\": \"2019-04-05T11:53:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": 
\"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ 
\"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": 
\"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}" + }, + { + "sha": 
"c738bb0eb54c87ba0f23e97e827d77c2be74d0b6", + "filename": "ursabot/tests/test_hooks.py", + "status": "modified", + "additions": 4, + "deletions": 4, + "changes": 8, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=2705da2b616b98fa6010a25813c5a7a27456f71d", + "patch": "@@ -54,7 +54,7 @@ class TestGithubHook(ChangeHookTestCase):\n await self.request('ping', {})\n assert len(self.hook.master.data.updates.changesAdded) == 0\n \n- @ensure_deferred\n- async def test_issue_comment(self):\n- payload = {}\n- await self.request('issue_comment', payload)\n+ # @ensure_deferred\n+ # async def test_issue_comment(self):\n+ # payload = {}\n+ # await self.request('issue_comment', payload)" + } + ] +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json new file mode 100644 index 000000000..b039b3d10 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26-files.json @@ -0,0 +1,170 @@ +[ + { + "sha": "ebfe3f6c5e98723f9751c99ce8ce798f1ba529c5", + "filename": ".travis.yml", + "status": "modified", + "additions": 4, + "deletions": 1, + "changes": 5, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/.travis.yml", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/.travis.yml", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/.travis.yml?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -4,7 +4,10 @@ services:\n python:\n - 3.6\n script:\n- - pip install \"pytest>=3.9\" flake8 -e .\n+ # --no-binary buildbot is required because buildbot doesn't bundle its tests\n+ # to binary wheels, but ursabot's test suite depends on buildbot's so install\n+ # it from source\n+ - pip install --no-binary buildbot \"pytest>=3.9\" mock flake8 -e .\n \n # run linter\n - flake8 ursabot" + }, + { + "sha": "86ad809d3f74c175b92ac58c6c645b0fbf5fa2c5", + "filename": "setup.py", + "status": "modified", + "additions": 6, + "deletions": 1, + "changes": 7, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/setup.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/setup.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/setup.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -1,8 +1,13 @@\n #!/usr/bin/env python\n \n+import sys\n from setuptools import setup\n \n \n+if sys.version_info < (3, 6):\n+ sys.exit('Python < 3.6 is not supported due to missing asyncio 
support')\n+\n+\n # TODO(kszucs): add package data, change maintainer\n setup(\n name='ursabot',\n@@ -15,7 +20,7 @@\n setup_requires=['setuptools_scm'],\n install_requires=['click', 'dask', 'docker', 'docker-map', 'toolz',\n 'buildbot', 'treq'],\n- tests_require=['pytest>=3.9'],\n+ tests_require=['pytest>=3.9', 'mock'],\n entry_points='''\n [console_scripts]\n ursabot=ursabot.cli:ursabot" + }, + { + "sha": "c884f3f85bba499d77d9ad28bcd0ff5edf80f957", + "filename": "ursabot/factories.py", + "status": "modified", + "additions": 6, + "deletions": 2, + "changes": 8, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/factories.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/factories.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/factories.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -79,8 +79,12 @@ def prepend_step(self, step):\n repourl='https://github.com/ursa-labs/ursabot',\n mode='full'),\n ShellCommand(command=['ls', '-lah']),\n- ShellCommand(command=['pip', 'install', 'pytest', 'flake8']),\n- ShellCommand(command=['pip', 'install', '-e', '.']),\n+ ShellCommand(command=['pip', 'install', 'pytest', 'flake8', 'mock']),\n+ # --no-binary buildbot is required because buildbot doesn't bundle its\n+ # tests to binary wheels, but ursabot's test suite depends on buildbot's\n+ # so install it from source\n+ ShellCommand(command=['pip', 'install', '--no-binary', 'buildbot',\n+ '-e', '.']),\n ShellCommand(command=['flake8']),\n ShellCommand(command=['pytest', '-v', '-m', 'not docker', 'ursabot']),\n ShellCommand(command=['buildbot', 'checkconfig', '.'])" + }, + { + "sha": "0265cfbd9c2882f492469882a7bf513a1c1b5af4", + "filename": "ursabot/hooks.py", + "status": "modified", + "additions": 17, + "deletions": 19, + "changes": 36, + "blob_url": 
"https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/hooks.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/hooks.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/hooks.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -1,11 +1,11 @@\n from urllib.parse import urlparse\n \n from twisted.python import log\n-from twisted.internet import defer\n \n from buildbot.www.hooks.github import GitHubEventHandler\n from buildbot.util.httpclientservice import HTTPClientService\n \n+from .utils import ensure_deferred\n \n BOTNAME = 'ursabot'\n \n@@ -22,20 +22,18 @@ def _client(self):\n self.master, self.github_api_endpoint, headers=headers,\n debug=self.debug, verify=self.verify)\n \n- @defer.inlineCallbacks\n- def _get(self, url):\n+ async def _get(self, url):\n url = urlparse(url)\n- client = yield self._client()\n- response = yield client.get(url.path)\n- result = yield response.json()\n+ client = await self._client()\n+ response = await client.get(url.path)\n+ result = await response.json()\n return result\n \n- @defer.inlineCallbacks\n- def _post(self, url, data):\n+ async def _post(self, url, data):\n url = urlparse(url)\n- client = yield self._client()\n- response = yield client.post(url.path, json=data)\n- result = yield response.json()\n+ client = await self._client()\n+ response = await client.post(url.path, json=data)\n+ result = await response.json()\n log.msg(f'POST to {url} with the following result: {result}')\n return result\n \n@@ -46,8 +44,8 @@ def _parse_command(self, message):\n return message.split(mention)[-1].lower().strip()\n return None\n \n- @defer.inlineCallbacks\n- def handle_issue_comment(self, payload, event):\n+ @ensure_deferred\n+ async def handle_issue_comment(self, payload, event):\n issue = payload['issue']\n comments_url = issue['comments_url']\n command = 
self._parse_command(payload['comment']['body'])\n@@ -64,16 +62,16 @@ def handle_issue_comment(self, payload, event):\n elif command == 'build':\n if 'pull_request' not in issue:\n message = 'Ursabot only listens to pull request comments!'\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return [], 'git'\n else:\n message = f'Unknown command \"{command}\"'\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return [], 'git'\n \n try:\n- pull_request = yield self._get(issue['pull_request']['url'])\n- changes, _ = yield self.handle_pull_request({\n+ pull_request = await self._get(issue['pull_request']['url'])\n+ changes, _ = await self.handle_pull_request({\n 'action': 'synchronize',\n 'sender': payload['sender'],\n 'repository': payload['repository'],\n@@ -82,11 +80,11 @@ def handle_issue_comment(self, payload, event):\n }, event)\n except Exception as e:\n message = \"I've failed to start builds for this PR\"\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n raise e\n else:\n message = \"I've successfully started builds for this PR\"\n- yield self._post(comments_url, {'body': message})\n+ await self._post(comments_url, {'body': message})\n return changes, 'git'\n \n # TODO(kszucs):" + }, + { + "sha": "1e1ecf2ce47da929dbf1b93632640e7e6ae1cfe0", + "filename": "ursabot/steps.py", + "status": "modified", + "additions": 13, + "deletions": 13, + "changes": 26, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/steps.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/steps.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/steps.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -1,9 +1,9 @@\n-from twisted.internet import defer\n-\n from 
buildbot.plugins import steps, util\n from buildbot.process import buildstep\n from buildbot.process.results import SUCCESS\n \n+from .utils import ensure_deferred\n+\n \n class ShellMixin(buildstep.ShellMixin):\n \"\"\"Run command in a login bash shell\n@@ -49,10 +49,10 @@ def __init__(self, **kwargs):\n kwargs = self.setupShellMixin(kwargs)\n super().__init__(**kwargs)\n \n- @defer.inlineCallbacks\n- def run(self):\n- cmd = yield self.makeRemoteShellCommand(command=self.command)\n- yield self.runCommand(cmd)\n+ @ensure_deferred\n+ async def run(self):\n+ cmd = await self.makeRemoteShellCommand(command=self.command)\n+ await self.runCommand(cmd)\n return cmd.results()\n \n \n@@ -71,8 +71,8 @@ class CMake(ShellMixin, steps.CMake):\n \n name = 'CMake'\n \n- @defer.inlineCallbacks\n- def run(self):\n+ @ensure_deferred\n+ async def run(self):\n \"\"\"Create and run CMake command\n \n Copied from the original CMake implementation to handle None values as\n@@ -94,8 +94,8 @@ def run(self):\n if self.options is not None:\n command.extend(self.options)\n \n- cmd = yield self.makeRemoteShellCommand(command=command)\n- yield self.runCommand(cmd)\n+ cmd = await self.makeRemoteShellCommand(command=command)\n+ await self.runCommand(cmd)\n \n return cmd.results()\n \n@@ -117,8 +117,8 @@ def __init__(self, variables, source='WorkerEnvironment', **kwargs):\n self.source = source\n super().__init__(**kwargs)\n \n- @defer.inlineCallbacks\n- def run(self):\n+ @ensure_deferred\n+ async def run(self):\n # on Windows, environment variables are case-insensitive, but we have\n # a case-sensitive dictionary in worker_environ. 
Fortunately, that\n # dictionary is also folded to uppercase, so we can simply fold the\n@@ -139,7 +139,7 @@ def run(self):\n # TODO(kszucs) try with self.setProperty similarly like in\n # SetProperties\n properties.setProperty(prop, value, self.source, runtime=True)\n- yield self.addCompleteLog('set-prop', f'{prop}: {value}')\n+ await self.addCompleteLog('set-prop', f'{prop}: {value}')\n \n return SUCCESS\n " + }, + { + "sha": "6a7d5308be6608f542a810d410f9240157a1340f", + "filename": "ursabot/tests/fixtures/issue-comment-build-command.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-build-command.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-build-command.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot build\",\n+ \"created_at\": \"2019-04-05T11:55:43Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248726\",\n+ \"id\": 480248726,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODcyNg==\",\n+ \"updated_at\": \"2019-04-05T11:55:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248726\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": 
\"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 3,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": 
\"2019-04-05T11:55:43Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ 
\"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": 
true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": 
\"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ 
\"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": "7ef554e333327f0e62aa1fd76b4b17844a39adeb", + "filename": "ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-by-ursabot.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"NONE\",\n+ \"body\": \"Unknown command \\\"\\\"\",\n+ \"created_at\": \"2019-04-05T11:35:47Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\",\n+ \"id\": 480243815,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\",\n+ \"user\": {\n+ \"avatar_url\": 
\"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 2,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ 
\"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:47Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ 
\"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ 
\"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": 
\"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/49275095?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursabot/events{/privacy}\",\n+ 
\"followers_url\": \"https://api.github.com/users/ursabot/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursabot/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursabot/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursabot\",\n+ \"id\": 49275095,\n+ \"login\": \"ursabot\",\n+ \"node_id\": \"MDQ6VXNlcjQ5Mjc1MDk1\",\n+ \"organizations_url\": \"https://api.github.com/users/ursabot/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursabot/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursabot/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursabot/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursabot/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/ursabot\"\n+ }\n+}" + }, + { + "sha": "a8082dbc91fdfe815b795e49ec10e49000771ef5", + "filename": "ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"bear is no game\",\n+ \"created_at\": \"2019-04-05T11:26:56Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\",\n+ \"id\": 480241727,\n+ \"issue_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 0,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": 
\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:26:56Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": 
\"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ 
\"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ \"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ 
\"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": 
\"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": "2770e29ba9086394455315e590c0b433d08e437e", + "filename": "ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "status": "added", + "additions": 212, + "deletions": 0, + "changes": 212, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-with-empty-command.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,212 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": 
\"MEMBER\",\n+ \"body\": \"@ursabot \",\n+ \"created_at\": \"2019-04-05T11:35:46Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\",\n+ \"id\": 480243811,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 1,\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"id\": 
429706959,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"number\": 26,\n+ \"pull_request\": {\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Unittests for GithubHook\",\n+ \"updated_at\": \"2019-04-05T11:35:46Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ 
\"description\": \"Innovation lab for open source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ 
\"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T11:22:16Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 892,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": "b7de8d838332944101812ee2a46c08dd0144efe3", + "filename": "ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "status": "added", + "additions": 206, + "deletions": 0, + "changes": 206, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/issue-comment-without-pull-request.json", + "contents_url": 
"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,206 @@\n+{\n+ \"action\": \"created\",\n+ \"comment\": {\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"@ursabot build\",\n+ \"created_at\": \"2019-04-05T13:07:57Z\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480268708\",\n+ \"id\": 480268708,\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"node_id\": \"MDEyOklzc3VlQ29tbWVudDQ4MDI2ODcwOA==\",\n+ \"updated_at\": \"2019-04-05T13:07:57Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480268708\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"issue\": {\n+ \"assignee\": null,\n+ \"assignees\": [],\n+ \"author_association\": \"MEMBER\",\n+ \"body\": \"\",\n+ \"closed_at\": null,\n+ \"comments\": 5,\n+ \"comments_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\",\n+ \"created_at\": \"2019-04-02T09:56:41Z\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/issues/19\",\n+ \"id\": 428131685,\n+ \"labels\": [],\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\",\n+ \"locked\": false,\n+ \"milestone\": null,\n+ \"node_id\": \"MDU6SXNzdWU0MjgxMzE2ODU=\",\n+ \"number\": 19,\n+ \"repository_url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"state\": \"open\",\n+ \"title\": \"Build ursabot itself via ursabot\",\n+ \"updated_at\": \"2019-04-05T13:07:57Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/19\",\n+ \"user\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+ },\n+ \"organization\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"description\": \"Innovation lab for open 
source data science tools, powered by Apache Arrow\",\n+ \"events_url\": \"https://api.github.com/orgs/ursa-labs/events\",\n+ \"hooks_url\": \"https://api.github.com/orgs/ursa-labs/hooks\",\n+ \"id\": 46514972,\n+ \"issues_url\": \"https://api.github.com/orgs/ursa-labs/issues\",\n+ \"login\": \"ursa-labs\",\n+ \"members_url\": \"https://api.github.com/orgs/ursa-labs/members{/member}\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"public_members_url\": \"https://api.github.com/orgs/ursa-labs/public_members{/member}\",\n+ \"repos_url\": \"https://api.github.com/orgs/ursa-labs/repos\",\n+ \"url\": \"https://api.github.com/orgs/ursa-labs\"\n+ },\n+ \"repository\": {\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"archived\": false,\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"default_branch\": \"master\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"description\": null,\n+ \"disabled\": false,\n+ \"downloads_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"fork\": false,\n+ \"forks\": 0,\n+ \"forks_count\": 0,\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"has_downloads\": true,\n+ \"has_issues\": true,\n+ \"has_pages\": false,\n+ \"has_projects\": true,\n+ \"has_wiki\": true,\n+ \"homepage\": null,\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"id\": 169101701,\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"language\": \"Jupyter Notebook\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"license\": null,\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"mirror_url\": null,\n+ \"name\": \"ursabot\",\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"open_issues\": 19,\n+ 
\"open_issues_count\": 19,\n+ \"owner\": {\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"id\": 46514972,\n+ \"login\": \"ursa-labs\",\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"type\": \"Organization\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\"\n+ },\n+ \"private\": false,\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"size\": 898,\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"stargazers_count\": 1,\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"teams_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"watchers\": 1,\n+ \"watchers_count\": 1\n+ },\n+ \"sender\": {\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"gravatar_id\": \"\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"id\": 961747,\n+ \"login\": \"kszucs\",\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"site_admin\": false,\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"type\": \"User\",\n+ \"url\": \"https://api.github.com/users/kszucs\"\n+ }\n+}" + }, + { + "sha": "33e051455e866fb4774a16ae02ad40dcf9e6a7fd", + "filename": "ursabot/tests/fixtures/pull-request-26-commit.json", + "status": "added", + "additions": 158, + "deletions": 0, + "changes": 158, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26-commit.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26-commit.json", + "contents_url": 
"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/pull-request-26-commit.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,158 @@\n+{\n+ \"sha\": \"2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"node_id\": \"MDY6Q29tbWl0MTY5MTAxNzAxOjI3MDVkYTJiNjE2Yjk4ZmE2MDEwYTI1ODEzYzVhN2EyNzQ1NmY3MWQ=\",\n+ \"commit\": {\n+ \"author\": {\n+ \"name\": \"Krisztián Szűcs\",\n+ \"email\": \"szucs.krisztian@gmail.com\",\n+ \"date\": \"2019-04-05T12:01:31Z\"\n+ },\n+ \"committer\": {\n+ \"name\": \"Krisztián Szűcs\",\n+ \"email\": \"szucs.krisztian@gmail.com\",\n+ \"date\": \"2019-04-05T12:01:31Z\"\n+ },\n+ \"message\": \"add recorded event requests\",\n+ \"tree\": {\n+ \"sha\": \"16a7bb186833a67e9c2d84a58393503b85500ceb\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees/16a7bb186833a67e9c2d84a58393503b85500ceb\"\n+ },\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"comment_count\": 0,\n+ \"verification\": {\n+ \"verified\": true,\n+ \"reason\": \"valid\",\n+ \"signature\": \"-----BEGIN PGP SIGNATURE-----\\n\\niQFOBAABCAA4FiEEOOW2r8dr6sA77zHlgjqBKYe1QKUFAlynQ58aHHN6dWNzLmty\\naXN6dGlhbkBnbWFpbC5jb20ACgkQgjqBKYe1QKUYKwf6AiXDMaLqNLNSjRY7lIXX\\nudioewz0hSb4bgIXBv30nswu9CoOA0+mHCokEVtZhYbXzXDsZ1KJrilSC4j+Ws4q\\nkRGA6iEmrne2HcSKNZXzcVnwV9zpwKxlVh2QCTNb1PuOYFBLH0kwE704uWIWMGDN\\nbo8cjQPwegePCRguCvPh/5wa5J3uiq5gmJLG6bC/d1XYE+FJVtlnyzqzLMIryGKe\\ntIciw+wwkF413Q/YVbZ49vLUeCX9H8PHC4mZYGDWuvjFW1WTfkjK5bAH+oaTVM6h\\n350I5ZFloHmMA/QeRge5qFxXoEBMDGiXHHktzYZDXnliFOQNxzqwirA5lQQ6LRSS\\naQ==\\n=7rqi\\n-----END PGP SIGNATURE-----\",\n+ \"payload\": \"tree 16a7bb186833a67e9c2d84a58393503b85500ceb\\nparent 446ae69b9385e8d0f40aa9595f723d34383af2f7\\nauthor Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\\ncommitter Krisztián Szűcs <szucs.krisztian@gmail.com> 1554465691 +0200\\n\\nadd recorded event requests\\n\"\n+ }\n+ },\n+ \"url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/commit/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits/2705da2b616b98fa6010a25813c5a7a27456f71d/comments\",\n+ \"author\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"committer\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": 
\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"parents\": [\n+ {\n+ \"sha\": \"446ae69b9385e8d0f40aa9595f723d34383af2f7\",\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits/446ae69b9385e8d0f40aa9595f723d34383af2f7\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/commit/446ae69b9385e8d0f40aa9595f723d34383af2f7\"\n+ }\n+ ],\n+ \"stats\": {\n+ \"total\": 1062,\n+ \"additions\": 1058,\n+ \"deletions\": 4\n+ },\n+ \"files\": [\n+ {\n+ \"sha\": \"dfae6eeaef384ae6180c6302a58b49e39982dc33\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-build-command.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-build-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"I've successfully started builds for this PR\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"html_url\\\": 
\\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480248730\\\",\\n+ \\\"id\\\": 480248730,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODczMA==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248730\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 4,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": 
\\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:55:44Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ 
\\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ 
\\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": 
\\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"7ef554e333327f0e62aa1fd76b4b17844a39adeb\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-by-ursabot.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-by-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"Unknown command \\\\\\\"\\\\\\\"\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"html_url\\\": 
\\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243815\\\",\\n+ \\\"id\\\": 480243815,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxNQ==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243815\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 2,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": 
\\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:47Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ 
\\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ 
\\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": 
\\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"a8082dbc91fdfe815b795e49ec10e49000771ef5\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-not-mentioning-ursabot.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"bear is no game\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"html_url\\\": 
\\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480241727\\\",\\n+ \\\"id\\\": 480241727,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MTcyNw==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480241727\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 0,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": 
\\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:26:56Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ 
\\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ 
\\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": 
\\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"2770e29ba9086394455315e590c0b433d08e437e\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"status\": \"added\",\n+ \"additions\": 212,\n+ \"deletions\": 0,\n+ \"changes\": 212,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-with-empty-command.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-with-empty-command.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,212 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"@ursabot \\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"html_url\\\": 
\\\"https://github.com/ursa-labs/ursabot/pull/26#issuecomment-480243811\\\",\\n+ \\\"id\\\": 480243811,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0MzgxMQ==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480243811\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 1,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\\\",\\n+ \\\"created_at\\\": 
\\\"2019-04-05T11:22:15Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"id\\\": 429706959,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\\\",\\n+ \\\"number\\\": 26,\\n+ \\\"pull_request\\\": {\\n+ \\\"diff_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.diff\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26\\\",\\n+ \\\"patch_url\\\": \\\"https://github.com/ursa-labs/ursabot/pull/26.patch\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\\\"\\n+ },\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Unittests for GithubHook\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:35:46Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/26\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ 
\\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ 
\\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": 
\\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"80ff46510a2f39ae60f7c3a98e5fdaef8e688784\",\n+ \"filename\": \"ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"status\": \"added\",\n+ \"additions\": 206,\n+ \"deletions\": 0,\n+ \"changes\": 206,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/fixtures/issue-comment-without-pull-request.json\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/issue-comment-without-pull-request.json?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -0,0 +1,206 @@\\n+{\\n+ \\\"action\\\": \\\"created\\\",\\n+ \\\"comment\\\": {\\n+ \\\"author_association\\\": \\\"NONE\\\",\\n+ \\\"body\\\": \\\"Ursabot only listens to pull request comments!\\\",\\n+ \\\"created_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"html_url\\\": 
\\\"https://github.com/ursa-labs/ursabot/issues/19#issuecomment-480248217\\\",\\n+ \\\"id\\\": 480248217,\\n+ \\\"issue_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"node_id\\\": \\\"MDEyOklzc3VlQ29tbWVudDQ4MDI0ODIxNw==\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments/480248217\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+ },\\n+ \\\"issue\\\": {\\n+ \\\"assignee\\\": null,\\n+ \\\"assignees\\\": [],\\n+ \\\"author_association\\\": \\\"MEMBER\\\",\\n+ \\\"body\\\": \\\"\\\",\\n+ \\\"closed_at\\\": null,\\n+ \\\"comments\\\": 4,\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/comments\\\",\\n+ \\\"created_at\\\": 
\\\"2019-04-02T09:56:41Z\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/events\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"id\\\": 428131685,\\n+ \\\"labels\\\": [],\\n+ \\\"labels_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19/labels{/name}\\\",\\n+ \\\"locked\\\": false,\\n+ \\\"milestone\\\": null,\\n+ \\\"node_id\\\": \\\"MDU6SXNzdWU0MjgxMzE2ODU=\\\",\\n+ \\\"number\\\": 19,\\n+ \\\"repository_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"state\\\": \\\"open\\\",\\n+ \\\"title\\\": \\\"Build ursabot itself via ursabot\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-05T11:53:43Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/19\\\",\\n+ \\\"user\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars1.githubusercontent.com/u/961747?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/kszucs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/kszucs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/kszucs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/kszucs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/kszucs\\\",\\n+ \\\"id\\\": 961747,\\n+ \\\"login\\\": \\\"kszucs\\\",\\n+ \\\"node_id\\\": \\\"MDQ6VXNlcjk2MTc0Nw==\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/kszucs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/kszucs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/kszucs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/kszucs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/kszucs/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": 
\\\"https://api.github.com/users/kszucs\\\"\\n+ }\\n+ },\\n+ \\\"organization\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"description\\\": \\\"Innovation lab for open source data science tools, powered by Apache Arrow\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/orgs/ursa-labs/events\\\",\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/orgs/ursa-labs/hooks\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"issues_url\\\": \\\"https://api.github.com/orgs/ursa-labs/issues\\\",\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/members{/member}\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"public_members_url\\\": \\\"https://api.github.com/orgs/ursa-labs/public_members{/member}\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/orgs/ursa-labs/repos\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/orgs/ursa-labs\\\"\\n+ },\\n+ \\\"repository\\\": {\\n+ \\\"archive_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\\\",\\n+ \\\"archived\\\": false,\\n+ \\\"assignees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\\\",\\n+ \\\"blobs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\\\",\\n+ \\\"branches_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\\\",\\n+ \\\"clone_url\\\": \\\"https://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"collaborators_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\\\",\\n+ \\\"comments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\\\",\\n+ \\\"commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\\\",\\n+ \\\"compare_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\\\",\\n+ \\\"contents_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\\\",\\n+ \\\"contributors_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/contributors\\\",\\n+ \\\"created_at\\\": \\\"2019-02-04T15:40:31Z\\\",\\n+ \\\"default_branch\\\": \\\"master\\\",\\n+ \\\"deployments_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/deployments\\\",\\n+ \\\"description\\\": null,\\n+ \\\"disabled\\\": false,\\n+ \\\"downloads_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/downloads\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/events\\\",\\n+ \\\"fork\\\": false,\\n+ \\\"forks\\\": 0,\\n+ \\\"forks_count\\\": 0,\\n+ \\\"forks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/forks\\\",\\n+ \\\"full_name\\\": \\\"ursa-labs/ursabot\\\",\\n+ \\\"git_commits_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\\\",\\n+ \\\"git_refs_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\\\",\\n+ \\\"git_tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\\\",\\n+ \\\"git_url\\\": \\\"git://github.com/ursa-labs/ursabot.git\\\",\\n+ \\\"has_downloads\\\": true,\\n+ \\\"has_issues\\\": true,\\n+ \\\"has_pages\\\": false,\\n+ \\\"has_projects\\\": true,\\n+ \\\"has_wiki\\\": true,\\n+ \\\"homepage\\\": null,\\n+ \\\"hooks_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/hooks\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"id\\\": 169101701,\\n+ \\\"issue_comment_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\\\",\\n+ \\\"issue_events_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\\\",\\n+ \\\"issues_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\\\",\\n+ \\\"keys_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\\\",\\n+ \\\"labels_url\\\": 
\\\"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\\\",\\n+ \\\"language\\\": \\\"Jupyter Notebook\\\",\\n+ \\\"languages_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/languages\\\",\\n+ \\\"license\\\": null,\\n+ \\\"merges_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/merges\\\",\\n+ \\\"milestones_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\\\",\\n+ \\\"mirror_url\\\": null,\\n+ \\\"name\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": \\\"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\\\",\\n+ \\\"notifications_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\\\",\\n+ \\\"open_issues\\\": 19,\\n+ \\\"open_issues_count\\\": 19,\\n+ \\\"owner\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/46514972?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursa-labs/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursa-labs/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursa-labs/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursa-labs/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursa-labs\\\",\\n+ \\\"id\\\": 46514972,\\n+ \\\"login\\\": \\\"ursa-labs\\\",\\n+ \\\"node_id\\\": \\\"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursa-labs/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursa-labs/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursa-labs/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursa-labs/subscriptions\\\",\\n+ \\\"type\\\": \\\"Organization\\\",\\n+ \\\"url\\\": 
\\\"https://api.github.com/users/ursa-labs\\\"\\n+ },\\n+ \\\"private\\\": false,\\n+ \\\"pulls_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\\\",\\n+ \\\"pushed_at\\\": \\\"2019-04-05T11:22:16Z\\\",\\n+ \\\"releases_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\\\",\\n+ \\\"size\\\": 892,\\n+ \\\"ssh_url\\\": \\\"git@github.com:ursa-labs/ursabot.git\\\",\\n+ \\\"stargazers_count\\\": 1,\\n+ \\\"stargazers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/stargazers\\\",\\n+ \\\"statuses_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\\\",\\n+ \\\"subscribers_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscribers\\\",\\n+ \\\"subscription_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/subscription\\\",\\n+ \\\"svn_url\\\": \\\"https://github.com/ursa-labs/ursabot\\\",\\n+ \\\"tags_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/tags\\\",\\n+ \\\"teams_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/teams\\\",\\n+ \\\"trees_url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\\\",\\n+ \\\"updated_at\\\": \\\"2019-04-04T17:49:10Z\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/repos/ursa-labs/ursabot\\\",\\n+ \\\"watchers\\\": 1,\\n+ \\\"watchers_count\\\": 1\\n+ },\\n+ \\\"sender\\\": {\\n+ \\\"avatar_url\\\": \\\"https://avatars2.githubusercontent.com/u/49275095?v=4\\\",\\n+ \\\"events_url\\\": \\\"https://api.github.com/users/ursabot/events{/privacy}\\\",\\n+ \\\"followers_url\\\": \\\"https://api.github.com/users/ursabot/followers\\\",\\n+ \\\"following_url\\\": \\\"https://api.github.com/users/ursabot/following{/other_user}\\\",\\n+ \\\"gists_url\\\": \\\"https://api.github.com/users/ursabot/gists{/gist_id}\\\",\\n+ \\\"gravatar_id\\\": \\\"\\\",\\n+ \\\"html_url\\\": \\\"https://github.com/ursabot\\\",\\n+ \\\"id\\\": 49275095,\\n+ \\\"login\\\": \\\"ursabot\\\",\\n+ \\\"node_id\\\": 
\\\"MDQ6VXNlcjQ5Mjc1MDk1\\\",\\n+ \\\"organizations_url\\\": \\\"https://api.github.com/users/ursabot/orgs\\\",\\n+ \\\"received_events_url\\\": \\\"https://api.github.com/users/ursabot/received_events\\\",\\n+ \\\"repos_url\\\": \\\"https://api.github.com/users/ursabot/repos\\\",\\n+ \\\"site_admin\\\": false,\\n+ \\\"starred_url\\\": \\\"https://api.github.com/users/ursabot/starred{/owner}{/repo}\\\",\\n+ \\\"subscriptions_url\\\": \\\"https://api.github.com/users/ursabot/subscriptions\\\",\\n+ \\\"type\\\": \\\"User\\\",\\n+ \\\"url\\\": \\\"https://api.github.com/users/ursabot\\\"\\n+ }\\n+}\"\n+ },\n+ {\n+ \"sha\": \"c738bb0eb54c87ba0f23e97e827d77c2be74d0b6\",\n+ \"filename\": \"ursabot/tests/test_hooks.py\",\n+ \"status\": \"modified\",\n+ \"additions\": 4,\n+ \"deletions\": 4,\n+ \"changes\": 8,\n+ \"blob_url\": \"https://github.com/ursa-labs/ursabot/blob/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py\",\n+ \"raw_url\": \"https://github.com/ursa-labs/ursabot/raw/2705da2b616b98fa6010a25813c5a7a27456f71d/ursabot/tests/test_hooks.py\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"patch\": \"@@ -54,7 +54,7 @@ class TestGithubHook(ChangeHookTestCase):\\n await self.request('ping', {})\\n assert len(self.hook.master.data.updates.changesAdded) == 0\\n \\n- @ensure_deferred\\n- async def test_issue_comment(self):\\n- payload = {}\\n- await self.request('issue_comment', payload)\\n+ # @ensure_deferred\\n+ # async def test_issue_comment(self):\\n+ # payload = {}\\n+ # await self.request('issue_comment', payload)\"\n+ }\n+ ]\n+}" + }, + { + "sha": "ad061d7244b917e6ea3853698dc3bc2a8c9c6857", + "filename": "ursabot/tests/fixtures/pull-request-26.json", + "status": "added", + "additions": 335, + "deletions": 0, + "changes": 335, + "blob_url": 
"https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26.json", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/fixtures/pull-request-26.json", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/fixtures/pull-request-26.json?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,335 @@\n+{\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\",\n+ \"id\": 267785552,\n+ \"node_id\": \"MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy\",\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot/pull/26\",\n+ \"diff_url\": \"https://github.com/ursa-labs/ursabot/pull/26.diff\",\n+ \"patch_url\": \"https://github.com/ursa-labs/ursabot/pull/26.patch\",\n+ \"issue_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\",\n+ \"number\": 26,\n+ \"state\": \"open\",\n+ \"locked\": false,\n+ \"title\": \"Unittests for GithubHook\",\n+ \"user\": {\n+ \"login\": \"kszucs\",\n+ \"id\": 961747,\n+ \"node_id\": \"MDQ6VXNlcjk2MTc0Nw==\",\n+ \"avatar_url\": \"https://avatars1.githubusercontent.com/u/961747?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/kszucs\",\n+ \"html_url\": \"https://github.com/kszucs\",\n+ \"followers_url\": \"https://api.github.com/users/kszucs/followers\",\n+ \"following_url\": \"https://api.github.com/users/kszucs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/kszucs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/kszucs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/kszucs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/kszucs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/kszucs/repos\",\n+ \"events_url\": \"https://api.github.com/users/kszucs/events{/privacy}\",\n+ \"received_events_url\": 
\"https://api.github.com/users/kszucs/received_events\",\n+ \"type\": \"User\",\n+ \"site_admin\": false\n+ },\n+ \"body\": \"\",\n+ \"created_at\": \"2019-04-05T11:22:15Z\",\n+ \"updated_at\": \"2019-04-05T12:01:40Z\",\n+ \"closed_at\": null,\n+ \"merged_at\": null,\n+ \"merge_commit_sha\": \"cc5dc3606988b3824be54df779ed2028776113cb\",\n+ \"assignee\": null,\n+ \"assignees\": [\n+\n+ ],\n+ \"requested_reviewers\": [\n+\n+ ],\n+ \"requested_teams\": [\n+\n+ ],\n+ \"labels\": [\n+\n+ ],\n+ \"milestone\": null,\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits\",\n+ \"review_comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments\",\n+ \"review_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"head\": {\n+ \"label\": \"ursa-labs:test-hook\",\n+ \"ref\": \"test-hook\",\n+ \"sha\": \"2705da2b616b98fa6010a25813c5a7a27456f71d\",\n+ \"user\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": 
\"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"repo\": {\n+ \"id\": 169101701,\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"name\": \"ursabot\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"private\": false,\n+ \"owner\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"description\": null,\n+ \"fork\": false,\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"collaborators_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ \"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"contents_url\": 
\"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ \"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"homepage\": null,\n+ \"size\": 898,\n+ \"stargazers_count\": 1,\n+ \"watchers_count\": 1,\n+ \"language\": \"Jupyter Notebook\",\n+ \"has_issues\": true,\n+ \"has_projects\": true,\n+ \"has_downloads\": true,\n+ \"has_wiki\": true,\n+ \"has_pages\": false,\n+ \"forks_count\": 0,\n+ \"mirror_url\": null,\n+ \"archived\": false,\n+ \"disabled\": false,\n+ \"open_issues_count\": 19,\n+ \"license\": null,\n+ \"forks\": 0,\n+ \"open_issues\": 19,\n+ \"watchers\": 1,\n+ \"default_branch\": \"master\"\n+ }\n+ },\n+ \"base\": {\n+ \"label\": 
\"ursa-labs:master\",\n+ \"ref\": \"master\",\n+ \"sha\": \"a162ad254b589b924db47e057791191b39613fd5\",\n+ \"user\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": \"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"repo\": {\n+ \"id\": 169101701,\n+ \"node_id\": \"MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=\",\n+ \"name\": \"ursabot\",\n+ \"full_name\": \"ursa-labs/ursabot\",\n+ \"private\": false,\n+ \"owner\": {\n+ \"login\": \"ursa-labs\",\n+ \"id\": 46514972,\n+ \"node_id\": \"MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy\",\n+ \"avatar_url\": \"https://avatars2.githubusercontent.com/u/46514972?v=4\",\n+ \"gravatar_id\": \"\",\n+ \"url\": \"https://api.github.com/users/ursa-labs\",\n+ \"html_url\": \"https://github.com/ursa-labs\",\n+ \"followers_url\": \"https://api.github.com/users/ursa-labs/followers\",\n+ \"following_url\": \"https://api.github.com/users/ursa-labs/following{/other_user}\",\n+ \"gists_url\": \"https://api.github.com/users/ursa-labs/gists{/gist_id}\",\n+ \"starred_url\": 
\"https://api.github.com/users/ursa-labs/starred{/owner}{/repo}\",\n+ \"subscriptions_url\": \"https://api.github.com/users/ursa-labs/subscriptions\",\n+ \"organizations_url\": \"https://api.github.com/users/ursa-labs/orgs\",\n+ \"repos_url\": \"https://api.github.com/users/ursa-labs/repos\",\n+ \"events_url\": \"https://api.github.com/users/ursa-labs/events{/privacy}\",\n+ \"received_events_url\": \"https://api.github.com/users/ursa-labs/received_events\",\n+ \"type\": \"Organization\",\n+ \"site_admin\": false\n+ },\n+ \"html_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"description\": null,\n+ \"fork\": false,\n+ \"url\": \"https://api.github.com/repos/ursa-labs/ursabot\",\n+ \"forks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/forks\",\n+ \"keys_url\": \"https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}\",\n+ \"collaborators_url\": \"https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}\",\n+ \"teams_url\": \"https://api.github.com/repos/ursa-labs/ursabot/teams\",\n+ \"hooks_url\": \"https://api.github.com/repos/ursa-labs/ursabot/hooks\",\n+ \"issue_events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}\",\n+ \"events_url\": \"https://api.github.com/repos/ursa-labs/ursabot/events\",\n+ \"assignees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}\",\n+ \"branches_url\": \"https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}\",\n+ \"tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/tags\",\n+ \"blobs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}\",\n+ \"git_tags_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}\",\n+ \"git_refs_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}\",\n+ \"trees_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}\",\n+ \"statuses_url\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}\",\n+ 
\"languages_url\": \"https://api.github.com/repos/ursa-labs/ursabot/languages\",\n+ \"stargazers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/stargazers\",\n+ \"contributors_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contributors\",\n+ \"subscribers_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscribers\",\n+ \"subscription_url\": \"https://api.github.com/repos/ursa-labs/ursabot/subscription\",\n+ \"commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}\",\n+ \"git_commits_url\": \"https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}\",\n+ \"comments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/comments{/number}\",\n+ \"issue_comment_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}\",\n+ \"contents_url\": \"https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}\",\n+ \"compare_url\": \"https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}\",\n+ \"merges_url\": \"https://api.github.com/repos/ursa-labs/ursabot/merges\",\n+ \"archive_url\": \"https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}\",\n+ \"downloads_url\": \"https://api.github.com/repos/ursa-labs/ursabot/downloads\",\n+ \"issues_url\": \"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}\",\n+ \"pulls_url\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}\",\n+ \"milestones_url\": \"https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}\",\n+ \"notifications_url\": \"https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}\",\n+ \"labels_url\": \"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}\",\n+ \"releases_url\": \"https://api.github.com/repos/ursa-labs/ursabot/releases{/id}\",\n+ \"deployments_url\": \"https://api.github.com/repos/ursa-labs/ursabot/deployments\",\n+ \"created_at\": \"2019-02-04T15:40:31Z\",\n+ \"updated_at\": \"2019-04-04T17:49:10Z\",\n+ 
\"pushed_at\": \"2019-04-05T12:01:40Z\",\n+ \"git_url\": \"git://github.com/ursa-labs/ursabot.git\",\n+ \"ssh_url\": \"git@github.com:ursa-labs/ursabot.git\",\n+ \"clone_url\": \"https://github.com/ursa-labs/ursabot.git\",\n+ \"svn_url\": \"https://github.com/ursa-labs/ursabot\",\n+ \"homepage\": null,\n+ \"size\": 898,\n+ \"stargazers_count\": 1,\n+ \"watchers_count\": 1,\n+ \"language\": \"Jupyter Notebook\",\n+ \"has_issues\": true,\n+ \"has_projects\": true,\n+ \"has_downloads\": true,\n+ \"has_wiki\": true,\n+ \"has_pages\": false,\n+ \"forks_count\": 0,\n+ \"mirror_url\": null,\n+ \"archived\": false,\n+ \"disabled\": false,\n+ \"open_issues_count\": 19,\n+ \"license\": null,\n+ \"forks\": 0,\n+ \"open_issues\": 19,\n+ \"watchers\": 1,\n+ \"default_branch\": \"master\"\n+ }\n+ },\n+ \"_links\": {\n+ \"self\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26\"\n+ },\n+ \"html\": {\n+ \"href\": \"https://github.com/ursa-labs/ursabot/pull/26\"\n+ },\n+ \"issue\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26\"\n+ },\n+ \"comments\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments\"\n+ },\n+ \"review_comments\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments\"\n+ },\n+ \"review_comment\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}\"\n+ },\n+ \"commits\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits\"\n+ },\n+ \"statuses\": {\n+ \"href\": \"https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d\"\n+ }\n+ },\n+ \"author_association\": \"MEMBER\",\n+ \"merged\": false,\n+ \"mergeable\": true,\n+ \"rebaseable\": true,\n+ \"mergeable_state\": \"unstable\",\n+ \"merged_by\": null,\n+ \"comments\": 5,\n+ \"review_comments\": 0,\n+ \"maintainer_can_modify\": false,\n+ \"commits\": 2,\n+ \"additions\": 1124,\n+ \"deletions\": 0,\n+ 
\"changed_files\": 7\n+}" + }, + { + "sha": "e87b27d2d7b4956d15f7468488b96cf6a06686f4", + "filename": "ursabot/tests/test_hooks.py", + "status": "added", + "additions": 116, + "deletions": 0, + "changes": 116, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/test_hooks.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/tests/test_hooks.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/tests/test_hooks.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,116 @@\n+import json\n+from pathlib import Path\n+from twisted.trial import unittest\n+\n+from buildbot.test.util.misc import TestReactorMixin\n+from buildbot.test.fake.httpclientservice import \\\n+ HTTPClientService as FakeHTTPClientService\n+from buildbot.test.unit.test_www_hooks_github import (\n+ _prepare_request, _prepare_github_change_hook)\n+\n+from ursabot.utils import ensure_deferred\n+from ursabot.hooks import GithubHook\n+\n+\n+class ChangeHookTestCase(unittest.TestCase, TestReactorMixin):\n+\n+ klass = None\n+\n+ @ensure_deferred\n+ async def setUp(self):\n+ self.setUpTestReactor()\n+\n+ assert self.klass is not None\n+ self.hook = _prepare_github_change_hook(self, **{'class': self.klass})\n+ self.master = self.hook.master\n+ self.http = await FakeHTTPClientService.getFakeService(\n+ self.master, self, 'https://api.github.com',\n+ headers={'User-Agent': 'Buildbot'}, debug=False, verify=False)\n+\n+ await self.master.startService()\n+\n+ @ensure_deferred\n+ async def tearDown(self):\n+ await self.master.stopService()\n+\n+ async def trigger(self, event, payload, headers=None, _secret=None):\n+ payload = json.dumps(payload).encode()\n+ request = _prepare_request(event, payload, _secret=_secret,\n+ headers=headers)\n+ await request.test_render(self.hook)\n+ return request\n+\n+ def load_fixture(self, name):\n+ path = 
Path(__file__).parent / 'fixtures' / f'{name}.json'\n+ with path.open('r') as fp:\n+ return json.load(fp)\n+\n+\n+class TestGithubHook(ChangeHookTestCase):\n+\n+ klass = GithubHook\n+\n+ @ensure_deferred\n+ async def test_ping(self):\n+ await self.trigger('ping', {})\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_not_mentioning_ursabot(self):\n+ payload = self.load_fixture('issue-comment-not-mentioning-ursabot')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_by_ursabot(self):\n+ payload = self.load_fixture('issue-comment-by-ursabot')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_with_empty_command(self):\n+ # responds to the comment\n+ request_json = {'body': 'Unknown command \"\"'}\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/26/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-with-empty-command')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_without_pull_request(self):\n+ # responds to the comment\n+ request_json = {\n+ 'body': 'Ursabot only listens to pull request comments!'\n+ }\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/19/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-without-pull-request')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 0\n+\n+ @ensure_deferred\n+ async def test_issue_comment_build_command(self):\n+ # handle_issue_comment queries 
the pull request\n+ request_json = self.load_fixture('pull-request-26')\n+ self.http.expect('get', '/repos/ursa-labs/ursabot/pulls/26',\n+ content_json=request_json)\n+ # tigger handle_pull_request which fetches the commit\n+ request_json = self.load_fixture('pull-request-26-commit')\n+ commit = '2705da2b616b98fa6010a25813c5a7a27456f71d'\n+ self.http.expect('get', f'/repos/ursa-labs/ursabot/commits/{commit}',\n+ content_json=request_json)\n+\n+ # then responds to the comment\n+ request_json = {'body': \"I've successfully started builds for this PR\"}\n+ response_json = ''\n+ self.http.expect('post', '/repos/ursa-labs/ursabot/issues/26/comments',\n+ json=request_json, content_json=response_json)\n+\n+ payload = self.load_fixture('issue-comment-build-command')\n+ await self.trigger('issue_comment', payload=payload)\n+ assert len(self.hook.master.data.updates.changesAdded) == 1" + }, + { + "sha": "3ff0e88660cf186420e8bc672735e4d446963192", + "filename": "ursabot/utils.py", + "status": "added", + "additions": 10, + "deletions": 0, + "changes": 10, + "blob_url": "https://github.com/ursa-labs/ursabot/blob/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/utils.py", + "raw_url": "https://github.com/ursa-labs/ursabot/raw/70267dee34884e4b972388e1b30d57f6248c58d0/ursabot/utils.py", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/ursabot/utils.py?ref=70267dee34884e4b972388e1b30d57f6248c58d0", + "patch": "@@ -0,0 +1,10 @@\n+import functools\n+from twisted.internet import defer\n+\n+\n+def ensure_deferred(f):\n+ @functools.wraps(f)\n+ def wrapper(*args, **kwargs):\n+ result = f(*args, **kwargs)\n+ return defer.ensureDeferred(result)\n+ return wrapper" + } +]
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json new file mode 100644 index 000000000..d295afb39 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/fixtures/pull-request-26.json @@ -0,0 +1,329 @@ +{ + "url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26", + "id": 267785552, + "node_id": "MDExOlB1bGxSZXF1ZXN0MjY3Nzg1NTUy", + "html_url": "https://github.com/ursa-labs/ursabot/pull/26", + "diff_url": "https://github.com/ursa-labs/ursabot/pull/26.diff", + "patch_url": "https://github.com/ursa-labs/ursabot/pull/26.patch", + "issue_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26", + "number": 26, + "state": "open", + "locked": false, + "title": "Unittests for GithubHook", + "user": { + "login": "kszucs", + "id": 961747, + "node_id": "MDQ6VXNlcjk2MTc0Nw==", + "avatar_url": "https://avatars1.githubusercontent.com/u/961747?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/kszucs", + "html_url": "https://github.com/kszucs", + "followers_url": "https://api.github.com/users/kszucs/followers", + "following_url": "https://api.github.com/users/kszucs/following{/other_user}", + "gists_url": "https://api.github.com/users/kszucs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/kszucs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/kszucs/subscriptions", + "organizations_url": "https://api.github.com/users/kszucs/orgs", + "repos_url": "https://api.github.com/users/kszucs/repos", + "events_url": "https://api.github.com/users/kszucs/events{/privacy}", + "received_events_url": "https://api.github.com/users/kszucs/received_events", + "type": "User", + "site_admin": false + }, + "body": "", + "body_html": "", + "body_text": "", + "created_at": "2019-04-05T11:22:15Z", + "updated_at": "2019-04-05T12:01:40Z", + "closed_at": null, + "merged_at": null, + "merge_commit_sha": 
"cc5dc3606988b3824be54df779ed2028776113cb", + "assignee": null, + "assignees": [], + "requested_reviewers": [], + "requested_teams": [], + "labels": [], + "milestone": null, + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits", + "review_comments_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments", + "review_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d", + "head": { + "label": "ursa-labs:test-hook", + "ref": "test-hook", + "sha": "2705da2b616b98fa6010a25813c5a7a27456f71d", + "user": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "repo": { + "id": 169101701, + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "name": "ursabot", + "full_name": "ursa-labs/ursabot", + "private": false, + "owner": { + "login": "ursa-labs", + 
"id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ursa-labs/ursabot", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "issue_events_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "blobs_url": 
"https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "issues_url": "https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "labels_url": 
"https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "created_at": "2019-02-04T15:40:31Z", + "updated_at": "2019-04-04T17:49:10Z", + "pushed_at": "2019-04-05T12:01:40Z", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "svn_url": "https://github.com/ursa-labs/ursabot", + "homepage": null, + "size": 898, + "stargazers_count": 1, + "watchers_count": 1, + "language": "Jupyter Notebook", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 19, + "license": null, + "forks": 0, + "open_issues": 19, + "watchers": 1, + "default_branch": "master" + } + }, + "base": { + "label": "ursa-labs:master", + "ref": "master", + "sha": "a162ad254b589b924db47e057791191b39613fd5", + "user": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": 
"https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "repo": { + "id": 169101701, + "node_id": "MDEwOlJlcG9zaXRvcnkxNjkxMDE3MDE=", + "name": "ursabot", + "full_name": "ursa-labs/ursabot", + "private": false, + "owner": { + "login": "ursa-labs", + "id": 46514972, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjQ2NTE0OTcy", + "avatar_url": "https://avatars2.githubusercontent.com/u/46514972?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ursa-labs", + "html_url": "https://github.com/ursa-labs", + "followers_url": "https://api.github.com/users/ursa-labs/followers", + "following_url": "https://api.github.com/users/ursa-labs/following{/other_user}", + "gists_url": "https://api.github.com/users/ursa-labs/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ursa-labs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ursa-labs/subscriptions", + "organizations_url": "https://api.github.com/users/ursa-labs/orgs", + "repos_url": "https://api.github.com/users/ursa-labs/repos", + "events_url": "https://api.github.com/users/ursa-labs/events{/privacy}", + "received_events_url": "https://api.github.com/users/ursa-labs/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ursa-labs/ursabot", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ursa-labs/ursabot", + "forks_url": "https://api.github.com/repos/ursa-labs/ursabot/forks", + "keys_url": "https://api.github.com/repos/ursa-labs/ursabot/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ursa-labs/ursabot/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ursa-labs/ursabot/teams", + "hooks_url": "https://api.github.com/repos/ursa-labs/ursabot/hooks", + "issue_events_url": 
"https://api.github.com/repos/ursa-labs/ursabot/issues/events{/number}", + "events_url": "https://api.github.com/repos/ursa-labs/ursabot/events", + "assignees_url": "https://api.github.com/repos/ursa-labs/ursabot/assignees{/user}", + "branches_url": "https://api.github.com/repos/ursa-labs/ursabot/branches{/branch}", + "tags_url": "https://api.github.com/repos/ursa-labs/ursabot/tags", + "blobs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ursa-labs/ursabot/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ursa-labs/ursabot/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ursa-labs/ursabot/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ursa-labs/ursabot/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ursa-labs/ursabot/languages", + "stargazers_url": "https://api.github.com/repos/ursa-labs/ursabot/stargazers", + "contributors_url": "https://api.github.com/repos/ursa-labs/ursabot/contributors", + "subscribers_url": "https://api.github.com/repos/ursa-labs/ursabot/subscribers", + "subscription_url": "https://api.github.com/repos/ursa-labs/ursabot/subscription", + "commits_url": "https://api.github.com/repos/ursa-labs/ursabot/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ursa-labs/ursabot/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ursa-labs/ursabot/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ursa-labs/ursabot/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ursa-labs/ursabot/contents/{+path}", + "compare_url": "https://api.github.com/repos/ursa-labs/ursabot/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ursa-labs/ursabot/merges", + "archive_url": "https://api.github.com/repos/ursa-labs/ursabot/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/ursa-labs/ursabot/downloads", + "issues_url": 
"https://api.github.com/repos/ursa-labs/ursabot/issues{/number}", + "pulls_url": "https://api.github.com/repos/ursa-labs/ursabot/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ursa-labs/ursabot/milestones{/number}", + "notifications_url": "https://api.github.com/repos/ursa-labs/ursabot/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/ursa-labs/ursabot/labels{/name}", + "releases_url": "https://api.github.com/repos/ursa-labs/ursabot/releases{/id}", + "deployments_url": "https://api.github.com/repos/ursa-labs/ursabot/deployments", + "created_at": "2019-02-04T15:40:31Z", + "updated_at": "2019-04-04T17:49:10Z", + "pushed_at": "2019-04-05T12:01:40Z", + "git_url": "git://github.com/ursa-labs/ursabot.git", + "ssh_url": "git@github.com:ursa-labs/ursabot.git", + "clone_url": "https://github.com/ursa-labs/ursabot.git", + "svn_url": "https://github.com/ursa-labs/ursabot", + "homepage": null, + "size": 898, + "stargazers_count": 1, + "watchers_count": 1, + "language": "Jupyter Notebook", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 19, + "license": null, + "forks": 0, + "open_issues": 19, + "watchers": 1, + "default_branch": "master" + } + }, + "_links": { + "self": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26" + }, + "html": { + "href": "https://github.com/ursa-labs/ursabot/pull/26" + }, + "issue": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26" + }, + "comments": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/issues/26/comments" + }, + "review_comments": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/26/comments" + }, + "review_comment": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/pulls/comments{/number}" + }, + "commits": { + "href": 
"https://api.github.com/repos/ursa-labs/ursabot/pulls/26/commits" + }, + "statuses": { + "href": "https://api.github.com/repos/ursa-labs/ursabot/statuses/2705da2b616b98fa6010a25813c5a7a27456f71d" + } + }, + "author_association": "MEMBER", + "merged": false, + "mergeable": true, + "rebaseable": true, + "mergeable_state": "unstable", + "merged_by": null, + "comments": 5, + "review_comments": 0, + "maintainer_can_modify": false, + "commits": 2, + "additions": 1124, + "deletions": 0, + "changed_files": 7 +}
\ No newline at end of file diff --git a/src/arrow/dev/archery/archery/tests/test_benchmarks.py b/src/arrow/dev/archery/archery/tests/test_benchmarks.py new file mode 100644 index 000000000..fab1e8d44 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/test_benchmarks.py @@ -0,0 +1,383 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json + +from archery.benchmark.codec import JsonEncoder +from archery.benchmark.core import Benchmark, median +from archery.benchmark.compare import ( + BenchmarkComparator, RunnerComparator +) +from archery.benchmark.google import ( + GoogleBenchmark, GoogleBenchmarkObservation +) +from archery.benchmark.runner import StaticBenchmarkRunner + + +def test_benchmark_comparator(): + unit = "micros" + + assert not BenchmarkComparator( + Benchmark("contender", unit, True, [10], unit, [1]), + Benchmark("baseline", unit, True, [20], unit, [1]), + ).regression + + assert BenchmarkComparator( + Benchmark("contender", unit, False, [10], unit, [1]), + Benchmark("baseline", unit, False, [20], unit, [1]), + ).regression + + assert BenchmarkComparator( + Benchmark("contender", unit, True, [20], unit, [1]), + Benchmark("baseline", unit, True, [10], unit, [1]), + ).regression + + assert not BenchmarkComparator( + Benchmark("contender", unit, False, [20], unit, [1]), + Benchmark("baseline", unit, False, [10], unit, [1]), + ).regression + + +def test_static_runner_from_json_not_a_regression(): + archery_result = { + "suites": [ + { + "name": "arrow-value-parsing-benchmark", + "benchmarks": [ + { + "name": "FloatParsing<DoubleType>", + "unit": "items_per_second", + "less_is_better": False, + "values": [ + 109941112.87296811 + ], + "time_unit": "ns", + "times": [ + 9095.800104330105 + ] + }, + ] + } + ] + } + + contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result)) + baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result)) + [comparison] = RunnerComparator(contender, baseline).comparisons + assert not comparison.regression + + +def test_static_runner_from_json_regression(): + archery_result = { + "suites": [ + { + "name": "arrow-value-parsing-benchmark", + "benchmarks": [ + { + "name": "FloatParsing<DoubleType>", + "unit": "items_per_second", + "less_is_better": False, + "values": [ + 109941112.87296811 + ], + "time_unit": "ns", + "times": [ + 
9095.800104330105 + ] + }, + ] + } + ] + } + + contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result)) + + # introduce artificial regression + archery_result['suites'][0]['benchmarks'][0]['values'][0] *= 2 + baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result)) + + [comparison] = RunnerComparator(contender, baseline).comparisons + assert comparison.regression + + +def test_benchmark_median(): + assert median([10]) == 10 + assert median([1, 2, 3]) == 2 + assert median([1, 2]) == 1.5 + assert median([1, 2, 3, 4]) == 2.5 + assert median([1, 1, 1, 1]) == 1 + try: + median([]) + assert False + except ValueError: + pass + + +def assert_benchmark(name, google_result, archery_result): + observation = GoogleBenchmarkObservation(**google_result) + benchmark = GoogleBenchmark(name, [observation]) + result = json.dumps(benchmark, cls=JsonEncoder) + assert json.loads(result) == archery_result + + +def test_items_per_second(): + name = "ArrayArrayKernel<AddChecked, UInt8Type>/32768/0" + google_result = { + "cpu_time": 116292.58886653671, + "items_per_second": 281772039.9844759, + "iterations": 5964, + "name": name, + "null_percent": 0.0, + "real_time": 119811.77313729875, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "size": 32768.0, + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 5964, + "null_percent": 0.0, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "items_per_second", + "less_is_better": False, + "values": [281772039.9844759], + "time_unit": "ns", + "times": [119811.77313729875], + } + assert "items_per_second" in google_result + assert "bytes_per_second" not in google_result + assert_benchmark(name, google_result, archery_result) + + +def test_bytes_per_second(): + name = "BufferOutputStreamLargeWrites/real_time" + google_result = { + "bytes_per_second": 1890209037.3405428, + "cpu_time": 
17018127.659574457, + "iterations": 47, + "name": name, + "real_time": 17458386.53190963, + "repetition_index": 1, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 47, + "repetition_index": 1, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "bytes_per_second", + "less_is_better": False, + "values": [1890209037.3405428], + "time_unit": "ns", + "times": [17458386.53190963], + } + assert "items_per_second" not in google_result + assert "bytes_per_second" in google_result + assert_benchmark(name, google_result, archery_result) + + +def test_both_items_and_bytes_per_second(): + name = "ArrayArrayKernel<AddChecked, UInt8Type>/32768/0" + google_result = { + "bytes_per_second": 281772039.9844759, + "cpu_time": 116292.58886653671, + "items_per_second": 281772039.9844759, + "iterations": 5964, + "name": name, + "null_percent": 0.0, + "real_time": 119811.77313729875, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "size": 32768.0, + "threads": 1, + "time_unit": "ns", + } + # Note that bytes_per_second trumps items_per_second + archery_result = { + "counters": {"iterations": 5964, + "null_percent": 0.0, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "bytes_per_second", + "less_is_better": False, + "values": [281772039.9844759], + "time_unit": "ns", + "times": [119811.77313729875], + } + assert "items_per_second" in google_result + assert "bytes_per_second" in google_result + assert_benchmark(name, google_result, archery_result) + + +def test_neither_items_nor_bytes_per_second(): + name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time" + google_result = { + "cpu_time": 1778.6004847419827, + "iterations": 352765, + "name": name, + "real_time": 1835.3137357788837, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, 
+ "run_type": "iteration", + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 352765, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "ns", + "less_is_better": True, + "values": [1835.3137357788837], + "time_unit": "ns", + "times": [1835.3137357788837], + } + assert "items_per_second" not in google_result + assert "bytes_per_second" not in google_result + assert_benchmark(name, google_result, archery_result) + + +def test_prefer_real_time(): + name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time" + google_result = { + "cpu_time": 1778.6004847419827, + "iterations": 352765, + "name": name, + "real_time": 1835.3137357788837, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 352765, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "ns", + "less_is_better": True, + "values": [1835.3137357788837], + "time_unit": "ns", + "times": [1835.3137357788837], + } + assert name.endswith("/real_time") + assert_benchmark(name, google_result, archery_result) + + +def test_prefer_cpu_time(): + name = "AllocateDeallocate<Jemalloc>/size:1048576" + google_result = { + "cpu_time": 1778.6004847419827, + "iterations": 352765, + "name": name, + "real_time": 1835.3137357788837, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 352765, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "ns", + "less_is_better": True, + "values": [1778.6004847419827], + "time_unit": "ns", + "times": [1835.3137357788837], + } + assert not name.endswith("/real_time") + assert_benchmark(name, google_result, archery_result) + + +def 
test_omits_aggregates(): + name = "AllocateDeallocate<Jemalloc>/size:1048576/real_time" + google_aggregate = { + "aggregate_name": "mean", + "cpu_time": 1757.428694267678, + "iterations": 3, + "name": "AllocateDeallocate<Jemalloc>/size:1048576/real_time_mean", + "real_time": 1849.3869337041162, + "repetitions": 0, + "run_name": name, + "run_type": "aggregate", + "threads": 1, + "time_unit": "ns", + } + google_result = { + "cpu_time": 1778.6004847419827, + "iterations": 352765, + "name": name, + "real_time": 1835.3137357788837, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "run_type": "iteration", + "threads": 1, + "time_unit": "ns", + } + archery_result = { + "counters": {"iterations": 352765, + "repetition_index": 0, + "repetitions": 0, + "run_name": name, + "threads": 1}, + "name": name, + "unit": "ns", + "less_is_better": True, + "values": [1835.3137357788837], + "time_unit": "ns", + "times": [1835.3137357788837], + } + assert google_aggregate["run_type"] == "aggregate" + assert google_result["run_type"] == "iteration" + observation1 = GoogleBenchmarkObservation(**google_aggregate) + observation2 = GoogleBenchmarkObservation(**google_result) + benchmark = GoogleBenchmark(name, [observation1, observation2]) + result = json.dumps(benchmark, cls=JsonEncoder) + assert json.loads(result) == archery_result diff --git a/src/arrow/dev/archery/archery/tests/test_bot.py b/src/arrow/dev/archery/archery/tests/test_bot.py new file mode 100644 index 000000000..e84fb7e27 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/test_bot.py @@ -0,0 +1,215 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +from unittest.mock import Mock + +import click +import pytest +import responses as rsps + +from archery.bot import CommentBot, CommandError, group + + +@pytest.fixture +def responses(): + with rsps.RequestsMock() as mock: + yield mock + + +def github_url(path): + return 'https://api.github.com:443/{}'.format(path.strip('/')) + + +@group() +def custom_handler(): + pass + + +@custom_handler.command() +@click.pass_obj +def extra(obj): + return obj + + +@custom_handler.command() +@click.option('--force', '-f', is_flag=True) +def build(force): + return force + + +@custom_handler.command() +@click.option('--name', required=True) +def benchmark(name): + return name + + +def test_click_based_commands(): + assert custom_handler('build') is False + assert custom_handler('build -f') is True + + assert custom_handler('benchmark --name strings') == 'strings' + with pytest.raises(CommandError): + assert custom_handler('benchmark') + + assert custom_handler('extra', extra='data') == {'extra': 'data'} + + +@pytest.mark.parametrize('fixture_name', [ + # the bot is not mentioned, nothing to do + 'event-issue-comment-not-mentioning-ursabot.json', + # don't respond to itself, it prevents recursive comment storms! 
+ 'event-issue-comment-by-ursabot.json', + # non-authorized user sent the comment, do not respond + 'event-issue-comment-by-non-authorized-user.json', +]) +def test_noop_events(load_fixture, fixture_name): + payload = load_fixture(fixture_name) + + handler = Mock() + bot = CommentBot(name='ursabot', token='', handler=handler) + bot.handle('issue_comment', payload) + + handler.assert_not_called() + + +def test_issue_comment_without_pull_request(load_fixture, responses): + responses.add( + responses.GET, + github_url('/repositories/169101701/issues/19'), + json=load_fixture('issue-19.json'), + status=200 + ) + responses.add( + responses.GET, + github_url('repos/ursa-labs/ursabot/pulls/19'), + json={}, + status=404 + ) + responses.add( + responses.POST, + github_url('/repos/ursa-labs/ursabot/issues/19/comments'), + json={} + ) + + def handler(command, **kwargs): + pass + + payload = load_fixture('event-issue-comment-without-pull-request.json') + bot = CommentBot(name='ursabot', token='', handler=handler) + bot.handle('issue_comment', payload) + + post = responses.calls[2] + assert json.loads(post.request.body) == { + 'body': "The comment bot only listens to pull request comments!" 
+ } + + +def test_respond_with_usage(load_fixture, responses): + responses.add( + responses.GET, + github_url('/repositories/169101701/issues/26'), + json=load_fixture('issue-26.json'), + status=200 + ) + responses.add( + responses.GET, + github_url('/repos/ursa-labs/ursabot/pulls/26'), + json=load_fixture('pull-request-26.json'), + status=200 + ) + responses.add( + responses.GET, + github_url('/repos/ursa-labs/ursabot/issues/comments/480243811'), + json=load_fixture('issue-comment-480243811.json') + ) + responses.add( + responses.POST, + github_url('/repos/ursa-labs/ursabot/issues/26/comments'), + json={} + ) + + def handler(command, **kwargs): + raise CommandError('test-usage') + + payload = load_fixture('event-issue-comment-with-empty-command.json') + bot = CommentBot(name='ursabot', token='', handler=handler) + bot.handle('issue_comment', payload) + + post = responses.calls[3] + assert json.loads(post.request.body) == {'body': '```\ntest-usage\n```'} + + +@pytest.mark.parametrize(('command', 'reaction'), [ + ('@ursabot build', '+1'), + ('@ursabot build\nwith a comment', '+1'), + ('@ursabot listen', '-1'), +]) +def test_issue_comment_with_commands(load_fixture, responses, command, + reaction): + responses.add( + responses.GET, + github_url('/repositories/169101701/issues/26'), + json=load_fixture('issue-26.json'), + status=200 + ) + responses.add( + responses.GET, + github_url('/repos/ursa-labs/ursabot/pulls/26'), + json=load_fixture('pull-request-26.json'), + status=200 + ) + responses.add( + responses.GET, + github_url('/repos/ursa-labs/ursabot/issues/comments/480248726'), + json=load_fixture('issue-comment-480248726.json') + ) + responses.add( + responses.POST, + github_url( + '/repos/ursa-labs/ursabot/issues/comments/480248726/reactions' + ), + json={} + ) + + def handler(command, **kwargs): + if command == 'build': + return True + else: + raise ValueError('Only `build` command is supported.') + + payload = 
load_fixture('event-issue-comment-build-command.json') + payload["comment"]["body"] = command + + bot = CommentBot(name='ursabot', token='', handler=handler) + bot.handle('issue_comment', payload) + + post = responses.calls[3] + assert json.loads(post.request.body) == {'content': reaction} + + +def test_issue_comment_with_commands_bot_not_first(load_fixture, responses): + # when the @-mention is not first, this is a no-op + handler = Mock() + + payload = load_fixture('event-issue-comment-build-command.json') + payload["comment"]["body"] = 'with a comment\n@ursabot build' + + bot = CommentBot(name='ursabot', token='', handler=handler) + bot.handle('issue_comment', payload) + + handler.assert_not_called() diff --git a/src/arrow/dev/archery/archery/tests/test_cli.py b/src/arrow/dev/archery/archery/tests/test_cli.py new file mode 100644 index 000000000..3891a2c28 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/test_cli.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pathlib import Path +from unittest.mock import patch + +from click.testing import CliRunner + +from archery.cli import archery + + +@patch("archery.linking.check_dynamic_library_dependencies") +def test_linking_check_dependencies(fn): + args = [ + "linking", + "check-dependencies", + "-a", "libarrow", + "-d", "libcurl", + "somelib.so" + ] + result = CliRunner().invoke(archery, args) + assert result.exit_code == 0 + fn.assert_called_once_with( + Path('somelib.so'), allowed={'libarrow'}, disallowed={'libcurl'} + ) diff --git a/src/arrow/dev/archery/archery/tests/test_release.py b/src/arrow/dev/archery/archery/tests/test_release.py new file mode 100644 index 000000000..75aac8921 --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/test_release.py @@ -0,0 +1,333 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest + +from archery.release import ( + Release, MajorRelease, MinorRelease, PatchRelease, + Jira, Version, Issue, CommitTitle, Commit +) +from archery.testing import DotDict + + +# subset of issues per revision +_issues = { + "1.0.1": [ + Issue("ARROW-9684", type="Bug", summary="[C++] Title"), + Issue("ARROW-9667", type="New Feature", summary="[Crossbow] Title"), + Issue("ARROW-9659", type="Bug", summary="[C++] Title"), + Issue("ARROW-9644", type="Bug", summary="[C++][Dataset] Title"), + Issue("ARROW-9643", type="Bug", summary="[C++] Title"), + Issue("ARROW-9609", type="Bug", summary="[C++] Title"), + Issue("ARROW-9606", type="Bug", summary="[C++][Dataset] Title") + ], + "1.0.0": [ + Issue("ARROW-300", type="New Feature", summary="[Format] Title"), + Issue("ARROW-4427", type="Task", summary="[Doc] Title"), + Issue("ARROW-5035", type="Improvement", summary="[C#] Title"), + Issue("ARROW-8473", type="Bug", summary="[Rust] Title"), + Issue("ARROW-8472", type="Bug", summary="[Go][Integration] Title"), + Issue("ARROW-8471", type="Bug", summary="[C++][Integration] Title"), + Issue("ARROW-8974", type="Improvement", summary="[C++] Title"), + Issue("ARROW-8973", type="New Feature", summary="[Java] Title") + ], + "0.17.1": [ + Issue("ARROW-8684", type="Bug", summary="[Python] Title"), + Issue("ARROW-8657", type="Bug", summary="[C++][Parquet] Title"), + Issue("ARROW-8641", type="Bug", summary="[Python] Title"), + Issue("ARROW-8609", type="Bug", summary="[C++] Title"), + ], + "0.17.0": [ + Issue("ARROW-2882", type="New Feature", summary="[C++][Python] Title"), + Issue("ARROW-2587", type="Bug", summary="[Python] Title"), + Issue("ARROW-2447", type="Improvement", summary="[C++] Title"), + Issue("ARROW-2255", type="Bug", summary="[Integration] Title"), + Issue("ARROW-1907", type="Bug", summary="[C++/Python] Title"), + Issue("ARROW-1636", type="New Feature", summary="[Format] Title") + ] +} + + +class FakeJira(Jira): + + def __init__(self): + pass + + def 
project_versions(self, project='ARROW'): + return [ + Version.parse("3.0.0", released=False), + Version.parse("2.0.0", released=False), + Version.parse("1.1.0", released=False), + Version.parse("1.0.1", released=False), + Version.parse("1.0.0", released=True), + Version.parse("0.17.1", released=True), + Version.parse("0.17.0", released=True), + Version.parse("0.16.0", released=True), + Version.parse("0.15.2", released=True), + Version.parse("0.15.1", released=True), + Version.parse("0.15.0", released=True), + ] + + def project_issues(self, version, project='ARROW'): + return _issues[str(version)] + + +@pytest.fixture +def fake_jira(): + return FakeJira() + + +def test_version(fake_jira): + v = Version.parse("1.2.5") + assert str(v) == "1.2.5" + assert v.major == 1 + assert v.minor == 2 + assert v.patch == 5 + assert v.released is False + assert v.release_date is None + + v = Version.parse("1.0.0", released=True, release_date="2020-01-01") + assert str(v) == "1.0.0" + assert v.major == 1 + assert v.minor == 0 + assert v.patch == 0 + assert v.released is True + assert v.release_date == "2020-01-01" + + +def test_issue(fake_jira): + i = Issue("ARROW-1234", type='Bug', summary="title") + assert i.key == "ARROW-1234" + assert i.type == "Bug" + assert i.summary == "title" + assert i.project == "ARROW" + assert i.number == 1234 + + i = Issue("PARQUET-1111", type='Improvement', summary="another title") + assert i.key == "PARQUET-1111" + assert i.type == "Improvement" + assert i.summary == "another title" + assert i.project == "PARQUET" + assert i.number == 1111 + + fake_jira_issue = DotDict({ + 'key': 'ARROW-2222', + 'fields': { + 'issuetype': { + 'name': 'Feature' + }, + 'summary': 'Issue title' + } + }) + i = Issue.from_jira(fake_jira_issue) + assert i.key == "ARROW-2222" + assert i.type == "Feature" + assert i.summary == "Issue title" + assert i.project == "ARROW" + assert i.number == 2222 + + +def test_commit_title(): + t = CommitTitle.parse( + "ARROW-9598: 
[C++][Parquet] Fix writing nullable structs" + ) + assert t.project == "ARROW" + assert t.issue == "ARROW-9598" + assert t.components == ["C++", "Parquet"] + assert t.summary == "Fix writing nullable structs" + + t = CommitTitle.parse( + "ARROW-8002: [C++][Dataset][R] Support partitioned dataset writing" + ) + assert t.project == "ARROW" + assert t.issue == "ARROW-8002" + assert t.components == ["C++", "Dataset", "R"] + assert t.summary == "Support partitioned dataset writing" + + t = CommitTitle.parse( + "ARROW-9600: [Rust][Arrow] pin older version of proc-macro2 during " + "build" + ) + assert t.project == "ARROW" + assert t.issue == "ARROW-9600" + assert t.components == ["Rust", "Arrow"] + assert t.summary == "pin older version of proc-macro2 during build" + + t = CommitTitle.parse("[Release] Update versions for 1.0.0") + assert t.project is None + assert t.issue is None + assert t.components == ["Release"] + assert t.summary == "Update versions for 1.0.0" + + t = CommitTitle.parse("[Python][Doc] Fix rst role dataset.rst (#7725)") + assert t.project is None + assert t.issue is None + assert t.components == ["Python", "Doc"] + assert t.summary == "Fix rst role dataset.rst (#7725)" + + t = CommitTitle.parse( + "PARQUET-1882: [C++] Buffered Reads should allow for 0 length" + ) + assert t.project == 'PARQUET' + assert t.issue == 'PARQUET-1882' + assert t.components == ["C++"] + assert t.summary == "Buffered Reads should allow for 0 length" + + t = CommitTitle.parse( + "ARROW-9340 [R] Use CRAN version of decor package " + "\nsomething else\n" + "\nwhich should be truncated" + ) + assert t.project == 'ARROW' + assert t.issue == 'ARROW-9340' + assert t.components == ["R"] + assert t.summary == "Use CRAN version of decor package " + + +def test_release_basics(fake_jira): + r = Release.from_jira("1.0.0", jira=fake_jira) + assert isinstance(r, MajorRelease) + assert r.is_released is True + assert r.branch == 'master' + assert r.tag == 'apache-arrow-1.0.0' + + r = 
Release.from_jira("1.1.0", jira=fake_jira) + assert isinstance(r, MinorRelease) + assert r.is_released is False + assert r.branch == 'maint-1.x.x' + assert r.tag == 'apache-arrow-1.1.0' + + # minor releases before 1.0 are treated as major releases + r = Release.from_jira("0.17.0", jira=fake_jira) + assert isinstance(r, MajorRelease) + assert r.is_released is True + assert r.branch == 'master' + assert r.tag == 'apache-arrow-0.17.0' + + r = Release.from_jira("0.17.1", jira=fake_jira) + assert isinstance(r, PatchRelease) + assert r.is_released is True + assert r.branch == 'maint-0.17.x' + assert r.tag == 'apache-arrow-0.17.1' + + +def test_previous_and_next_release(fake_jira): + r = Release.from_jira("3.0.0", jira=fake_jira) + assert isinstance(r.previous, MajorRelease) + assert r.previous.version == Version.parse("2.0.0") + with pytest.raises(ValueError, match="There is no upcoming release set"): + assert r.next + + r = Release.from_jira("2.0.0", jira=fake_jira) + assert isinstance(r.previous, MajorRelease) + assert isinstance(r.next, MajorRelease) + assert r.previous.version == Version.parse("1.0.0") + assert r.next.version == Version.parse("3.0.0") + + r = Release.from_jira("1.1.0", jira=fake_jira) + assert isinstance(r.previous, MajorRelease) + assert isinstance(r.next, MajorRelease) + assert r.previous.version == Version.parse("1.0.0") + assert r.next.version == Version.parse("2.0.0") + + r = Release.from_jira("1.0.0", jira=fake_jira) + assert isinstance(r.next, MajorRelease) + assert isinstance(r.previous, MajorRelease) + assert r.previous.version == Version.parse("0.17.0") + assert r.next.version == Version.parse("2.0.0") + + r = Release.from_jira("0.17.0", jira=fake_jira) + assert isinstance(r.previous, MajorRelease) + assert r.previous.version == Version.parse("0.16.0") + + r = Release.from_jira("0.15.2", jira=fake_jira) + assert isinstance(r.previous, PatchRelease) + assert isinstance(r.next, MajorRelease) + assert r.previous.version == 
Version.parse("0.15.1") + assert r.next.version == Version.parse("0.16.0") + + r = Release.from_jira("0.15.1", jira=fake_jira) + assert isinstance(r.previous, MajorRelease) + assert isinstance(r.next, PatchRelease) + assert r.previous.version == Version.parse("0.15.0") + assert r.next.version == Version.parse("0.15.2") + + +def test_release_issues(fake_jira): + # major release issues + r = Release.from_jira("1.0.0", jira=fake_jira) + assert r.issues.keys() == set([ + "ARROW-300", + "ARROW-4427", + "ARROW-5035", + "ARROW-8473", + "ARROW-8472", + "ARROW-8471", + "ARROW-8974", + "ARROW-8973" + ]) + # minor release issues + r = Release.from_jira("0.17.0", jira=fake_jira) + assert r.issues.keys() == set([ + "ARROW-2882", + "ARROW-2587", + "ARROW-2447", + "ARROW-2255", + "ARROW-1907", + "ARROW-1636", + ]) + # patch release issues + r = Release.from_jira("1.0.1", jira=fake_jira) + assert r.issues.keys() == set([ + "ARROW-9684", + "ARROW-9667", + "ARROW-9659", + "ARROW-9644", + "ARROW-9643", + "ARROW-9609", + "ARROW-9606" + ]) + + +@pytest.mark.parametrize(('version', 'ncommits'), [ + ("1.0.0", 771), + ("0.17.1", 27), + ("0.17.0", 569), + ("0.15.1", 41) +]) +def test_release_commits(fake_jira, version, ncommits): + r = Release.from_jira(version, jira=fake_jira) + assert len(r.commits) == ncommits + for c in r.commits: + assert isinstance(c, Commit) + assert isinstance(c.title, CommitTitle) + assert c.url.endswith(c.hexsha) + + +def test_maintenance_patch_selection(fake_jira): + r = Release.from_jira("0.17.1", jira=fake_jira) + + shas_to_pick = [ + c.hexsha for c in r.commits_to_pick(exclude_already_applied=False) + ] + expected = [ + '8939b4bd446ee406d5225c79d563a27d30fd7d6d', + 'bcef6c95a324417e85e0140f9745d342cd8784b3', + '6002ec388840de5622e39af85abdc57a2cccc9b2', + '9123dadfd123bca7af4eaa9455f5b0d1ca8b929d', + ] + assert shas_to_pick == expected diff --git a/src/arrow/dev/archery/archery/tests/test_testing.py b/src/arrow/dev/archery/archery/tests/test_testing.py new 
file mode 100644 index 000000000..117b9288d --- /dev/null +++ b/src/arrow/dev/archery/archery/tests/test_testing.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import subprocess + +import pytest + +from archery.testing import PartialEnv, assert_subprocess_calls + + +def test_partial_env(): + assert PartialEnv(a=1, b=2) == {'a': 1, 'b': 2, 'c': 3} + assert PartialEnv(a=1) == {'a': 1, 'b': 2, 'c': 3} + assert PartialEnv(a=1, b=2) == {'a': 1, 'b': 2} + assert PartialEnv(a=1, b=2) != {'b': 2, 'c': 3} + assert PartialEnv(a=1, b=2) != {'a': 1, 'c': 3} + + +def test_assert_subprocess_calls(): + expected_calls = [ + "echo Hello", + ["echo", "World"] + ] + with assert_subprocess_calls(expected_calls): + subprocess.run(['echo', 'Hello']) + subprocess.run(['echo', 'World']) + + expected_env = PartialEnv( + CUSTOM_ENV_A='a', + CUSTOM_ENV_C='c' + ) + with assert_subprocess_calls(expected_calls, env=expected_env): + env = { + 'CUSTOM_ENV_A': 'a', + 'CUSTOM_ENV_B': 'b', + 'CUSTOM_ENV_C': 'c' + } + subprocess.run(['echo', 'Hello'], env=env) + subprocess.run(['echo', 'World'], env=env) + + with pytest.raises(AssertionError): + with assert_subprocess_calls(expected_calls, env=expected_env): + env = { + 'CUSTOM_ENV_B': 'b', + 
'CUSTOM_ENV_C': 'c' + } + subprocess.run(['echo', 'Hello'], env=env) + subprocess.run(['echo', 'World'], env=env) diff --git a/src/arrow/dev/archery/archery/utils/__init__.py b/src/arrow/dev/archery/archery/utils/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/src/arrow/dev/archery/archery/utils/cache.py b/src/arrow/dev/archery/archery/utils/cache.py new file mode 100644 index 000000000..d92c5f32e --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/cache.py @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pathlib import Path +import os +from urllib.request import urlopen + +from .logger import logger + +ARCHERY_CACHE_DIR = Path.home() / ".cache" / "archery" + + +class Cache: + """ Cache stores downloaded objects, notably apache-rat.jar. """ + + def __init__(self, path=ARCHERY_CACHE_DIR): + self.root = path + + if not path.exists(): + os.makedirs(path) + + def key_path(self, key): + """ Return the full path of a key. """ + return self.root/key + + def get(self, key): + """ Return the full path of a key if cached, None otherwise. """ + path = self.key_path(key) + return path if path.exists() else None + + def delete(self, key): + """ Remove a key (and the file) from the cache. """ + path = self.get(key) + if path: + path.unlink() + + def get_or_insert(self, key, create): + """ + Get or Insert a key from the cache. If the key is not found, the + `create` closure will be evaluated. + + The `create` closure takes a single parameter, the path where the + object should be store. The file should only be created upon success. + """ + path = self.key_path(key) + + if not path.exists(): + create(path) + + return path + + def get_or_insert_from_url(self, key, url): + """ + Get or Insert a key from the cache. If the key is not found, the file + is downloaded from `url`. + """ + def download(path): + """ Tiny wrapper that download a file and save as key. """ + logger.debug("Downloading {} as {}".format(url, path)) + conn = urlopen(url) + # Ensure the download is completed before writing to disks. 
+ content = conn.read() + with open(path, "wb") as path_fd: + path_fd.write(content) + + return self.get_or_insert(key, download) diff --git a/src/arrow/dev/archery/archery/utils/cli.py b/src/arrow/dev/archery/archery/utils/cli.py new file mode 100644 index 000000000..701abe925 --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/cli.py @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import importlib + +import click + +from .source import ArrowSources, InvalidArrowSource + + +class ArrowBool(click.types.BoolParamType): + """ + ArrowBool supports the 'ON' and 'OFF' values on top of the values + supported by BoolParamType. This is convenient to port script which exports + CMake options variables. + """ + name = "boolean" + + def convert(self, value, param, ctx): + if isinstance(value, str): + lowered = value.lower() + if lowered == "on": + return True + elif lowered == "off": + return False + + return super().convert(value, param, ctx) + + +def validate_arrow_sources(ctx, param, src): + """ + Ensure a directory contains Arrow cpp sources. 
+ """ + try: + return ArrowSources.find(src) + except InvalidArrowSource as e: + raise click.BadParameter(str(e)) + + +def add_optional_command(name, module, function, parent): + try: + module = importlib.import_module(module, package="archery") + command = getattr(module, function) + except ImportError as exc: + error_message = exc.name + + @parent.command( + name, + context_settings={ + "allow_extra_args": True, + "ignore_unknown_options": True, + } + ) + def command(): + raise click.ClickException( + f"Couldn't import command `{name}` due to {error_message}" + ) + else: + parent.add_command(command) diff --git a/src/arrow/dev/archery/archery/utils/cmake.py b/src/arrow/dev/archery/archery/utils/cmake.py new file mode 100644 index 000000000..f93895b1a --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/cmake.py @@ -0,0 +1,215 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import re +from shutil import rmtree, which + +from .command import Command, default_bin + + +class CMake(Command): + def __init__(self, cmake_bin=None): + self.bin = default_bin(cmake_bin, "cmake") + + @staticmethod + def default_generator(): + """ Infer default generator. 
+ + Gives precedence to ninja if there exists an executable named `ninja` + in the search path. + """ + found_ninja = which("ninja") + return "Ninja" if found_ninja else "Unix Makefiles" + + +cmake = CMake() + + +class CMakeDefinition: + """ CMakeDefinition captures the cmake invocation arguments. + + It allows creating build directories with the same definition, e.g. + ``` + build_1 = cmake_def.build("/tmp/build-1") + build_2 = cmake_def.build("/tmp/build-2") + + ... + + build1.all() + build2.all() + """ + + def __init__(self, source, build_type="release", generator=None, + definitions=None, env=None): + """ Initialize a CMakeDefinition + + Parameters + ---------- + source : str + Source directory where the top-level CMakeLists.txt is + located. This is usually the root of the project. + generator : str, optional + definitions: list(str), optional + env : dict(str,str), optional + Environment to use when invoking cmake. This can be required to + work around cmake deficiencies, e.g. CC and CXX. + """ + self.source = os.path.abspath(source) + self.build_type = build_type + self.generator = generator if generator else cmake.default_generator() + self.definitions = definitions if definitions else [] + self.env = env + + @property + def arguments(self): + """" Return the arguments to cmake invocation. """ + arguments = [ + "-G{}".format(self.generator), + ] + self.definitions + [ + self.source + ] + return arguments + + def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs): + """ Invoke cmake into a build directory. + + Parameters + ---------- + build_dir : str + Directory in which the CMake build will be instantiated. + force : bool + If the build folder exists, delete it before. Otherwise if it's + present, an error will be returned. + """ + if os.path.exists(build_dir): + # Extra safety to ensure we're deleting a build folder. 
+ if not CMakeBuild.is_build_dir(build_dir): + raise FileExistsError( + "{} is not a cmake build".format(build_dir) + ) + if not force: + raise FileExistsError( + "{} exists use force=True".format(build_dir) + ) + rmtree(build_dir) + + os.mkdir(build_dir) + + cmd_kwargs = cmd_kwargs if cmd_kwargs else {} + cmake(*self.arguments, cwd=build_dir, env=self.env, **cmd_kwargs) + return CMakeBuild(build_dir, self.build_type, definition=self, + **kwargs) + + def __repr__(self): + return "CMakeDefinition[source={}]".format(self.source) + + +CMAKE_BUILD_TYPE_RE = re.compile("CMAKE_BUILD_TYPE:STRING=([a-zA-Z]+)") + + +class CMakeBuild(CMake): + """ CMakeBuild represents a build directory initialized by cmake. + + The build instance can be used to build/test/install. It alleviates the + user to know which generator is used. + """ + + def __init__(self, build_dir, build_type, definition=None): + """ Initialize a CMakeBuild. + + The caller must ensure that cmake was invoked in the build directory. + + Parameters + ---------- + definition : CMakeDefinition + The definition to build from. + build_dir : str + The build directory to setup into. 
+ """ + assert CMakeBuild.is_build_dir(build_dir) + super().__init__() + self.build_dir = os.path.abspath(build_dir) + self.build_type = build_type + self.definition = definition + + @property + def binaries_dir(self): + return os.path.join(self.build_dir, self.build_type) + + def run(self, *argv, verbose=False, **kwargs): + cmake_args = ["--build", self.build_dir, "--"] + extra = [] + if verbose: + extra.append("-v" if self.bin.endswith("ninja") else "VERBOSE=1") + # Commands must be ran under the build directory + return super().run(*cmake_args, *extra, + *argv, **kwargs, cwd=self.build_dir) + + def all(self): + return self.run("all") + + def clean(self): + return self.run("clean") + + def install(self): + return self.run("install") + + def test(self): + return self.run("test") + + @staticmethod + def is_build_dir(path): + """ Indicate if a path is CMake build directory. + + This method only checks for the existence of paths and does not do any + validation whatsoever. + """ + cmake_cache = os.path.join(path, "CMakeCache.txt") + cmake_files = os.path.join(path, "CMakeFiles") + return os.path.exists(cmake_cache) and os.path.exists(cmake_files) + + @staticmethod + def from_path(path): + """ Instantiate a CMakeBuild from a path. + + This is used to recover from an existing physical directory (created + with or without CMakeBuild). + + Note that this method is not idempotent as the original definition will + be lost. Only build_type is recovered. 
+ """ + if not CMakeBuild.is_build_dir(path): + raise ValueError("Not a valid CMakeBuild path: {}".format(path)) + + build_type = None + # Infer build_type by looking at CMakeCache.txt and looking for a magic + # definition + cmake_cache_path = os.path.join(path, "CMakeCache.txt") + with open(cmake_cache_path, "r") as cmake_cache: + candidates = CMAKE_BUILD_TYPE_RE.findall(cmake_cache.read()) + build_type = candidates[0].lower() if candidates else "release" + + return CMakeBuild(path, build_type) + + def __repr__(self): + return ("CMakeBuild[" + "build = {}," + "build_type = {}," + "definition = {}]".format(self.build_dir, + self.build_type, + self.definition)) diff --git a/src/arrow/dev/archery/archery/utils/command.py b/src/arrow/dev/archery/archery/utils/command.py new file mode 100644 index 000000000..f655e2ef2 --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/command.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +import shlex +import shutil +import subprocess + +from .logger import logger, ctx + + +def default_bin(name, default): + assert(default) + env_name = "ARCHERY_{0}_BIN".format(default.upper()) + return name if name else os.environ.get(env_name, default) + + +# Decorator running a command and returning stdout +class capture_stdout: + def __init__(self, strip=False, listify=False): + self.strip = strip + self.listify = listify + + def __call__(self, f): + def strip_it(x): + return x.strip() if self.strip else x + + def list_it(x): + return x.decode('utf-8').splitlines() if self.listify else x + + def wrapper(*argv, **kwargs): + # Ensure stdout is captured + kwargs["stdout"] = subprocess.PIPE + return list_it(strip_it(f(*argv, **kwargs).stdout)) + return wrapper + + +class Command: + """ + A runnable command. + + Class inheriting from the Command class must provide the bin + property/attribute. + """ + + def __init__(self, bin): + self.bin = bin + + def run(self, *argv, **kwargs): + assert hasattr(self, "bin") + invocation = shlex.split(self.bin) + invocation.extend(argv) + + for key in ["stdout", "stderr"]: + # Preserve caller intention, otherwise silence + if key not in kwargs and ctx.quiet: + kwargs[key] = subprocess.PIPE + + # Prefer safe by default + if "check" not in kwargs: + kwargs["check"] = True + + logger.debug("Executing `{}`".format(invocation)) + return subprocess.run(invocation, **kwargs) + + @property + def available(self): + """ + Indicate if the command binary is found in PATH. 
+ """ + binary = shlex.split(self.bin)[0] + return shutil.which(binary) is not None + + def __call__(self, *argv, **kwargs): + return self.run(*argv, **kwargs) + + +class CommandStackMixin: + def run(self, *argv, **kwargs): + stacked_args = self.argv + argv + return super(CommandStackMixin, self).run(*stacked_args, **kwargs) + + +class Bash(Command): + def __init__(self, bash_bin=None): + self.bin = default_bin(bash_bin, "bash") diff --git a/src/arrow/dev/archery/archery/utils/git.py b/src/arrow/dev/archery/archery/utils/git.py new file mode 100644 index 000000000..798bc5d70 --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/git.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .command import Command, capture_stdout, default_bin +from ..compat import _stringify_path + + +# Decorator prepending argv with the git sub-command found with the method +# name. 
+def git_cmd(fn): + # function name is the subcommand + sub_cmd = fn.__name__.replace("_", "-") + + def wrapper(self, *argv, **kwargs): + return fn(self, sub_cmd, *argv, **kwargs) + return wrapper + + +class Git(Command): + def __init__(self, git_bin=None): + self.bin = default_bin(git_bin, "git") + + def run_cmd(self, cmd, *argv, git_dir=None, **kwargs): + """ Inject flags before sub-command in argv. """ + opts = [] + if git_dir is not None: + opts.extend(["-C", _stringify_path(git_dir)]) + + return self.run(*opts, cmd, *argv, **kwargs) + + @capture_stdout(strip=False) + @git_cmd + def archive(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @git_cmd + def clone(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @git_cmd + def fetch(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @git_cmd + def checkout(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + def dirty(self, **kwargs): + return len(self.status("--short", **kwargs)) > 0 + + @git_cmd + def log(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @capture_stdout(strip=True, listify=True) + @git_cmd + def ls_files(self, *argv, listify=False, **kwargs): + stdout = self.run_cmd(*argv, **kwargs) + return stdout + + @capture_stdout(strip=True) + @git_cmd + def rev_parse(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @capture_stdout(strip=True) + @git_cmd + def status(self, *argv, **kwargs): + return self.run_cmd(*argv, **kwargs) + + @capture_stdout(strip=True) + def head(self, **kwargs): + """ Return commit pointed by HEAD. """ + return self.rev_parse("HEAD", **kwargs) + + @capture_stdout(strip=True) + def current_branch(self, **kwargs): + return self.rev_parse("--abbrev-ref", "HEAD", **kwargs) + + def repository_root(self, git_dir=None, **kwargs): + """ Locates the repository's root path from a subdirectory. 
""" + stdout = self.rev_parse("--show-toplevel", git_dir=git_dir, **kwargs) + return stdout.decode('utf-8') + + +git = Git() diff --git a/src/arrow/dev/archery/archery/utils/lint.py b/src/arrow/dev/archery/archery/utils/lint.py new file mode 100644 index 000000000..d95bfeea3 --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/lint.py @@ -0,0 +1,429 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import fnmatch +import gzip +import os +from pathlib import Path + +import click + +from .command import Bash, Command, default_bin +from .cmake import CMake +from .git import git +from .logger import logger +from ..lang.cpp import CppCMakeDefinition, CppConfiguration +from ..lang.python import Autopep8, Flake8, NumpyDoc +from .rat import Rat, exclusion_from_globs +from .tmpdir import tmpdir + + +_archery_install_msg = ( + "Please install archery using: `pip install -e dev/archery[lint]`. 
" +) + + +class LintValidationException(Exception): + pass + + +class LintResult: + def __init__(self, success, reason=None): + self.success = success + + def ok(self): + if not self.success: + raise LintValidationException + + @staticmethod + def from_cmd(command_result): + return LintResult(command_result.returncode == 0) + + +def cpp_linter(src, build_dir, clang_format=True, cpplint=True, + clang_tidy=False, iwyu=False, iwyu_all=False, + fix=False): + """ Run clang-format, cpplint and clang-tidy on cpp/ codebase. """ + logger.info("Running C++ linters") + + cmake = CMake() + if not cmake.available: + logger.error("cpp linter requested but cmake binary not found.") + return + + # A cmake build directory is required to populate `compile_commands.json` + # which in turn is required by clang-tidy. It also provides a convenient + # way to hide clang-format/clang-tidy invocation via the Generate + # (ninja/make) targets. + + # ARROW_LINT_ONLY exits early but ignore building compile_command.json + lint_only = not (iwyu or clang_tidy) + cmake_args = {"with_python": False, "with_lint_only": lint_only} + cmake_def = CppCMakeDefinition(src.cpp, CppConfiguration(**cmake_args)) + + build = cmake_def.build(build_dir) + if clang_format: + target = "format" if fix else "check-format" + yield LintResult.from_cmd(build.run(target, check=False)) + + if cpplint: + yield LintResult.from_cmd(build.run("lint", check=False)) + yield LintResult.from_cmd(build.run("lint_cpp_cli", check=False)) + + if clang_tidy: + yield LintResult.from_cmd(build.run("check-clang-tidy", check=False)) + + if iwyu: + if iwyu_all: + iwyu_cmd = "iwyu-all" + else: + iwyu_cmd = "iwyu" + yield LintResult.from_cmd(build.run(iwyu_cmd, check=False)) + + +class CMakeFormat(Command): + + def __init__(self, paths, cmake_format_bin=None): + self.check_version() + self.bin = default_bin(cmake_format_bin, "cmake-format") + self.paths = paths + + @classmethod + def from_patterns(cls, base_path, include_patterns, 
exclude_patterns): + paths = { + str(path.as_posix()) + for pattern in include_patterns + for path in base_path.glob(pattern) + } + for pattern in exclude_patterns: + pattern = (base_path / pattern).as_posix() + paths -= set(fnmatch.filter(paths, str(pattern))) + return cls(paths) + + @staticmethod + def check_version(): + try: + # cmake_format is part of the cmakelang package + import cmakelang + except ImportError: + raise ImportError( + + ) + # pin a specific version of cmake_format, must be updated in setup.py + if cmakelang.__version__ != "0.6.13": + raise LintValidationException( + f"Wrong version of cmake_format is detected. " + f"{_archery_install_msg}" + ) + + def check(self): + return self.run("-l", "error", "--check", *self.paths, check=False) + + def fix(self): + return self.run("--in-place", *self.paths, check=False) + + +def cmake_linter(src, fix=False): + """ + Run cmake-format on all CMakeFiles.txt + """ + logger.info("Running cmake-format linters") + + cmake_format = CMakeFormat.from_patterns( + src.path, + include_patterns=[ + 'ci/**/*.cmake', + 'cpp/CMakeLists.txt', + 'cpp/src/**/CMakeLists.txt', + 'cpp/cmake_modules/*.cmake', + 'go/**/CMakeLists.txt', + 'java/**/CMakeLists.txt', + 'matlab/**/CMakeLists.txt', + 'python/CMakeLists.txt', + ], + exclude_patterns=[ + 'cpp/cmake_modules/FindNumPy.cmake', + 'cpp/cmake_modules/FindPythonLibsNew.cmake', + 'cpp/cmake_modules/UseCython.cmake', + 'cpp/src/arrow/util/config.h.cmake', + ] + ) + method = cmake_format.fix if fix else cmake_format.check + + yield LintResult.from_cmd(method()) + + +def python_linter(src, fix=False): + """Run Python linters on python/pyarrow, python/examples, setup.py + and dev/. """ + setup_py = os.path.join(src.python, "setup.py") + setup_cfg = os.path.join(src.python, "setup.cfg") + + logger.info("Running Python formatter (autopep8)") + + autopep8 = Autopep8() + if not autopep8.available: + logger.error( + "Python formatter requested but autopep8 binary not found. 
" + f"{_archery_install_msg}") + return + + # Gather files for autopep8 + patterns = ["python/pyarrow/**/*.py", + "python/pyarrow/**/*.pyx", + "python/pyarrow/**/*.pxd", + "python/pyarrow/**/*.pxi", + "python/examples/**/*.py", + "dev/archery/**/*.py"] + files = [setup_py] + for pattern in patterns: + files += list(map(str, Path(src.path).glob(pattern))) + + args = ['--global-config', setup_cfg, '--ignore-local-config'] + if fix: + args += ['-j0', '--in-place'] + args += sorted(files) + yield LintResult.from_cmd(autopep8(*args)) + else: + # XXX `-j0` doesn't work well with `--exit-code`, so instead + # we capture the diff and check whether it's empty + # (https://github.com/hhatto/autopep8/issues/543) + args += ['-j0', '--diff'] + args += sorted(files) + diff = autopep8.run_captured(*args) + if diff: + print(diff.decode('utf8')) + yield LintResult(success=False) + else: + yield LintResult(success=True) + + # Run flake8 after autopep8 (the latter may have modified some files) + logger.info("Running Python linter (flake8)") + + flake8 = Flake8() + if not flake8.available: + logger.error( + "Python linter requested but flake8 binary not found. " + f"{_archery_install_msg}") + return + + flake8_exclude = ['.venv*'] + + yield LintResult.from_cmd( + flake8("--extend-exclude=" + ','.join(flake8_exclude), + setup_py, src.pyarrow, os.path.join(src.python, "examples"), + src.dev, check=False)) + config = os.path.join(src.python, ".flake8.cython") + yield LintResult.from_cmd( + flake8("--config=" + config, src.pyarrow, check=False)) + + +def python_numpydoc(symbols=None, allow_rules=None, disallow_rules=None): + """Run numpydoc linter on python. + + Pyarrow must be available for import. 
+ """ + logger.info("Running Python docstring linters") + # by default try to run on all pyarrow package + symbols = symbols or { + 'pyarrow', + 'pyarrow.compute', + 'pyarrow.csv', + 'pyarrow.dataset', + 'pyarrow.feather', + 'pyarrow.flight', + 'pyarrow.fs', + 'pyarrow.gandiva', + 'pyarrow.ipc', + 'pyarrow.json', + 'pyarrow.orc', + 'pyarrow.parquet', + 'pyarrow.plasma', + 'pyarrow.types', + } + try: + numpydoc = NumpyDoc(symbols) + except RuntimeError as e: + logger.error(str(e)) + yield LintResult(success=False) + return + + results = numpydoc.validate( + # limit the validation scope to the pyarrow package + from_package='pyarrow', + allow_rules=allow_rules, + disallow_rules=disallow_rules + ) + + if len(results) == 0: + yield LintResult(success=True) + return + + number_of_violations = 0 + for obj, result in results: + errors = result['errors'] + + # inspect doesn't play nice with cython generated source code, + # to use a hacky way to represent a proper __qualname__ + doc = getattr(obj, '__doc__', '') + name = getattr(obj, '__name__', '') + qualname = getattr(obj, '__qualname__', '') + module = getattr(obj, '__module__', '') + instance = getattr(obj, '__self__', '') + if instance: + klass = instance.__class__.__name__ + else: + klass = '' + + try: + cython_signature = doc.splitlines()[0] + except Exception: + cython_signature = '' + + desc = '.'.join(filter(None, [module, klass, qualname or name])) + + click.echo() + click.echo(click.style(desc, bold=True, fg='yellow')) + if cython_signature: + qualname_with_signature = '.'.join([module, cython_signature]) + click.echo( + click.style( + '-> {}'.format(qualname_with_signature), + fg='yellow' + ) + ) + + for error in errors: + number_of_violations += 1 + click.echo('{}: {}'.format(*error)) + + msg = 'Total number of docstring violations: {}'.format( + number_of_violations + ) + click.echo() + click.echo(click.style(msg, fg='red')) + + yield LintResult(success=False) + + +def rat_linter(src, root): + """Run 
apache-rat license linter.""" + logger.info("Running apache-rat linter") + + if src.git_dirty: + logger.warn("Due to the usage of git-archive, uncommitted files will" + " not be checked for rat violations. ") + + exclusion = exclusion_from_globs( + os.path.join(src.dev, "release", "rat_exclude_files.txt")) + + # Creates a git-archive of ArrowSources, apache-rat expects a gzip + # compressed tar archive. + archive_path = os.path.join(root, "apache-arrow.tar.gz") + src.archive(archive_path, compressor=gzip.compress) + report = Rat().report(archive_path) + + violations = list(report.validate(exclusion=exclusion)) + for violation in violations: + print("apache-rat license violation: {}".format(violation)) + + yield LintResult(len(violations) == 0) + + +def r_linter(src): + """Run R linter.""" + logger.info("Running R linter") + r_lint_sh = os.path.join(src.r, "lint.sh") + yield LintResult.from_cmd(Bash().run(r_lint_sh, check=False)) + + +class Hadolint(Command): + def __init__(self, hadolint_bin=None): + self.bin = default_bin(hadolint_bin, "hadolint") + + +def is_docker_image(path): + dirname = os.path.dirname(path) + filename = os.path.basename(path) + + excluded = dirname.startswith( + "dev") or dirname.startswith("python/manylinux") + + return filename.startswith("Dockerfile") and not excluded + + +def docker_linter(src): + """Run Hadolint docker linter.""" + logger.info("Running Docker linter") + + hadolint = Hadolint() + + if not hadolint.available: + logger.error( + "hadolint linter requested but hadolint binary not found.") + return + + for path in git.ls_files(git_dir=src.path): + if is_docker_image(path): + yield LintResult.from_cmd(hadolint.run(path, check=False, + cwd=src.path)) + + +def linter(src, fix=False, *, clang_format=False, cpplint=False, + clang_tidy=False, iwyu=False, iwyu_all=False, + python=False, numpydoc=False, cmake_format=False, rat=False, + r=False, docker=False): + """Run all linters.""" + with tmpdir(prefix="arrow-lint-") as root: + 
build_dir = os.path.join(root, "cpp-build") + + # Linters yield LintResult without raising exceptions on failure. + # This allows running all linters in one pass and exposing all + # errors to the user. + results = [] + + if clang_format or cpplint or clang_tidy or iwyu: + results.extend(cpp_linter(src, build_dir, + clang_format=clang_format, + cpplint=cpplint, + clang_tidy=clang_tidy, + iwyu=iwyu, + iwyu_all=iwyu_all, + fix=fix)) + + if python: + results.extend(python_linter(src, fix=fix)) + + if numpydoc: + results.extend(python_numpydoc()) + + if cmake_format: + results.extend(cmake_linter(src, fix=fix)) + + if rat: + results.extend(rat_linter(src, root)) + + if r: + results.extend(r_linter(src)) + + if docker: + results.extend(docker_linter(src)) + + # Raise error if one linter failed, ensuring calling code can exit with + # non-zero. + for result in results: + result.ok() diff --git a/src/arrow/dev/archery/archery/utils/logger.py b/src/arrow/dev/archery/archery/utils/logger.py new file mode 100644 index 000000000..9d0feda88 --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/logger.py @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import logging + +""" Global logger. 
""" +logger = logging.getLogger("archery") + + +class LoggingContext: + def __init__(self, quiet=False): + self.quiet = quiet + + +ctx = LoggingContext() diff --git a/src/arrow/dev/archery/archery/utils/maven.py b/src/arrow/dev/archery/archery/utils/maven.py new file mode 100644 index 000000000..96a3bf5bd --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/maven.py @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + +from .command import Command, default_bin + + +class Maven(Command): + def __init__(self, maven_bin=None): + self.bin = default_bin(maven_bin, "mvn") + + +maven = Maven() + + +class MavenDefinition: + """ MavenDefinition captures the maven invocation arguments. + + It allows creating build directories with the same definition, e.g. + ``` + build_1 = maven_def.build("/tmp/build-1") + build_2 = maven_def.build("/tmp/build-2") + + ... + + build1.install() + build2.install() + """ + + def __init__(self, source, build_definitions=None, + benchmark_definitions=None, env=None): + """ Initialize a MavenDefinition + + Parameters + ---------- + source : str + Source directory where the top-level pom.xml is + located. This is usually the root of the project. 
+ build_definitions: list(str), optional + benchmark_definitions: list(str), optional + """ + self.source = os.path.abspath(source) + self.build_definitions = build_definitions if build_definitions else [] + self.benchmark_definitions =\ + benchmark_definitions if benchmark_definitions else [] + self.env = env + + @property + def build_arguments(self): + """" Return the arguments to maven invocation for build. """ + arguments = self.build_definitions + [ + "-B", "-DskipTests", "-Drat.skip=true", + "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer." + "Slf4jMavenTransferListener=warn", + "-T", "2C", "install" + ] + return arguments + + def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs): + """ Invoke maven into a build directory. + + Parameters + ---------- + build_dir : str + Directory in which the Maven build will be instantiated. + force : bool + not used now + """ + if os.path.exists(build_dir): + # Extra safety to ensure we're deleting a build folder. + if not MavenBuild.is_build_dir(build_dir): + raise FileExistsError( + "{} is not a maven build".format(build_dir) + ) + + cmd_kwargs = cmd_kwargs if cmd_kwargs else {} + assert MavenBuild.is_build_dir(build_dir) + maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs) + return MavenBuild(build_dir, definition=self, **kwargs) + + @property + def list_arguments(self): + """" Return the arguments to maven invocation for list """ + arguments = [ + "-Dskip.perf.benchmarks=false", "-Dbenchmark.list=-lp", "install" + ] + return arguments + + @property + def benchmark_arguments(self): + """" Return the arguments to maven invocation for benchmark """ + arguments = self.benchmark_definitions + [ + "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1", + "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false " + "-Darrow.enable_unsafe_memory_access=true\"", + "install" + ] + return arguments + + def __repr__(self): + return "MavenDefinition[source={}]".format(self.source) + 
+ +class MavenBuild(Maven): + """ MavenBuild represents a build directory initialized by maven. + + The build instance can be used to build/test/install. It alleviates the + user to know which generator is used. + """ + + def __init__(self, build_dir, definition=None): + """ Initialize a MavenBuild. + + The caller must ensure that maven was invoked in the build directory. + + Parameters + ---------- + definition : MavenDefinition + The definition to build from. + build_dir : str + The build directory to setup into. + """ + assert MavenBuild.is_build_dir(build_dir) + super().__init__() + self.build_dir = os.path.abspath(build_dir) + self.definition = definition + + @property + def binaries_dir(self): + return self.build_dir + + def run(self, *argv, verbose=False, cwd=None, **kwargs): + extra = [] + if verbose: + extra.append("-X") + if cwd is None: + cwd = self.build_dir + # Commands must be ran under the directory where pom.xml exists + return super().run(*extra, *argv, **kwargs, cwd=cwd) + + def build(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.build_arguments + cwd = self.binaries_dir + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + def list(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.list_arguments + cwd = self.binaries_dir + "/performance" + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + def benchmark(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.benchmark_arguments + cwd = self.binaries_dir + "/performance" + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + @staticmethod + def is_build_dir(path): + """ Indicate if a path is Maven top directory. + + This method only checks for the existence of paths and does not do any + validation whatsoever. 
+ """ + pom_xml = os.path.join(path, "pom.xml") + performance_dir = os.path.join(path, "performance") + return os.path.exists(pom_xml) and os.path.isdir(performance_dir) + + @staticmethod + def from_path(path): + """ Instantiate a Maven from a path. + + This is used to recover from an existing physical directory (created + with or without Maven). + + Note that this method is not idempotent as the original definition will + be lost. + """ + if not MavenBuild.is_build_dir(path): + raise ValueError("Not a valid MavenBuild path: {}".format(path)) + + return MavenBuild(path, definition=None) + + def __repr__(self): + return ("MavenBuild[" + "build = {}," + "definition = {}]".format(self.build_dir, + self.definition)) diff --git a/src/arrow/dev/archery/archery/utils/rat.py b/src/arrow/dev/archery/archery/utils/rat.py new file mode 100644 index 000000000..e7fe19a7e --- /dev/null +++ b/src/arrow/dev/archery/archery/utils/rat.py @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import fnmatch
import re
from xml.etree import ElementTree

from ..lang.java import Jar
from .cache import Cache
from .command import capture_stdout

# Kept as a string: formatting a float would silently drop trailing zeros
# (e.g. 0.10 -> "0.1") and yield a wrong jar name and download URL.
RAT_VERSION = "0.13"
RAT_JAR_FILENAME = "apache-rat-{}.jar".format(RAT_VERSION)
RAT_URL_ = "https://repo1.maven.org/maven2/org/apache/rat/apache-rat"
RAT_URL = "/".join([RAT_URL_, str(RAT_VERSION), RAT_JAR_FILENAME])

# Strips the first path component (everything up to and including the
# first "/") from a resource name reported by RAT.
_LEADING_COMPONENT = re.compile(r'^[^/]+/')


class Rat(Jar):
    """ Apache RAT jar, fetched through the local Cache on construction. """

    def __init__(self):
        jar = Cache().get_or_insert_from_url(RAT_JAR_FILENAME, RAT_URL)
        Jar.__init__(self, jar)

    @capture_stdout(strip=False)
    def run_report(self, archive_path, **kwargs):
        """ Run RAT with ``--xml`` on archive_path.

        NOTE(review): the return value is whatever `capture_stdout` produces
        (presumably the raw, unstripped stdout) -- confirm in .command.
        """
        return self.run("--xml", archive_path, **kwargs)

    def report(self, archive_path, **kwargs):
        """ Run RAT on archive_path and wrap the output in a RatReport. """
        return RatReport(self.run_report(archive_path, **kwargs))


def exclusion_from_globs(exclusions_path):
    """ Build a path predicate from a file of glob patterns.

    Parameters
    ----------
    exclusions_path : str
        File containing one fnmatch-style pattern per line. Surrounding
        whitespace is stripped and blank lines are ignored.

    Returns
    -------
    callable
        ``f(path) -> bool``, True if path matches at least one pattern.
    """
    with open(exclusions_path, 'r') as exclusions_fd:
        stripped = (line.strip() for line in exclusions_fd)
        exclusions = [pattern for pattern in stripped if pattern]

    def excluded(path):
        # Generator form lets any() stop at the first matching pattern.
        return any(fnmatch.fnmatch(path, e) for e in exclusions)

    return excluded


class RatReport:
    """ Parsed XML report produced by Apache RAT. """

    def __init__(self, xml):
        self.xml = xml
        self.tree = ElementTree.fromstring(xml)

    def __repr__(self):
        return "RatReport({})".format(self.xml)

    def validate(self, exclusion=None):
        """ Yield the names of resources whose license is not approved.

        Parameters
        ----------
        exclusion : callable, optional
            Predicate on the cleaned resource name; resources for which it
            returns a truthy value are not reported.

        Yields
        ------
        str
            Resource name with its first path component removed.
        """
        for r in self.tree.findall('resource'):
            approvals = r.findall('license-approval')
            # Resources without an approval element, or whose first approval
            # is 'true', are considered fine.
            if not approvals or approvals[0].attrib['name'] == 'true':
                continue

            clean_name = _LEADING_COMPONENT.sub('', r.attrib['name'])

            if exclusion and exclusion(clean_name):
                continue

            yield clean_name
from abc import ABCMeta, abstractmethod
import datetime

import jinja2


def markdown_escape(s):
    """ Return s with the markdown characters ``* # _ ~ ` >`` each prefixed
    by a backslash.
    """
    for char in ('*', '#', '_', '~', '`', '>'):
        s = s.replace(char, '\\' + char)
    return s


class Report(metaclass=ABCMeta):
    """ Base class for reports backed by keyword data.

    Subclasses provide `fields` (names of the required keyword arguments)
    and `templates`. Every keyword passed to the constructor is readable as
    an attribute of the instance.
    """

    def __init__(self, **kwargs):
        """ Store kwargs as the report data.

        Raises
        ------
        ValueError
            If a name listed in `fields` is missing from kwargs.
        """
        for field in self.fields:
            if field not in kwargs:
                raise ValueError('Missing keyword argument {}'.format(field))
        self._data = kwargs

    def __getattr__(self, key):
        # Only invoked when regular attribute lookup fails. `_data` is read
        # through __dict__ so that an instance on which it has not been set
        # yet cannot re-enter __getattr__ and recurse without bound.
        data = self.__dict__.get('_data', {})
        try:
            return data[key]
        except KeyError:
            # AttributeError is what hasattr() and getattr(obj, name,
            # default) rely on; a KeyError would leak through them.
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    type(self).__name__, key)
            ) from None

    @property
    @abstractmethod
    def fields(self):
        """ Names of the keyword arguments required by the constructor. """
        pass

    @property
    @abstractmethod
    def templates(self):
        """ Mapping used by JinjaReport.render: name -> template path. """
        pass


class JinjaReport(Report):
    """ Report rendered with jinja2 templates from the 'archery' package's
    'templates' directory.
    """

    def __init__(self, **kwargs):
        self.env = jinja2.Environment(
            loader=jinja2.PackageLoader('archery', 'templates')
        )
        # Exposed to templates as the `md` filter and the `today` callable.
        self.env.filters['md'] = markdown_escape
        self.env.globals['today'] = datetime.date.today
        super().__init__(**kwargs)

    def render(self, template_name):
        """ Render the template registered under template_name in
        `templates`, using the report data as the template context.
        """
        template_path = self.templates[template_name]
        template = self.env.get_template(template_path)
        return template.render(**self._data)
import os
from pathlib import Path
import subprocess

from .git import git


class InvalidArrowSource(Exception):
    """ Raised when a path does not contain Apache Arrow's sources. """


class ArrowSources:
    """ ArrowSources is a companion class representing a directory containing
    Apache Arrow's sources.
    """
    # Note that WORKSPACE is a reserved git revision name by this module to
    # reference the current git workspace. In other words, this indicates to
    # ArrowSources.at_revision that no cloning/checkout is required.
    WORKSPACE = "WORKSPACE"

    def __init__(self, path):
        """ Initialize an ArrowSources

        Parameters
        ----------
        path : str or pathlib.Path
            Root directory of an Arrow source tree.

        Raises
        ------
        InvalidArrowSource
            If ``<path>/cpp/CMakeLists.txt`` does not exist.
        """
        path = Path(path)
        # validate by checking a specific path in the arrow source tree
        if not (path / 'cpp' / 'CMakeLists.txt').exists():
            raise InvalidArrowSource(
                "No Arrow C++ sources found in {}.".format(path)
            )
        self.path = path

    @property
    def archery(self):
        """ Returns the archery directory of an Arrow sources. """
        return self.dev / "archery"

    @property
    def cpp(self):
        """ Returns the cpp directory of an Arrow sources. """
        return self.path / "cpp"

    @property
    def dev(self):
        """ Returns the dev directory of an Arrow sources. """
        return self.path / "dev"

    @property
    def java(self):
        """ Returns the java directory of an Arrow sources. """
        return self.path / "java"

    @property
    def python(self):
        """ Returns the python directory of an Arrow sources. """
        return self.path / "python"

    @property
    def pyarrow(self):
        """ Returns the python/pyarrow directory of an Arrow sources. """
        return self.python / "pyarrow"

    @property
    def r(self):
        """ Returns the r directory of an Arrow sources. """
        return self.path / "r"

    @property
    def git_backed(self):
        """ Indicate if the sources are backed by git. """
        return (self.path / ".git").exists()

    @property
    def git_dirty(self):
        """ Indicate if the sources is a dirty git directory. """
        return self.git_backed and git.dirty(git_dir=self.path)

    def archive(self, path, dereference=False, compressor=None, revision=None):
        """ Saves a git archive at path.

        Parameters
        ----------
        path : str
            Destination file, opened in binary write mode.
        dereference : bool
            Currently unused (see the TODO below).
        compressor : callable, optional
            Applied to the archive payload before it is written.
        revision : str, optional
            Revision to archive, "HEAD" when not given.

        Raises
        ------
        ValueError
            If the sources are not backed by git.
        """
        if not self.git_backed:
            raise ValueError("{} is not backed by git".format(self))

        rev = revision if revision else "HEAD"
        archive = git.archive("--prefix=apache-arrow/", rev,
                              git_dir=self.path)

        # TODO(fsaintjacques): fix dereference for

        if compressor:
            archive = compressor(archive)

        with open(path, "wb") as archive_fd:
            archive_fd.write(archive)

    def at_revision(self, revision, clone_dir):
        """ Return a copy of the current sources for a specified git revision.

        This method may return the current object if no checkout is required.
        The caller is responsible to remove the cloned repository directory.

        The user can use the special WORKSPACE token to mean the current git
        workspace (no checkout performed).

        The second value of the returned tuple indicates if a clone was
        performed.

        Parameters
        ----------
        revision : str
                   Revision to checkout sources at.
        clone_dir : str
                    Path to checkout the local clone.

        Raises
        ------
        ValueError
            If the sources are not backed by git.
        """
        if not self.git_backed:
            raise ValueError("{} is not backed by git".format(self))

        if revision == ArrowSources.WORKSPACE:
            return self, False

        # A local clone is required to leave the current sources intact such
        # that builds depending on said sources are not invalidated (or worse
        # slightly affected when re-invoking the generator).
        # "--local" only works when dest dir is on same volume of source dir.
        # "--shared" works even if dest dir is on different volume.
        git.clone("--shared", self.path, clone_dir)

        # Revision can reference "origin/" (or any remotes) that are not found
        # in the local clone. Thus, revisions are dereferenced in the source
        # repository.
        # NOTE(review): no git_dir is passed here, unlike the other git
        # calls in this class, so this presumably resolves against the
        # current working directory -- confirm against the git wrapper.
        original_revision = git.rev_parse(revision)

        git.checkout(original_revision, git_dir=clone_dir)

        return ArrowSources(clone_dir), True

    @staticmethod
    def find(path=None):
        """ Infer Arrow sources directory from various method.

        The following guesses are done in order until a valid match is found:

        1. Checks the given optional parameter.

        2. Checks if the environment variable `ARROW_SRC` is defined and use
           this.

        3. Checks if the current working directory (cwd) is an Arrow source
           directory.

        4. Checks if this file is still in the original source repository,
           i.e. if the directory four levels above its own directory is an
           Arrow source directory.

        5. Checks the root of the git repository containing the current
           working directory, if any.

        Raises
        ------
        InvalidArrowSource
            If none of the candidates is an Arrow source directory.
        """

        # Explicit via environment
        env = os.environ.get("ARROW_SRC")

        # Implicit via cwd
        cwd = Path.cwd()

        # Implicit via current file
        try:
            this = Path(__file__).parents[4]
        except IndexError:
            this = None

        # Implicit via git repository (if archery is installed system wide)
        try:
            repo = git.repository_root(git_dir=cwd)
        except subprocess.CalledProcessError:
            # We're not inside a git repository.
            repo = None

        # Unset candidates (None or empty) are dropped, order is preserved.
        paths = list(filter(None, [path, env, cwd, this, repo]))
        for p in paths:
            try:
                return ArrowSources(p)
            except InvalidArrowSource:
                pass

        searched_paths = "\n".join([" - {}".format(p) for p in paths])
        raise InvalidArrowSource(
            "Unable to locate Arrow's source directory. "
            "Searched paths are:\n{}".format(searched_paths)
        )

    def __repr__(self):
        # __repr__ must return a str: returning the Path object itself makes
        # repr(), str() and "{}".format(self) raise TypeError, which masked
        # the "is not backed by git" errors raised above.
        return str(self.path)
from contextlib import contextmanager
from tempfile import mkdtemp, TemporaryDirectory


@contextmanager
def tmpdir(preserve=False, prefix="arrow-archery-"):
    """ Context manager yielding the path of a fresh temporary directory.

    Parameters
    ----------
    preserve : bool
        When true the directory is created with mkdtemp and left on disk
        after the block exits; otherwise it is removed on exit.
    prefix : str
        Prefix of the directory name.
    """
    if not preserve:
        # TemporaryDirectory removes the directory when the block exits.
        with TemporaryDirectory(prefix=prefix) as scratch:
            yield scratch
        return

    # Nothing cleans this one up: removal is left to the caller.
    yield mkdtemp(prefix=prefix)
import pathlib

import pytest


def pytest_addoption(parser):
    """ Register the ``--enable-integration`` command line flag. """
    parser.addoption(
        "--enable-integration",
        action="store_true",
        default=False,
        help="run slow tests"
    )


def pytest_configure(config):
    """ Declare the ``integration`` marker. """
    config.addinivalue_line(
        "markers",
        (
            "integration: mark test as integration tests involving more "
            "extensive setup (only used for crossbow at the moment)"
        )
    )


def pytest_collection_modifyitems(config, items):
    """ Mark ``integration`` tests as skipped unless
    ``--enable-integration`` was given.
    """
    if config.getoption("--enable-integration"):
        return
    marker = pytest.mark.skip(reason="need --enable-integration option to run")
    for item in items:
        if "integration" in item.keywords:
            item.add_marker(marker)


@pytest.fixture
def load_fixture(request):
    """ Fixture returning ``loader(name, decoder=...)``.

    The loader resolves name inside the ``fixtures`` directory located next
    to the requesting test file and passes the resulting path to decoder.
    The default decoder parses ``.json`` and ``.yaml`` files and returns the
    text content of anything else.
    """
    current_test_directory = pathlib.Path(request.node.fspath).parent

    def decoder(path):
        with path.open('r') as fp:
            if path.suffix == '.json':
                import json
                return json.load(fp)
            elif path.suffix == '.yaml':
                import yaml
                # safe_load only builds plain Python objects. A bare
                # yaml.load(fp) can construct arbitrary objects and is a
                # TypeError on PyYAML >= 6, where Loader is mandatory.
                return yaml.safe_load(fp)
            else:
                return fp.read()

    def loader(name, decoder=decoder):
        path = current_test_directory / 'fixtures' / name
        return decoder(path)

    return loader
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This script generates json and arrow files of each type (e.g. primitive) for integration endian test +# Usage: generate_files_for_endian_test.sh +# ARROW_CPP_EXE_PATH : where Arrow C++ binaries can be found +# TMP_DIR : where files will be generated + +set -e + +: ${ARROW_CPP_EXE_PATH:=/arrow/cpp/build/debug/} +: ${TMP_DIR:=/tmp/arrow} + +json_dir=$TMP_DIR/arrow.$$ +mkdir -p $json_dir + +archery integration --stop-on-error --with-cpp=1 --tempdir=$json_dir + +for f in $json_dir/*.json ; do + $ARROW_CPP_EXE_PATH/arrow-json-integration-test -mode JSON_TO_ARROW -json $f -arrow ${f%.*}.arrow_file -integration true ; +done +for f in $json_dir/*.arrow_file ; do + $ARROW_CPP_EXE_PATH/arrow-file-to-stream $f > ${f%.*}.stream; +done +for f in $json_dir/*.json ; do + gzip $f ; +done +echo "The files are under $json_dir" diff --git a/src/arrow/dev/archery/requirements.txt b/src/arrow/dev/archery/requirements.txt new file mode 100644 index 000000000..0e1258adb --- /dev/null +++ b/src/arrow/dev/archery/requirements.txt @@ -0,0 +1,4 @@ +click +pygithub +python-dotenv +ruamel.yaml diff --git a/src/arrow/dev/archery/setup.py b/src/arrow/dev/archery/setup.py new file mode 100755 index 000000000..664807375 --- /dev/null +++ b/src/arrow/dev/archery/setup.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
import functools
import operator
import sys
from setuptools import setup, find_packages

if sys.version_info < (3, 6):
    sys.exit('Python < 3.6 is not supported')

# For pathlib.Path compatibility
jinja_req = 'jinja2>=2.11'

# Optional dependency groups, installable as `archery[<name>]`.
extras = {
    'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.6.13'],
    'benchmark': ['pandas'],
    'docker': ['ruamel.yaml', 'python-dotenv'],
    'release': [jinja_req, 'jira', 'semver', 'gitpython'],
    'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'ruamel.yaml',
                 'setuptools_scm'],
    'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml',
                        'setuptools_scm'],
}
extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']
# Union of every group above. Sorted because plain set iteration order
# changes between interpreter runs, which made the generated metadata
# differ from one build to the next.
extras['all'] = sorted(set(functools.reduce(operator.add, extras.values())))

setup(
    name='archery',
    version="0.1.0",
    description='Apache Arrow Developers Tools',
    url='http://github.com/apache/arrow',
    maintainer='Arrow Developers',
    maintainer_email='dev@arrow.apache.org',
    packages=find_packages(),
    include_package_data=True,
    # Mirrors the runtime check above so installers can refuse unsupported
    # interpreters before running this script.
    python_requires='>=3.6',
    install_requires=['click>=7'],
    tests_require=['pytest', 'responses'],
    extras_require=extras,
    entry_points='''
        [console_scripts]
        archery=archery.cli:archery
    '''
)
Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +PG_USER=benchmark +PG_PASS=benchmark diff --git a/src/arrow/dev/benchmarking/.gitignore b/src/arrow/dev/benchmarking/.gitignore new file mode 100644 index 000000000..cda00d658 --- /dev/null +++ b/src/arrow/dev/benchmarking/.gitignore @@ -0,0 +1 @@ +/machine.json diff --git a/src/arrow/dev/benchmarking/Dockerfile b/src/arrow/dev/benchmarking/Dockerfile new file mode 100644 index 000000000..f47033397 --- /dev/null +++ b/src/arrow/dev/benchmarking/Dockerfile @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +FROM postgres:11-alpine + +# Any `.sh` and `.sql` files copied to the entrypoint directory +# will be run during startup. See `docker-entrypoint.sh` in +# https://github.com/docker-library/postgres/blob/master/11/alpine/ +COPY ddl/* /docker-entrypoint-initdb.d/ diff --git a/src/arrow/dev/benchmarking/README.md b/src/arrow/dev/benchmarking/README.md new file mode 100644 index 000000000..c5ddd62e0 --- /dev/null +++ b/src/arrow/dev/benchmarking/README.md @@ -0,0 +1,255 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> + + +> NOTE: For those deploying this database, Postgres does not by default use +> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in +> some columns to always work. This [stackoverflow post][so-utf8] describes +> how to do it for Amazon RDS. This [section of the docs][pg-charset] +> states how to do it in general, i.e.: `initdb -E UTF8`. + +# Benchmark database + +This directory contains files related to the benchmark database. + +- 'ddl/\*.sql' contains the database definition. +- 'examples/' contain code to test the database and demonstrate its use. 
+- 'Dockerfile' and 'docker-compose.yml' are for developing benchmarks + against a testing database. +- An auto-generated summary of views in the [Data model](./data_model.rst). + +## Setup + +To create a 'machine.json' file that will uniquely identify a computer for +benchmark submission, run the provided shell script and fill in the prompts +to identify the GPU. + +> NOTE: this does not work on VMs or Windows. + +```shell +./make_machine_json.sh +``` + +Submit the machine details via HTTP using the command + +> NOTE: This will only work if we have selected graphql as a client +> and have it running in production or if during development +> you have run `docker-compose up` to create and run both a +> database Docker container and graphql client Docker container. + +```shell +./graphql_submit.sh machine machine.json localhost:5000/graphql +``` + +or submit after starting up the psql client from this directory, using + +``` +\set content `cat machine.json` +SELECT ingest_machine_view(:'content'::jsonb); +``` + +> NOTE: If you don't have a "machine.json" file generated, +> use the example file "examples/machine.json" instead. + +## Local testing + +There is a file named "[.env](.env)" in this directory that is used by +`docker-compose` to set up the postgres user and password for the +local containers. Currently the name and password are both +`benchmark`. This will be the password for the psql client as well. + +The Postgres Alpine image runs any added '\*.sql' and '\*.sh' scripts placed +in '/docker-entrypoint-initdb.d/' during its startup script, so the local +database will be set up automatically once the container is running. + +To start the containers, be sure to have [Docker installed][docker], +and then run the following from this directory (arrow/dev/benchmarking). + + +``` +docker-compose up +``` + +This will start a process that will show logs from both the running +Postgres container and the running GraphQL container.
+To stop the running containers gracefully, background the process +and run + +``` +docker-compose down +fg # To re-foreground the backgrounded process while it exits +``` + +You will still have the container images "benchmarking_pg", +"graphile/postgraphile", and "postgres:11-alpine" on your +computer. You should keep them if you want to run this again. +If you don't, then remove them with the command: + +``` +docker rmi benchmarking_pg postgres:11-alpine graphile/postgraphile +``` + +### Postgres client + +The `psql` shell client is bundled with the PostgreSQL core distribution +available from the [Postgres download page][postgres-downloads]. +Using the `PG_USER` defined in the `.env` file (currently "benchmark"), +the command to connect to the container is: +```shell +psql -h localhost -p 5432 -U benchmark +``` +There is an example script in [examples/example.sql](examples/example.sql) that +runs some queries against the database. To run it in the psql client, type +the following in the psql command-line interface: + +``` +\i examples/example.sql +``` + +#### Bulk ingestion using CSV + +An example CSV file for bulk ingestion is in +[examples/benchmark_run_example.csv](examples/benchmark_run_example.csv). +The columns are listed in the same order as they are defined, to avoid having +to explicitly name every column in ingestion. The "id" column is left empty +and will be automatically assigned on insert. + +To ingest the example CSV file from the command line, +use the command below: + +```shell +CSV='examples/benchmark_run_example.csv' && \ +psql -U benchmark -h localhost -p 5432 \ + -c "\copy benchmark_run_view FROM '${CSV}' WITH (FORMAT csv, HEADER);" +``` + +#### Bulk ingestion using JSON + +To ingest the example JSON file using the psql client, use the command below. 
+ +``` +\set content `cat examples/benchmark_example.json` +SELECT ingest_benchmark_view(:'content'::jsonb); +``` + +### HTTP client + +This section requires an actual HTTP client to be up, either +for the production database or via the testing setup. +(See the [local testing section](#local-testing) for how to set it up). + +The 'graphile/postgraphile' container provides an HTTP interface +to the database via two url routes: + +- A GraphiQL page ([localhost:5000/graphiql][graphiql]) + to aid visual exploration of the data model. + (The `--watch` flag on the command line. Not recommended for production.) +- An endpoint that receives POST requests only (localhost:5000/graphql). + +#### Ingestion + +The script [graphql_submit.sh](./graphql_submit.sh) simplifies submission +to the database via curl. Examples: + +```shell +./graphql_submit.sh benchmarks examples/benchmark_example.json +./graphql_submit.sh runs examples/benchmark_run_example.json +``` + +#### Querying + +The output of the query is a JSON object that is hard to read on the command line. 
+Here is an example query in the shell: +```shell +curl -X POST \ + -H "Content-Type: application/json" \ + --data '{"query": "{projectDetails{ projectName }}"}' \ + localhost:5000/graphql +``` + +which (if you have previously run the `examples/example.sql` script) yields + +``` +{"data":{"projectDetails":{"projectName":"Apache Arrow"}}} +``` + +Here is an example query using Python: +```python +import json +import requests + +uri = "http://localhost:5000/graphql" +query = json.load(open("examples/graphql_query_environment_view.json")) +response = requests.post(uri, json=query) +message = "{benchmarkLanguage}: {languageImplementationVersion}, {dependencies}" + +for row in response.json()['data']['allEnvironmentViews']['edges']: + print(message.format(**row['node'])) + +# result: +# +# Python: CPython 2.7, {"six":"","numpy":"1.14","other_lib":"1.0"} +# Python: CPython 2.7, {"six":"","numpy":"1.15","other_lib":"1.0"} +# Python: CPython 3.6, {"boost":"1.42","numpy":"1.15"} +``` + +## Deployment + +(work in progress). + +> NOTE: For those deploying this database, Postgres does not by default use +> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in +> some columns to always work. This [stackoverflow post][so-utf8] describes +> how to do it for Amazon RDS. This [section of the docs][pg-charset] +> states how to do it in general, i.e.: `initdb -E UTF8`. + + +## Quick reference + +- String variables `'have single quotes'` +- Arrays `'{"have", "curly", "braces"}'::text[]` or `'{1, 2, 3}'::integer[]` +- JSONb `'{"has":"this", "format":42}'::jsonb` +- Elements inserted using JSON-formatted strings can use standard + JSON-formatted arrays (`[1, 2, 3]`) and do not have to use the above + string formats. +- When comparing nullable values use `x IS NOT DISTINCT FROM y` rather than `x = y` +- An auto-generated summary of the [Data model](./data_model.rst). 
+ +## Data model documentation + +To recreate the data model documentation, +(1) install the [psql client][postgres-downloads] +(sorry you need to download the whole thing), +(2) start the docker container using `docker-compose up`, +(3) and then run these scripts: + +``` +./make_dotfile.sh +./make_data_model_rst.sh +``` + +[pg-jsonb]: https://www.postgresql.org/docs/11/datatype-json.html#id-1.5.7.22.3 +[so-utf8]: https://stackoverflow.com/a/33557023 +[pg-charset]: https://www.postgresql.org/docs/9.3/multibyte.html#AEN34424 +[docker]: https://www.docker.com/get-started +[citext-limitations]: https://www.postgresql.org/docs/11/citext.html#id-1.11.7.17.7 +[postgres-downloads]: https://www.postgresql.org/download/ +[graphiql]: http://localhost:5000/graphiql +[postgraphile-lambda]: https://github.com/graphile/postgraphile-lambda-example +[postgraphile-cli]: https://www.graphile.org/postgraphile/usage-cli/ diff --git a/src/arrow/dev/benchmarking/data_model.dot b/src/arrow/dev/benchmarking/data_model.dot new file mode 100644 index 000000000..d311acd4e --- /dev/null +++ b/src/arrow/dev/benchmarking/data_model.dot @@ -0,0 +1,219 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements.See the NOTICE file + distributed with this work for additional information + regarding copyright ownership.The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License.You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied.See the License for the + specific language governing permissions and limitations + under the License. +*/ + +/* + WARNING + This is an auto-generated file. Please do not edit. 
+ + To reproduce, please run :code:`./make_data_model_rst.sh`. + (This requires you have the + `psql client <https://www.postgresql.org/download/>`_ + and have started the docker containers using + :code:`docker-compose up`). +*/ +digraph database { + concentrate = true; + rankdir = LR; + ratio = ".75"; + node [shape = none, fontsize="11", fontname="Helvetica"]; + edge [fontsize="8", fontname="Helvetica"]; +legend +[fontsize = "14" +label = +<<table border="0" cellpadding="0"> + <tr><td align="left"><font point-size="16">Legend</font></td></tr> + <tr><td align="left">pk = primary key</td></tr> + <tr><td align="left">fk = foreign key</td></tr> + <tr><td align="left">u = unique*</td></tr> + <tr><td align="left">o = optional</td></tr> + <tr><td align="left">* multiple uniques in the same table are a unique group</td></tr> +</table>> +]; +benchmark +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark</font></td></tr> + <tr><td port="benchmark_id"><b>benchmark_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>benchmark_name (u)</td></tr> + <tr><td>parameter_names (o)</td></tr> + <tr><td>benchmark_description</td></tr> + <tr><td>benchmark_version (u)</td></tr> + <tr><td port="unit_id">unit_id (fk) </td></tr> + </table>> +]; +benchmark_language +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_language</font></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>benchmark_language (u)</td></tr> + </table>> +]; +benchmark_run +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_run</font></td></tr> + <tr><td port="benchmark_run_id"><b>benchmark_run_id (pk)</b></td></tr> + <tr><td>parameter_values (u)</td></tr> + 
<tr><td>value</td></tr> + <tr><td>git_commit_timestamp (u)</td></tr> + <tr><td>git_hash</td></tr> + <tr><td>val_min (o)</td></tr> + <tr><td>val_q1 (o)</td></tr> + <tr><td>val_q3 (o)</td></tr> + <tr><td>val_max (o)</td></tr> + <tr><td>std_dev</td></tr> + <tr><td>n_obs</td></tr> + <tr><td>run_timestamp (u)</td></tr> + <tr><td>run_metadata (o)</td></tr> + <tr><td>run_notes (o)</td></tr> + <tr><td port="machine_id">machine_id (u) (fk) </td></tr> + <tr><td port="environment_id">environment_id (u) (fk) </td></tr> + <tr><td port="language_implementation_version_id">language_implementation_version_id (fk) </td></tr> + <tr><td port="benchmark_language_id">benchmark_language_id (fk) </td></tr> + <tr><td port="benchmark_id">benchmark_id (u) (fk) </td></tr> + </table>> +]; +benchmark_type +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_type</font></td></tr> + <tr><td port="benchmark_type_id"><b>benchmark_type_id (pk)</b></td></tr> + <tr><td>benchmark_type (u)</td></tr> + <tr><td>lessisbetter</td></tr> + </table>> +]; +cpu +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">cpu</font></td></tr> + <tr><td port="cpu_id"><b>cpu_id (pk)</b></td></tr> + <tr><td>cpu_model_name (u)</td></tr> + <tr><td>cpu_core_count</td></tr> + <tr><td>cpu_thread_count</td></tr> + <tr><td>cpu_frequency_max_hz</td></tr> + <tr><td>cpu_frequency_min_hz</td></tr> + <tr><td>cpu_l1d_cache_bytes</td></tr> + <tr><td>cpu_l1i_cache_bytes</td></tr> + <tr><td>cpu_l2_cache_bytes</td></tr> + <tr><td>cpu_l3_cache_bytes</td></tr> + </table>> +]; +dependencies +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">dependencies</font></td></tr> + <tr><td port="dependencies_id"><b>dependencies_id (pk)</b></td></tr> + <tr><td>dependencies (u)</td></tr> + </table>> +]; +gpu +[label = + <<table border="0" 
cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">gpu</font></td></tr> + <tr><td port="gpu_id"><b>gpu_id (pk)</b></td></tr> + <tr><td>gpu_information (u)</td></tr> + <tr><td>gpu_part_number</td></tr> + <tr><td>gpu_product_name</td></tr> + </table>> +]; +language_implementation_version +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">language_implementation_version</font></td></tr> + <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>language_implementation_version (u)</td></tr> + </table>> +]; +machine +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">machine</font></td></tr> + <tr><td port="machine_id"><b>machine_id (pk)</b></td></tr> + <tr><td>machine_name</td></tr> + <tr><td>mac_address (u)</td></tr> + <tr><td>memory_bytes</td></tr> + <tr><td>cpu_actual_frequency_hz</td></tr> + <tr><td>machine_other_attributes (o)</td></tr> + <tr><td port="cpu_id">cpu_id (fk) </td></tr> + <tr><td port="gpu_id">gpu_id (fk) </td></tr> + <tr><td port="os_id">os_id (fk) </td></tr> + </table>> +]; +os +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">os</font></td></tr> + <tr><td port="os_id"><b>os_id (pk)</b></td></tr> + <tr><td>os_name (u)</td></tr> + <tr><td>architecture_name (u)</td></tr> + <tr><td>kernel_name (u)</td></tr> + </table>> +]; +project +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">project</font></td></tr> + <tr><td port="project_id"><b>project_id (pk)</b></td></tr> + <tr><td>project_name (u)</td></tr> + <tr><td>project_url (u)</td></tr> + <tr><td>repo_url (u)</td></tr> + 
<tr><td>last_changed</td></tr> + </table>> +]; +unit +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">unit</font></td></tr> + <tr><td port="unit_id"><b>unit_id (pk)</b></td></tr> + <tr><td>units (u)</td></tr> + <tr><td port="benchmark_type_id">benchmark_type_id (fk) </td></tr> + </table>> +]; +environment +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">environment</font></td></tr> + <tr><td port="environment_id"><b>environment_id (pk)</b></td></tr> + <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td port="dependencies_id">dependencies_id (u) (fk) </td></tr> + </table>> +]; +machine:cpu_id -> cpu:cpu_id; +machine:gpu_id -> gpu:gpu_id; +machine:os_id -> os:os_id; +benchmark:benchmark_language_id -> benchmark_language:benchmark_language_id; +environment:benchmark_language_id -> benchmark_language:benchmark_language_id; +language_implementation_version:benchmark_language_id -> benchmark_language:benchmark_language_id; +environment:dependencies_id -> dependencies:dependencies_id; +environment:benchmark_language_id -> language_implementation_version:benchmark_language_id; +environment:language_implementation_version_id -> language_implementation_version:language_implementation_version_id; +unit:benchmark_type_id -> benchmark_type:benchmark_type_id; +benchmark_run:machine_id -> machine:machine_id; +benchmark:unit_id -> unit:unit_id; +benchmark_run:language_implementation_version_id -> environment:language_implementation_version_id; +benchmark_run:benchmark_language_id -> environment:benchmark_language_id; +benchmark_run:environment_id -> environment:environment_id; +benchmark_run:benchmark_language_id -> benchmark:benchmark_language_id; +benchmark_run:benchmark_id -> 
benchmark:benchmark_id; +} + diff --git a/src/arrow/dev/benchmarking/data_model.rst b/src/arrow/dev/benchmarking/data_model.rst new file mode 100644 index 000000000..d0f3dc7fc --- /dev/null +++ b/src/arrow/dev/benchmarking/data_model.rst @@ -0,0 +1,373 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +.. WARNING +.. This is an auto-generated file. Please do not edit. + +.. To reproduce, please run :code:`./make_data_model_rst.sh`. +.. (This requires you have the +.. `psql client <https://www.postgresql.org/download/>`_ +.. and have started the docker containers using +.. :code:`docker-compose up`). + + +.. _benchmark-data-model: + +Benchmark data model +==================== + + +.. graphviz:: data_model.dot + + +.. _benchmark-ingestion: + +Benchmark ingestion helper functions +==================================== + +ingest_benchmark_run_view +------------------------- + +:code:`ingest_benchmark_run_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. 
+Example:: + + [ + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:05 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "std_dev": 3.14, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:10 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + } + ] +To identify which columns in "benchmark_run_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_benchmark_view +--------------------- + +:code:`ingest_benchmark_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. 
+Example:: + + [ + { + "benchmark_name": "Benchmark 1", + "parameter_names": ["arg0", "arg1", "arg2"], + "benchmark_description": "First benchmark", + "benchmark_type": "Time", + "units": "miliseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 2", + "parameter_names": ["arg0", "arg1"], + "benchmark_description": "Description 2.", + "benchmark_type": "Time", + "units": "nanoseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + } + ] + +To identify which columns in "benchmark_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_benchmark_runs_with_context +---------------------------------- + +:code:`ingest_benchmark_runs_with_context(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. +The object contains three key-value pairs:: + + {"context": { + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 3.6", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, + "git_commit_timestamp": "2019-02-14 22:42:22 +0100", + "git_hash": "123456789abcde", + "run_timestamp": "2019-02-14 03:00:40 -0600", + "extra stuff": "does not hurt anything and will not be added." + }, + "benchmark_version": { + "Benchmark Name 1": "Any string can be a version.", + "Benchmark Name 2": "A git hash can be a version.", + "An Unused Benchmark Name": "Will be ignored." 
+ }, + "benchmarks": [ + { + "benchmark_name": "Benchmark Name 1", + "parameter_values": {"argument1": 1, "argument2": "value2"}, + "value": 42, + "val_min": 41.2, + "val_q1": 41.5, + "val_q3": 42.5, + "val_max": 42.8, + "std_dev": 0.5, + "n_obs": 100, + "run_metadata": {"any": "key-value pairs"}, + "run_notes": "Any relevant notes." + }, + { + "benchmark_name": "Benchmark Name 2", + "parameter_values": {"not nullable": "Use {} if no params."}, + "value": 8, + "std_dev": 1, + "n_obs": 2 + } + ] + } + +- The entry for "context" contains the machine, environment, and timestamp + information common to all of the runs. +- The entry for "benchmark_version" maps benchmark + names to their version strings. (Which can be a git hash, + the entire code string, a number, or any other string of your choice.) +- The entry for "benchmarks" is a list of benchmark run data + for the given context and benchmark versions. The first example + benchmark run entry contains all possible values, even + nullable ones, and the second entry omits all nullable values. + + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_machine_view +------------------- + +:code:`ingest_machine_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. 
+Example:: + + { + "mac_address": "0a:00:2d:01:02:03", + "machine_name": "Yet-Another-Machine-Name", + "memory_bytes": 8589934592, + "cpu_actual_frequency_hz": 2300000000, + "os_name": "OSX", + "architecture_name": "x86_64", + "kernel_name": "18.2.0", + "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", + "cpu_core_count": 2, + "cpu_thread_count": 4, + "cpu_frequency_max_hz": 2300000000, + "cpu_frequency_min_hz": 2300000000, + "cpu_l1d_cache_bytes": 32768, + "cpu_l1i_cache_bytes": 32768, + "cpu_l2_cache_bytes": 262144, + "cpu_l3_cache_bytes": 4194304, + "machine_other_attributes": {"just": "an example"}, + "gpu_information": "", + "gpu_part_number": "", + "gpu_product_name": "" + } + +To identify which columns in "machine_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + + +.. _benchmark-views: + +Benchmark views +=============== + + +benchmark_run_view +------------------ + +Each benchmark run. + +- Each entry is unique on the machine, environment, benchmark, + and git commit timestamp. 
+ +=============================== =========== ======== =========== =========== +Column Type Nullable Default Description +=============================== =========== ======== =========== =========== +benchmark_run_id int8 not null serial primary key +benchmark_name citext not null unique +benchmark_version citext not null unique +parameter_values jsonb not null '{}'::jsonb unique +value numeric not null +git_commit_timestamp timestamptz not null unique +git_hash text not null +val_min numeric +val_q1 numeric +val_q3 numeric +val_max numeric +std_dev numeric not null +n_obs int4 not null +run_timestamp timestamptz not null unique +run_metadata jsonb +run_notes text +mac_address macaddr not null unique +benchmark_language citext not null unique +language_implementation_version citext not null ''::citext unique +dependencies jsonb not null '{}'::jsonb unique +=============================== =========== ======== =========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +benchmark_view +-------------- + +The details about a particular benchmark. + +- "benchmark_name" is unique for a given "benchmark_language" +- Each entry is unique on + ("benchmark_language", "benchmark_name", "benchmark_version") + +===================== ====== ======== ======= =========== +Column Type Nullable Default Description +===================== ====== ======== ======= =========== +benchmark_id int4 not null serial primary key +benchmark_name citext not null unique +parameter_names _text +benchmark_description text not null +benchmark_type citext not null unique +units citext not null unique +lessisbetter bool not null +benchmark_version citext not null unique +benchmark_language citext not null unique +===================== ====== ======== ======= =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +environment_view +---------------- + +The build environment used for a reported benchmark run. 
+(Will be inferred from each "benchmark_run" if not explicitly added). + +- Each entry is unique on + ("benchmark_language", "language_implementation_version", "dependencies") +- "benchmark_language" is unique in the "benchmark_language" table +- "benchmark_language" plus "language_implementation_version" is unique in + the "language_implementation_version" table +- "dependencies" is unique in the "dependencies" table + +=============================== ====== ======== =========== =========== +Column Type Nullable Default Description +=============================== ====== ======== =========== =========== +environment_id int4 not null serial primary key +benchmark_language citext not null unique +language_implementation_version citext not null ''::citext unique +dependencies jsonb not null '{}'::jsonb unique +=============================== ====== ======== =========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +machine_view +------------ + +The machine environment (CPU, GPU, OS) used for each benchmark run. + +- "mac_address" is unique in the "machine" table +- "gpu_information" is unique in the "gpu" (graphics processing unit) table + Empty string (''), not null, is used for machines that won't use the GPU +- "cpu_model_name" is unique in the "cpu" (central processing unit) table +- "os_name", "architecture_name", and "kernel_name" + are jointly unique in the "os" (operating system) table +- "machine_other_attributes" is a key-value store for any other relevant + data, e.g. 
'{"hard_disk_type": "solid state"}' + +======================== ======= ======== ========== =========== +Column Type Nullable Default Description +======================== ======= ======== ========== =========== +machine_id int4 not null serial primary key +mac_address macaddr not null unique +machine_name citext not null +memory_bytes int8 not null +cpu_actual_frequency_hz int8 not null +os_name citext not null unique +architecture_name citext not null unique +kernel_name citext not null ''::citext unique +cpu_model_name citext not null unique +cpu_core_count int4 not null +cpu_thread_count int4 not null +cpu_frequency_max_hz int8 not null +cpu_frequency_min_hz int8 not null +cpu_l1d_cache_bytes int4 not null +cpu_l1i_cache_bytes int4 not null +cpu_l2_cache_bytes int4 not null +cpu_l3_cache_bytes int4 not null +gpu_information citext not null ''::citext unique +gpu_part_number citext not null ''::citext +gpu_product_name citext not null ''::citext +machine_other_attributes jsonb +======================== ======= ======== ========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + + diff --git a/src/arrow/dev/benchmarking/ddl/0_setup.sql b/src/arrow/dev/benchmarking/ddl/0_setup.sql new file mode 100644 index 000000000..ec1044641 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/0_setup.sql @@ -0,0 +1,23 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text + +-- For future fine-grained control over function execution by user group. +ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public; diff --git a/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql new file mode 100644 index 000000000..c52d66cfd --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql @@ -0,0 +1,45 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- PROJECT +CREATE TABLE IF NOT EXISTS public.project +( + project_id SERIAL PRIMARY KEY + , project_name citext NOT NULL + , project_url text NOT NULL + , repo_url text NOT NULL + , last_changed timestamp (0) without time zone NOT NULL DEFAULT now() +); +COMMENT ON TABLE public.project + IS 'Project name and relevant URLs.'; +COMMENT ON COLUMN public.project.project_url + IS 'Homepage URL.'; +COMMENT ON COLUMN public.project.repo_url + IS 'Git repo URL to link stored commit hashes to code in a webpage.'; +COMMENT ON COLUMN public.project.last_changed + IS 'New project details are added with a new timestamp. ' + 'The project details with the newest timestamp will be used.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX project_unique_index_on_project_name_urls + ON public.project(project_name, project_url, repo_url); +COMMENT ON INDEX + public.project_unique_index_on_project_name_urls + IS 'Enforce uniqueness of project name and urls.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql new file mode 100644 index 000000000..df1a9e757 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql @@ -0,0 +1,63 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- CPU +CREATE TABLE IF NOT EXISTS public.cpu +( + cpu_id SERIAL PRIMARY KEY + , cpu_model_name citext NOT NULL UNIQUE + , cpu_core_count integer NOT NULL + , cpu_thread_count integer NOT NULL + , cpu_frequency_max_Hz bigint NOT NULL + , cpu_frequency_min_Hz bigint NOT NULL + , cpu_L1d_cache_bytes integer NOT NULL + , cpu_L1i_cache_bytes integer NOT NULL + , cpu_L2_cache_bytes integer NOT NULL + , cpu_L3_cache_bytes integer NOT NULL +); +COMMENT ON TABLE public.cpu + IS 'CPU model and its specifications.'; +COMMENT ON COLUMN public.cpu.cpu_id + IS 'The primary key for the CPU table. ' + 'NOTE: This is a synthetic primary key and not meant to represent a ' + 'processor instruction to read capabilities.'; +COMMENT ON COLUMN public.cpu.cpu_model_name + IS 'The output of `sysctl -n machdep.cpu.brand_string`.'; +COMMENT ON COLUMN public.cpu.cpu_core_count + IS 'The output of `sysctl -n hw.physicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_thread_count + IS 'The output of `sysctl -n hw.logicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz + IS 'The output of `sysctl -n hw.cpufrequency_max`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz + IS 'The output of `sysctl -n hw.cpufrequency_min`.'; +COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes + IS 'The output of `sysctl -n hw.l1dcachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes + IS 'The output of `sysctl -n hw.l1icachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes + IS 'The output of `sysctl -n hw.l2cachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes + IS 'The output of `sysctl -n hw.l3cachesize`.'; + +-- CONSTRAINTS +ALTER TABLE public.cpu + ADD CONSTRAINT cpu_check_cpu_model_name_length + CHECK (char_length(cpu_model_name) < 255); diff --git a/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql 
new file mode 100644 index 000000000..564af19de --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql @@ -0,0 +1,43 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- GPU +CREATE TABLE IF NOT EXISTS public.gpu +( + gpu_id SERIAL PRIMARY KEY + , gpu_information citext UNIQUE NOT NULL DEFAULT '' + , gpu_part_number citext NOT NULL DEFAULT '' + , gpu_product_name citext NOT NULL DEFAULT '' +); +COMMENT ON TABLE public.gpu IS 'GPU specifications.'; +COMMENT ON COLUMN public.gpu.gpu_information + IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` ' + 'or `kextstat | grep -i cuda` on OSX, or another command; anything ' + 'that gets a string to uniquely identify the GPU.'; + +-- CONSTRAINTS +CREATE INDEX gpu_index_on_part_number + ON public.gpu (gpu_part_number); + +CREATE INDEX gpu_index_on_product_name + ON public.gpu (gpu_product_name); + +CREATE INDEX gpu_index_on_product_name_and_part_number + ON public.gpu (gpu_product_name, gpu_part_number); diff --git a/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql new file mode 100644 index 000000000..7b03d82f4 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql @@ 
-0,0 +1,57 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- OS +CREATE TABLE IF NOT EXISTS public.os +( + os_id SERIAL PRIMARY KEY + , os_name citext NOT NULL + , architecture_name citext NOT NULL + , kernel_name citext NOT NULL DEFAULT '' +); +-- @name os. forces retention of an 's' in the Graphile GraphQL api. +COMMENT ON TABLE public.os + IS E'@name os.\nOperating system name and kernel (version).'; +COMMENT ON COLUMN public.os.os_name + IS 'Operating system name. For example, OSX, Ubuntu, Windows.'; +COMMENT ON COLUMN public.os.architecture_name + IS 'Operating system architecture; the output of `uname -m`.'; +COMMENT ON COLUMN public.os.kernel_name + IS 'Operating system kernel, or an empty string. ' + 'On Linux/OSX, the output of `uname -r`. 
' + 'On Windows, the output of `ver`.'; + +-- CONSTRAINTS +ALTER TABLE public.os + ADD CONSTRAINT os_check_os_name_length + CHECK (char_length(os_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_architecture_name_length + CHECK (char_length(architecture_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_kernel_name_length + CHECK (char_length(kernel_name) < 63); + +CREATE UNIQUE INDEX os_unique_index + ON public.os(os_name, architecture_name, kernel_name); +COMMENT ON INDEX public.os_unique_index + IS 'Enforce uniqueness of os, architecture, and kernel names.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql new file mode 100644 index 000000000..2e3553677 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql @@ -0,0 +1,35 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- BENCHMARK_LANGUAGE +CREATE TABLE IF NOT EXISTS public.benchmark_language +( + benchmark_language_id SERIAL PRIMARY KEY + , benchmark_language citext NOT NULL UNIQUE +); +COMMENT ON TABLE public.benchmark_language + IS 'The language the benchmark was written in (and presumably for).'; +COMMENT ON COLUMN public.benchmark_language.benchmark_language + IS 'The benchmark language. For example: Python'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_language + ADD CONSTRAINT benchmark_language_check_language_length + CHECK (char_length(benchmark_language) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql new file mode 100644 index 000000000..3744a0c35 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql @@ -0,0 +1,31 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- DEPENDENCIES +CREATE TABLE IF NOT EXISTS public.dependencies +( + dependencies_id SERIAL PRIMARY KEY + , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb +); +COMMENT ON TABLE public.dependencies + IS E'@name dependencies.\n' + 'A JSON object mapping dependencies to their versions.'; +COMMENT ON COLUMN public.dependencies.dependencies + IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql new file mode 100644 index 000000000..f7d26e4e2 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql @@ -0,0 +1,46 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- LANGUAGE_IMPLEMENTATION_VERSION +CREATE TABLE IF NOT EXISTS public.language_implementation_version +( + language_implementation_version_id SERIAL + , language_implementation_version citext NOT NULL DEFAULT '' + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (language_implementation_version_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language +); +COMMENT ON TABLE public.language_implementation_version + IS 'The benchmark language implementation or compiler version, e.g. ' + '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or ' + '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.'; +COMMENT ON COLUMN public.language_implementation_version.language_implementation_version + IS 'The version number used in the benchmark environment (e.g. ''2.7'').'; + +-- CONSTRAINTS +ALTER TABLE public.language_implementation_version + ADD CONSTRAINT language_implementation_version_check_version_length + CHECK (char_length(language_implementation_version) < 255); + +CREATE UNIQUE INDEX language_implementation_version_unique_index + ON public.language_implementation_version + (benchmark_language_id, language_implementation_version); +COMMENT ON INDEX language_implementation_version_unique_index + IS 'Enforce unique implementation versions for the languages.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql new file mode 100644 index 000000000..1143cdb00 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql @@ -0,0 +1,39 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_TYPE +CREATE TABLE IF NOT EXISTS public.benchmark_type +( + benchmark_type_id SERIAL PRIMARY KEY + , benchmark_type citext NOT NULL UNIQUE + , lessisbetter boolean NOT NULL +); +COMMENT ON TABLE public.benchmark_type + IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"'; +COMMENT ON COLUMN public.benchmark_type.benchmark_type + IS 'The type of units, so ''time'' for seconds, milliseconds, or ' + '''mem'' for kilobytes, megabytes.'; +COMMENT ON COLUMN public.benchmark_type.lessisbetter + IS 'True if a smaller benchmark value is better.'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_type + ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length + CHECK (char_length(benchmark_type) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql new file mode 100644 index 000000000..8f219d3e0 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql @@ -0,0 +1,69 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- MACHINE +CREATE TABLE IF NOT EXISTS public.machine +( + machine_id SERIAL PRIMARY KEY + , machine_name citext NOT NULL + , mac_address macaddr NOT NULL + , memory_bytes bigint NOT NULL + , cpu_actual_frequency_Hz bigint NOT NULL + , machine_other_attributes jsonb + , cpu_id integer NOT NULL + , gpu_id integer NOT NULL + , os_id integer NOT NULL + , FOREIGN KEY (cpu_id) REFERENCES public.cpu + , FOREIGN KEY (gpu_id) REFERENCES public.gpu + , FOREIGN KEY (os_id) REFERENCES public.os +); +COMMENT ON TABLE public.machine + IS 'Unique identifiers for a machine.'; +COMMENT ON COLUMN public.machine.machine_name + IS 'A machine name of your choice.'; +COMMENT ON COLUMN public.machine.mac_address + IS 'The mac_address of a physical network interface to uniquely ' + 'identify a computer. Postgres accepts standard formats, including ' + '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203'''; +COMMENT ON COLUMN public.machine.memory_bytes + IS 'The output of `sysctl -n hw.memsize`.'; +COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz + IS 'The output of `sysctl -n hw.cpufrequency`.'; +COMMENT ON COLUMN public.machine.machine_other_attributes + IS 'Additional attributes of interest, as a JSON object. 
' + 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX machine_index_on_mac_address + ON public.machine(mac_address); +COMMENT ON INDEX machine_index_on_mac_address + IS 'Enforce unique mac address'; + +CREATE INDEX machine_index_on_cpu_id + ON public.machine(cpu_id); + +CREATE INDEX machine_index_on_gpu_id + ON public.machine(gpu_id); + +CREATE INDEX machine_index_on_os_id + ON public.machine(os_id); + +CREATE INDEX machine_index_on_cpu_gpu_os_id + ON public.machine(cpu_id, gpu_id, os_id); diff --git a/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql new file mode 100644 index 000000000..a8cf57669 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql @@ -0,0 +1,37 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- UNIT +CREATE TABLE IF NOT EXISTS public.unit +( + unit_id SERIAL PRIMARY KEY + , units citext NOT NULL UNIQUE + , benchmark_type_id integer NOT NULL + , FOREIGN KEY (benchmark_type_id) + REFERENCES public.benchmark_type(benchmark_type_id) +); +COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.'; +COMMENT ON COLUMN public.unit.units + IS 'For example: nanoseconds, microseconds, bytes, megabytes.'; + +-- CONSTRAINTS +ALTER TABLE public.unit + ADD CONSTRAINT unit_check_units_string_length + CHECK (char_length(units) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql new file mode 100644 index 000000000..e3a6d2395 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql @@ -0,0 +1,51 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- ENVIRONMENT +CREATE TABLE IF NOT EXISTS public.environment +( + environment_id SERIAL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , dependencies_id integer NOT NULL + , PRIMARY KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + , FOREIGN KEY + (benchmark_language_id) + REFERENCES public.benchmark_language + , FOREIGN KEY + (language_implementation_version_id, benchmark_language_id) + REFERENCES public.language_implementation_version( + language_implementation_version_id + , benchmark_language_id + ) + , FOREIGN KEY + (dependencies_id) + REFERENCES public.dependencies +); +COMMENT ON TABLE public.environment + IS 'Identifies a build environment for a specific suite of benchmarks.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX environment_unique_index + ON public.environment + (benchmark_language_id, language_implementation_version_id, dependencies_id); +COMMENT ON INDEX environment_unique_index + IS 'Enforce unique combinations of language version and dependencies.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql new file mode 100644 index 000000000..18895823d --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql @@ -0,0 +1,54 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK +CREATE TABLE IF NOT EXISTS public.benchmark +( + benchmark_id SERIAL + , benchmark_name citext NOT NULL + , parameter_names text[] + , benchmark_description text NOT NULL + , benchmark_version citext NOT NULL + , unit_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (benchmark_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language + , FOREIGN KEY (unit_id) REFERENCES public.unit +); +COMMENT ON TABLE public.benchmark + IS 'Identifies an individual benchmark.'; +COMMENT ON COLUMN public.benchmark.parameter_names + IS 'A list of strings identifying the parameter names in the benchmark.'; +COMMENT ON COLUMN public.benchmark.benchmark_version + IS 'Can be any string. 
In Airspeed Velocity, the version is ' + 'by default the hash of the entire code string for the benchmark.'; + +-- CONSTRAINTS +CREATE INDEX benchmark_index_on_benchmark_language_id + ON public.benchmark(benchmark_language_id); + +CREATE INDEX benchmark_index_on_unit_id + ON public.benchmark(unit_id); + +CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version + ON public.benchmark + (benchmark_language_id, benchmark_name, benchmark_version); +COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version + IS 'Enforce uniqueness of benchmark name and version for a given language.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql new file mode 100644 index 000000000..20b9ef0bb --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql @@ -0,0 +1,112 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- BENCHMARK_RUN +CREATE TABLE IF NOT EXISTS public.benchmark_run +( + benchmark_run_id BIGSERIAL PRIMARY KEY + , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb + , value numeric NOT NULL + , git_commit_timestamp timestamp (0) with time zone NOT NULL + , git_hash text NOT NULL + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric NOT NULL + , n_obs integer NOT NULL + , run_timestamp timestamp (0) with time zone NOT NULL + , run_metadata jsonb + , run_notes text + , machine_id integer NOT NULL + , environment_id integer NOT NULL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , benchmark_id integer NOT NULL + , FOREIGN KEY (machine_id) REFERENCES public.machine + , FOREIGN KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + REFERENCES public.environment + , FOREIGN KEY (benchmark_id, benchmark_language_id) + REFERENCES public.benchmark(benchmark_id, benchmark_language_id) +); +COMMENT ON TABLE public.benchmark_run + IS 'One row per benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.parameter_values + IS 'A JSON object mapping the parameter names from ' + '"benchmark.parameter_names" to values.'; +COMMENT ON COLUMN public.benchmark_run.value + IS 'The average value from the benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp + IS 'Get this using `git show -s --date=local --format="%ci" <hash>`. ' + 'ISO 8601 is recommended, e.g. 
''2019-01-30 03:12 -0600''.'; +COMMENT ON COLUMN public.benchmark_run.git_hash + IS 'The commit hash of the codebase currently being benchmarked.'; +COMMENT ON COLUMN public.benchmark_run.val_min + IS 'The smallest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q1 + IS 'The first quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q3 + IS 'The third quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_max + IS 'The largest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.std_dev + IS 'The standard deviation of the run values for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.n_obs + IS 'The number of observations for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.run_metadata + IS 'Additional metadata of interest, as a JSON object. ' + 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.'; +COMMENT ON COLUMN public.benchmark_run.run_notes + IS 'Additional notes of interest, as a text string. 
'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative + CHECK (std_dev >= 0); + +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_n_obs_positive + CHECK (n_obs > 0); + +CREATE INDEX benchmark_run_index_on_environment_id + ON public.benchmark_run(environment_id); + +CREATE INDEX benchmark_run_index_on_machine_id + ON public.benchmark_run(machine_id); + +CREATE INDEX benchmark_run_index_on_benchmark_id + ON public.benchmark_run(benchmark_id, benchmark_language_id); + +CREATE INDEX benchmark_run_index_on_benchmark_environment_time + ON public.benchmark_run + (benchmark_id, environment_id, git_commit_timestamp); +COMMENT ON INDEX + public.benchmark_run_index_on_benchmark_environment_time + IS 'Index to improve sorting by benchmark, environment, and timestamp.'; + +CREATE UNIQUE INDEX + benchmark_run_unique_index_on_env_benchmark_timestamp_params + ON public.benchmark_run + (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp); +COMMENT ON INDEX + public.benchmark_run_unique_index_on_env_benchmark_timestamp_params + IS 'Enforce uniqueness of benchmark run for a given machine, environment, ' + 'benchmark, git commit timestamp, parameter values, and run timestamp.'; diff --git a/src/arrow/dev/benchmarking/ddl/2_00_views.sql b/src/arrow/dev/benchmarking/ddl/2_00_views.sql new file mode 100644 index 000000000..cbd295e50 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/2_00_views.sql @@ -0,0 +1,324 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +-- NOTE: +-- The function for documentation depends on view columns +-- being named exactly the same as in the table view. + +-- MACHINE_VIEW +CREATE OR REPLACE VIEW public.machine_view AS + SELECT + machine.machine_id + , mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + FROM public.machine AS machine + JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id + JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id + JOIN public.os AS os ON machine.os_id = os.os_id; +COMMENT ON VIEW public.machine_view IS +E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n' + '- "mac_address" is unique in the "machine" table\n' + '- "gpu_information" is unique in the "gpu" (graphics processing unit) table\n' + ' Empty string (''''), not null, is used for machines that won''t use the GPU\n' + '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n' + '- "os_name", "architecture_name", and "kernel_name"\n' + ' are unique in the "os" (operating system) table\n' + '- "machine_other_attributes" is a key-value store for any other relevant\n' + ' data, e.g. 
''{"hard_disk_type": "solid state"}'''; + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW +CREATE OR REPLACE VIEW public.language_implementation_version_view AS + SELECT + lv.language_implementation_version_id + , bl.benchmark_language + , lv.language_implementation_version + FROM public.language_implementation_version AS lv + JOIN public.benchmark_language AS bl + ON lv.benchmark_language_id = bl.benchmark_language_id; + +-- ENVIRONMENT_VIEW +CREATE OR REPLACE VIEW public.environment_view AS + SELECT + env.environment_id + , benchmark_language + , language_implementation_version + , dependencies + FROM public.environment AS env + JOIN public.benchmark_language AS language + ON env.benchmark_language_id = language.benchmark_language_id + JOIN public.language_implementation_version AS version + ON env.language_implementation_version_id = version.language_implementation_version_id + JOIN public.dependencies AS deps + ON env.dependencies_id = deps.dependencies_id; +COMMENT ON VIEW public.environment_view IS +E'The build environment used for a reported benchmark run.\n' + '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "language_implementation_version", "dependencies")\n' + '- "benchmark_language" is unique in the "benchmark_language" table\n' + '- "benchmark_language" plus "language_implementation_version" is unique in\n' + ' the "language_implementation_version" table\n' + '- "dependencies" is unique in the "dependencies" table'; + +-- UNIT_VIEW +CREATE OR REPLACE VIEW public.unit_view AS + SELECT + unit.unit_id + , units + , benchmark_type + , lessisbetter + FROM public.unit AS unit + JOIN public.benchmark_type AS bt + ON unit.benchmark_type_id = bt.benchmark_type_id; + +-- BENCHMARK_VIEW +CREATE OR REPLACE VIEW public.benchmark_view AS + SELECT + b.benchmark_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , 
benchmark_version + , benchmark_language + FROM public.benchmark AS b + JOIN public.benchmark_language AS benchmark_language + ON b.benchmark_language_id = benchmark_language.benchmark_language_id + JOIN public.unit AS unit + ON b.unit_id = unit.unit_id + JOIN public.benchmark_type AS benchmark_type + ON unit.benchmark_type_id = benchmark_type.benchmark_type_id; +COMMENT ON VIEW public.benchmark_view IS +E'The details about a particular benchmark.\n\n' + '- "benchmark_name" is unique for a given "benchmark_language"\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "benchmark_name", "benchmark_version")'; + +-- BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view (name, version, language only) + , benchmark_name + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view (mac address only) + , mac_address + -- environment_view + , env.benchmark_language + , language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; +COMMENT ON VIEW public.benchmark_run_view IS +E'Each benchmark run.\n\n' + '- Each entry is unique on the machine, environment, benchmark,\n' + ' and git commit timestamp.'; + +-- FULL_BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.full_benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , 
val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + -- environment_view + , env.benchmark_language + , env.language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; + +-- SUMMARIZED_TABLES_VIEW +-- One row per column of every ordinary table in the public schema. +-- Column defaults and CHECK expressions are decompiled with pg_get_expr() +-- because pg_attrdef.adsrc and pg_constraint.consrc were removed in +-- PostgreSQL 12. +CREATE OR REPLACE VIEW public.summarized_tables_view AS + WITH chosen AS ( + -- Ordinary tables (relkind 'r') in the public schema. + SELECT + cls.oid AS id + , cls.relname as tbl_name + FROM pg_catalog.pg_class AS cls + JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid + WHERE + cls.relkind = 'r' + AND ns.nspname = 'public' + ), all_constraints AS ( + -- One row per (table, column number, key kind): foreign, unique, primary. + SELECT + chosen.id AS tbl_id + , chosen.tbl_name + , unnest(conkey) AS col_id + , 'foreign key' AS col_constraint + FROM pg_catalog.pg_constraint + JOIN chosen ON chosen.id = conrelid + WHERE contype = 'f' + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'unique' + FROM pg_catalog.pg_index i + JOIN chosen ON chosen.id = i.indrelid + WHERE i.indisunique AND NOT i.indisprimary + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'primary key' + FROM pg_catalog.pg_index i + JOIN chosen on chosen.id = i.indrelid + WHERE i.indisprimary + ), gathered_constraints AS ( + -- Collapse the key kinds of each column into one comma-separated string. + SELECT + tbl_id + , tbl_name + , col_id + , string_agg(col_constraint, ', ' ORDER BY col_constraint) + 
AS col_constraint + FROM all_constraints + GROUP BY tbl_id, tbl_name, col_id + ) + SELECT + chosen.tbl_name AS table_name + , columns.attnum AS column_number + , columns.attname AS column_name + , typ.typname AS type_name + , CASE + WHEN columns.attnotnull + THEN 'not null' + ELSE '' + END AS nullable + , CASE + WHEN pg_get_expr(defaults.adbin, defaults.adrelid) like 'nextval%' + THEN 'serial' + ELSE pg_get_expr(defaults.adbin, defaults.adrelid) + END AS default_value + , CASE + WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL + THEN pg_get_expr(cnstrnt.conbin, cnstrnt.conrelid) + WHEN cnstrnt.conbin IS NULL + THEN gc.col_constraint + ELSE gc.col_constraint || ', ' || pg_get_expr(cnstrnt.conbin, cnstrnt.conrelid) + END AS description + FROM pg_catalog.pg_attribute AS columns + JOIN chosen ON columns.attrelid = chosen.id + JOIN pg_catalog.pg_type AS typ + ON typ.oid = columns.atttypid + LEFT JOIN gathered_constraints AS gc + ON gc.col_id = columns.attnum + AND gc.tbl_id = columns.attrelid + LEFT JOIN pg_catalog.pg_attrdef AS defaults + ON defaults.adrelid = chosen.id + AND defaults.adnum = columns.attnum + -- CHECK constraints only: conkey holds column numbers, so it must be + -- matched against attnum (not the table OID in attrelid). + LEFT JOIN pg_catalog.pg_constraint AS cnstrnt + ON cnstrnt.conrelid = columns.attrelid + AND cnstrnt.contype = 'c' + AND columns.attnum = ANY(cnstrnt.conkey) + WHERE + columns.attnum > 0 + ORDER BY table_name, column_number; +COMMENT ON VIEW public.summarized_tables_view + IS 'A summary of all columns from all tables in the public schema, ' + ' identifying nullability, primary/foreign keys, and data type.'; diff --git a/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql new file mode 100644 index 000000000..b10b69a4e --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql @@ -0,0 +1,643 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+
+-- PROJECT_DETAILS
+-- Composite row type returned by "project_details()".
+CREATE TYPE public.type_project_details AS (
+  project_name text
+  , project_url text
+  , repo_url text
+);
+
+-- Returns the name/url/repo-url of the "project" row with the newest
+-- "last_changed" value.
+CREATE OR REPLACE FUNCTION public.project_details()
+RETURNS public.type_project_details AS
+$$
+  SELECT
+    p.project_name
+    , p.project_url
+    , p.repo_url
+  FROM public.project AS p
+  ORDER BY p.last_changed DESC
+  LIMIT 1
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.project_details()
+IS 'Get the current project name, url, and repo url.';
+
+
+-------------------------- GET-OR-SET FUNCTIONS --------------------------
+--  The following functions have the naming convention "get_<tablename>_id".
+--  All of them attempt to SELECT the desired row given the column
+--  values, and if it does not exist will INSERT it.
+--
+--  When functions are overloaded with fewer columns, it is to allow
+--  selection only, given columns that comprise a unique index.
+
+-- NOTE(review): every get-or-set function below runs a SELECT followed by a
+-- separate INSERT. Nothing here serializes concurrent callers, so two
+-- sessions may both miss the SELECT and both attempt the INSERT.
+
+-- GET_CPU_ID
+-- Returns "cpu_id" of the row matching all nine CPU attributes, inserting
+-- the row first when no match exists.
+CREATE OR REPLACE FUNCTION public.get_cpu_id(
+  cpu_model_name citext
+  , cpu_core_count integer
+  , cpu_thread_count integer
+  , cpu_frequency_max_Hz bigint
+  , cpu_frequency_min_Hz bigint
+  , cpu_L1d_cache_bytes integer
+  , cpu_L1i_cache_bytes integer
+  , cpu_L2_cache_bytes integer
+  , cpu_L3_cache_bytes integer
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT cpu_id INTO result FROM public.cpu AS cpu
+    WHERE cpu.cpu_model_name = $1
+      AND cpu.cpu_core_count = $2
+      AND cpu.cpu_thread_count = $3
+      AND cpu.cpu_frequency_max_Hz = $4
+      AND cpu.cpu_frequency_min_Hz = $5
+      AND cpu.cpu_L1d_cache_bytes = $6
+      AND cpu.cpu_L1i_cache_bytes = $7
+      AND cpu.cpu_L2_cache_bytes = $8
+      AND cpu.cpu_L3_cache_bytes = $9;
+
+    IF result IS NULL THEN
+      INSERT INTO public.cpu(
+        cpu_model_name
+        , cpu_core_count
+        , cpu_thread_count
+        , cpu_frequency_max_Hz
+        , cpu_frequency_min_Hz
+        , cpu_L1d_cache_bytes
+        , cpu_L1i_cache_bytes
+        , cpu_L2_cache_bytes
+        , cpu_L3_cache_bytes
+      )
+      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+      RETURNING cpu_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_cpu_id(
+  citext
+  , integer
+  , integer
+  , bigint -- cpu_frequency_max_Hz
+  , bigint -- cpu_frequency_min_Hz
+  , integer
+  , integer
+  , integer
+  , integer
+)
+IS 'Insert or select CPU data, returning "cpu.cpu_id".';
+
+-- GET_GPU_ID
+-- Returns "gpu_id" for the given GPU attributes, inserting the row when it
+-- is missing. NULL arguments are stored and compared as the empty string.
+CREATE OR REPLACE FUNCTION public.get_gpu_id(
+  gpu_information citext DEFAULT NULL
+  , gpu_part_number citext DEFAULT NULL
+  , gpu_product_name citext DEFAULT NULL
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT gpu_id INTO result FROM public.gpu AS gpu
+    WHERE
+      gpu.gpu_information = COALESCE($1, '')
+      AND gpu.gpu_part_number = COALESCE($2, '')
+      AND gpu.gpu_product_name = COALESCE($3, '');
+
+    IF result IS NULL THEN
+      INSERT INTO public.gpu(
+        gpu_information
+        , gpu_part_number
+        , gpu_product_name
+      )
+      VALUES (COALESCE($1, ''), COALESCE($2, ''), COALESCE($3, ''))
+      RETURNING gpu_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext)
+IS 'Insert or select GPU data, returning "gpu.gpu_id".';
+
+-- GET_OS_ID
+-- Returns "os_id" for the given OS attributes, inserting the row when it is
+-- missing. A NULL "kernel_name" is stored and compared as the empty string.
+CREATE OR REPLACE FUNCTION public.get_os_id(
+  os_name citext
+  , architecture_name citext
+  , kernel_name citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT os_id INTO result FROM public.os AS os
+    WHERE os.os_name = $1
+      AND os.architecture_name = $2
+      AND os.kernel_name = COALESCE($3, '');
+
+    IF result is NULL THEN
+      INSERT INTO public.os(os_name, architecture_name, kernel_name)
+      VALUES ($1, $2, COALESCE($3, ''))
+      RETURNING os_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_os_id(citext, citext, citext)
+IS 'Insert or select OS data, returning "os.os_id".';
+
+-- GET_MACHINE_ID (full signature)
+-- Resolves (get-or-insert) the cpu, gpu and os rows, then returns the
+-- "machine_id" of the matching machine row, inserting it when missing.
+-- "machine_other_attributes" ($20) is only written on insert; it is not
+-- part of the lookup.
+CREATE OR REPLACE FUNCTION public.get_machine_id(
+  mac_address macaddr
+  , machine_name citext
+  , memory_bytes bigint
+  , cpu_actual_frequency_Hz bigint
+  -- os
+  , os_name citext
+  , architecture_name citext
+  , kernel_name citext
+  -- cpu
+  , cpu_model_name citext
+  , cpu_core_count integer
+  , cpu_thread_count integer
+  , cpu_frequency_max_Hz bigint
+  , cpu_frequency_min_Hz bigint
+  , L1d_cache_bytes integer
+  , L1i_cache_bytes integer
+  , L2_cache_bytes integer
+  , L3_cache_bytes integer
+  -- gpu
+  , gpu_information citext DEFAULT ''
+  , gpu_part_number citext DEFAULT NULL
+  , gpu_product_name citext DEFAULT NULL
+  -- nullable machine attributes
+  , machine_other_attributes jsonb DEFAULT NULL
+)
+RETURNS integer AS
+$$
+  DECLARE
+    found_cpu_id integer;
+    found_gpu_id integer;
+    found_os_id integer;
+    result integer;
+  BEGIN
+    -- Can't bypass looking up all the values because of unique constraint.
+    SELECT public.get_cpu_id(
+      cpu_model_name
+      , cpu_core_count
+      , cpu_thread_count
+      , cpu_frequency_max_Hz
+      , cpu_frequency_min_Hz
+      , L1d_cache_bytes
+      , L1i_cache_bytes
+      , L2_cache_bytes
+      , L3_cache_bytes
+    ) INTO found_cpu_id;
+
+    SELECT public.get_gpu_id(
+      gpu_information
+      , gpu_part_number
+      , gpu_product_name
+    ) INTO found_gpu_id;
+
+    SELECT public.get_os_id(
+      os_name
+      , architecture_name
+      , kernel_name
+    ) INTO found_os_id;
+
+    SELECT machine_id INTO result FROM public.machine AS m
+    WHERE m.os_id = found_os_id
+      AND m.cpu_id = found_cpu_id
+      AND m.gpu_id = found_gpu_id
+      AND m.mac_address = $1
+      AND m.machine_name = $2
+      AND m.memory_bytes = $3
+      AND m.cpu_actual_frequency_Hz = $4;
+
+    IF result IS NULL THEN
+      INSERT INTO public.machine(
+        os_id
+        , cpu_id
+        , gpu_id
+        , mac_address
+        , machine_name
+        , memory_bytes
+        , cpu_actual_frequency_Hz
+        , machine_other_attributes
+      )
+      VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20)
+      RETURNING machine_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_machine_id(
+  macaddr
+  , citext
+  , bigint  -- memory_bytes
+  , bigint  -- cpu_actual_frequency_Hz
+  -- os
+  , citext
+  , citext
+  , citext
+  -- cpu
+  , citext
+  , integer
+  , integer
+  , bigint  -- cpu_frequency_max_Hz
+  , bigint  -- cpu_frequency_min_Hz
+  , integer
+  , integer
+  , integer
+  , integer
+  -- gpu
+  , citext
+  , citext
+  , citext
+  -- nullable machine attributes
+  , jsonb
+)
+IS 'Insert or select machine data, returning "machine.machine_id".';
+
+-- GET_MACHINE_ID (given unique mac_address)
+-- Select-only overload: never inserts.
+CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr)
+RETURNS integer AS
+$$
+  SELECT machine_id FROM public.machine AS m
+  WHERE m.mac_address = $1;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_machine_id(macaddr)
+IS 'Select machine_id given its mac address, returning "machine.machine_id".';
+
+-- GET_BENCHMARK_LANGUAGE_ID
+-- Returns "benchmark_language_id" for the language name, inserting the row
+-- when it is missing.
+CREATE OR REPLACE FUNCTION public.get_benchmark_language_id(language citext)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT benchmark_language_id INTO result
+    FROM public.benchmark_language AS bl
+    WHERE bl.benchmark_language = language;
+
+    IF result IS NULL THEN
+      INSERT INTO public.benchmark_language(benchmark_language)
+      VALUES (language)
+      RETURNING benchmark_language_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_language_id(citext)
+IS 'Insert or select benchmark_language returning '
+   '"benchmark_language.benchmark_language_id".';
+
+-- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID
+-- Resolves (get-or-insert) the language by name, then returns the id of the
+-- (language, version) row, inserting it when missing. A NULL version is
+-- stored and compared as the empty string.
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+  language citext
+  , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+  DECLARE
+    language_id integer;
+    result integer;
+  BEGIN
+    SELECT public.get_benchmark_language_id($1) INTO language_id;
+
+    SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+    WHERE lv.benchmark_language_id = language_id
+      AND lv.language_implementation_version = COALESCE($2, '');
+
+    IF result IS NULL THEN
+      INSERT INTO
+        public.language_implementation_version(benchmark_language_id, language_implementation_version)
+      VALUES (language_id, COALESCE($2, ''))
+      RETURNING language_implementation_version_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext)
+IS 'Insert or select language and version data, '
+      'returning "language_implementation_version.language_implementation_version_id".';
+
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+  -- overload for when language_id is known
+  language_id integer
+  , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+    WHERE lv.benchmark_language_id = language_id
+      AND lv.language_implementation_version = COALESCE($2, '');
+
+    IF result IS NULL THEN
+      INSERT INTO
+        public.language_implementation_version(benchmark_language_id, language_implementation_version)
+      VALUES (language_id, COALESCE($2, ''))
+      RETURNING language_implementation_version_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_language_implementation_version_id(integer, citext)
+IS 'Insert or select version data for a known benchmark_language_id, '
+      'returning "language_implementation_version.language_implementation_version_id".';
+
+-- GET_DEPENDENCIES_ID
+-- Returns "dependencies_id" for the given jsonb document, inserting the row
+-- when it is missing. NULL is treated as the empty object '{}'.
+CREATE OR REPLACE FUNCTION public.get_dependencies_id(
+  dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT dependencies_id INTO result
+    FROM public.dependencies AS ldl
+    WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb);
+
+    IF result IS NULL THEN
+      INSERT INTO
+        public.dependencies(dependencies)
+      VALUES (COALESCE($1, '{}'::jsonb))
+      RETURNING dependencies_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_dependencies_id(jsonb)
+IS 'Insert or select dependencies, returning "dependencies.dependencies_id".';
+
+-- GET_ENVIRONMENT_ID
+-- Resolves (get-or-insert) the language, version and dependencies rows, then
+-- returns the "environment_id" of their combination, inserting it when
+-- missing.
+CREATE OR REPLACE FUNCTION public.get_environment_id(
+  language citext,
+  language_implementation_version citext DEFAULT '',
+  dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+  DECLARE
+    found_language_id integer;
+    found_version_id integer;
+    found_dependencies_id integer;
+    result integer;
+  BEGIN
+    SELECT public.get_benchmark_language_id($1) INTO found_language_id;
+    SELECT
+      public.get_language_implementation_version_id(found_language_id, $2)
+      INTO found_version_id;
+    SELECT
+      public.get_dependencies_id ($3)
+      INTO found_dependencies_id;
+
+    SELECT environment_id INTO result FROM public.environment AS e
+    WHERE e.benchmark_language_id = found_language_id
+      AND e.language_implementation_version_id = found_version_id
+      AND e.dependencies_id = found_dependencies_id;
+
+    IF result IS NULL THEN
+      INSERT INTO
+        public.environment(
+          benchmark_language_id
+          , language_implementation_version_id
+          , dependencies_id
+        )
+      VALUES (found_language_id, found_version_id, found_dependencies_id)
+      RETURNING environment_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb)
+IS 'Insert or select language, language version, and dependencies, '
+      'returning "environment.environment_id".';
+
+-- GET_BENCHMARK_TYPE_ID (full signature)
+-- Returns "benchmark_type_id" for (benchmark_type, lessisbetter), inserting
+-- the row when no exact match exists.
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+  benchmark_type citext
+  , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+    WHERE bt.benchmark_type = $1
+      AND bt.lessisbetter = $2;
+
+    IF result IS NULL THEN
+      INSERT INTO public.benchmark_type(benchmark_type, lessisbetter)
+      VALUES($1, $2)
+      RETURNING benchmark_type_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean)
+IS 'Insert or select benchmark type and lessisbetter, '
+   'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only)
+-- Select-only overload: returns NULL when the type is unknown.
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+  benchmark_type citext
+)
+RETURNS integer AS
+$$
+  DECLARE
+    result integer;
+  BEGIN
+    SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+    WHERE bt.benchmark_type = $1;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext)
+IS 'Select benchmark_type_id given benchmark type (e.g. ''time''), '
+   'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_UNIT_ID (full signature)
+-- Resolves the benchmark type (get-or-insert when "lessisbetter" is given,
+-- select-only otherwise), then returns the "unit_id" of (type, units),
+-- inserting the row when missing.
+CREATE OR REPLACE FUNCTION public.get_unit_id(
+  benchmark_type citext
+  , units citext
+  , lessisbetter boolean DEFAULT NULL
+)
+RETURNS integer AS
+$$
+  DECLARE
+    found_benchmark_type_id integer;
+    result integer;
+  BEGIN
+
+    IF ($3 IS NOT NULL)  -- if lessisbetter is not null
+    THEN
+      SELECT public.get_benchmark_type_id($1, $3)
+        INTO found_benchmark_type_id;
+    ELSE
+      SELECT public.get_benchmark_type_id($1)
+        INTO found_benchmark_type_id;
+    END IF;
+
+    SELECT unit_id INTO result FROM public.unit AS u
+    WHERE u.benchmark_type_id = found_benchmark_type_id
+      AND u.units = $2;
+
+    IF result IS NULL THEN
+      INSERT INTO public.unit(benchmark_type_id, units)
+      VALUES(found_benchmark_type_id, $2)
+      RETURNING unit_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean)
+IS 'Insert or select benchmark type (e.g. ''time''), '
+   'units string (e.g. ''miliseconds''), '
+   'and "lessisbetter" (true if smaller benchmark values are better), '
+   'returning "unit.unit_id".';
+
+-- GET_UNIT_ID (given unique units string only)
+-- Select-only overload: never inserts.
+CREATE OR REPLACE FUNCTION public.get_unit_id(units citext)
+RETURNS integer AS
+$$
+  -- The parameter must be referenced positionally: in a SQL-language
+  -- function a bare "units" resolves to the column "unit.units", which
+  -- turned the predicate into "u.units = u.units" and matched every row.
+  SELECT unit_id FROM public.unit AS u
+  WHERE u.units = $1;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_unit_id(citext)
+IS 'Select unit_id given unit name, returning "unit.unit_id".';
+
+-- GET_BENCHMARK_ID (full signature)
+-- Resolves (get-or-insert) the language and unit rows, then returns the
+-- "benchmark_id" of the matching benchmark row, inserting it when missing.
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+  benchmark_language citext
+  , benchmark_name citext
+  , parameter_names text[]
+  , benchmark_description text
+  , benchmark_version citext
+  , benchmark_type citext
+  , units citext
+  , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+  DECLARE
+    found_benchmark_language_id integer;
+    found_unit_id integer;
+    result integer;
+  BEGIN
+    SELECT public.get_benchmark_language_id(
+      benchmark_language
+    ) INTO found_benchmark_language_id;
+
+    SELECT public.get_unit_id(
+      benchmark_type
+      , units
+      , lessisbetter
+    ) INTO found_unit_id;
+
+    SELECT benchmark_id INTO result FROM public.benchmark AS b
+    WHERE b.benchmark_language_id = found_benchmark_language_id
+      AND b.benchmark_name = $2
+      -- handle nullable "parameter_names"
+      AND b.parameter_names IS NOT DISTINCT FROM $3
+      AND b.benchmark_description = $4
+      AND b.benchmark_version = $5
+      AND b.unit_id = found_unit_id;
+
+    IF result IS NULL THEN
+      INSERT INTO public.benchmark(
+        benchmark_language_id
+        , benchmark_name
+        , parameter_names
+        , benchmark_description
+        , benchmark_version
+        , unit_id
+      )
+      VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id)
+      RETURNING benchmark_id INTO result;
+    END IF;
+
+    RETURN result;
+  END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_id(
+  citext
+  , citext
+  , text[]
+  , text
+  , citext
+  , citext
+  , citext
+  , boolean
+)
+IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".';
+
+-- GET_BENCHMARK_ID (by unique columns)
+-- Returns the "benchmark_id" matching (language, name, version), or NULL
+-- when there is none.
+-- NOTE(review): the function is declared STABLE, yet
+-- "get_benchmark_language_id" inserts the language when it is unknown;
+-- confirm whether a plain join on "benchmark_language" is preferable.
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+  benchmark_language citext
+  , benchmark_name citext
+  , benchmark_version citext
+)
+RETURNS integer AS
+$$
+  WITH language AS (
+    SELECT public.get_benchmark_language_id($1) AS id
+  )
+  -- Parameters are referenced positionally: in a SQL-language function the
+  -- bare names "benchmark_name" / "benchmark_version" resolve to the columns
+  -- of "benchmark", which made both predicates compare a column to itself.
+  SELECT b.benchmark_id
+  FROM public.benchmark AS b
+  JOIN language ON b.benchmark_language_id = language.id
+  WHERE b.benchmark_name = $2
+    AND b.benchmark_version = $3
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext)
+IS 'Select existing benchmark given unique columns, '
+   'returning "benchmark.benchmark_id".';
diff --git a/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
new file mode 100644
index 000000000..b6ce4741a
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
@@ -0,0 +1,574 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+
+-------------------------- TRIGGER FUNCTIONS --------------------------
+-- Views that do not select from a single table or view are not
+-- automatically updatable. These trigger functions are intended
+-- to be run instead of INSERT into the complicated views.
+--
+-- Convention used below: a trigger function returns NULL when the row
+-- already exists (nothing is inserted) and NEW, with the generated id
+-- filled in, when it inserted a row.
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW
+-- Requires non-NULL "benchmark_language" and
+-- "language_implementation_version"; inserts the (language, version) row
+-- unless it already exists.
+CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    language_id integer;
+    result integer;
+  BEGIN
+    IF NEW.benchmark_language IS NULL THEN
+      RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+    END IF;
+    IF NEW.language_implementation_version IS NULL THEN
+      RAISE EXCEPTION
+        'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+    END IF;
+
+    SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+      INTO language_id;
+
+    SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+    WHERE lv.benchmark_language_id = language_id
+      AND lv.language_implementation_version = NEW.language_implementation_version;
+
+    IF result IS NOT NULL THEN
+      -- row already exists
+      RETURN NULL;
+    ELSE
+      INSERT INTO
+        public.language_implementation_version(
+          benchmark_language_id
+          , language_implementation_version
+        )
+      VALUES (language_id, NEW.language_implementation_version)
+      RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id;
+    END IF;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- ENVIRONMENT_VIEW_INSERT_ROW
+-- Resolves (get-or-insert) language, version and dependencies, then inserts
+-- the environment row unless that combination already exists.
+CREATE OR REPLACE FUNCTION public.environment_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_language_id integer;
+    found_version_id integer;
+    found_dependencies_id integer;
+    result integer;
+  BEGIN
+    IF NEW.benchmark_language IS NULL
+    THEN
+      RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+    END IF;
+    IF NEW.language_implementation_version IS NULL THEN
+      RAISE EXCEPTION
+        'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+    END IF;
+
+    SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+      INTO found_language_id;
+
+    SELECT
+      public.get_language_implementation_version_id(
+        found_language_id
+        , NEW.language_implementation_version
+      )
+      INTO found_version_id;
+
+    SELECT public.get_dependencies_id(NEW.dependencies)
+      INTO found_dependencies_id;
+
+    SELECT environment_id INTO result FROM public.environment AS e
+    WHERE e.benchmark_language_id = found_language_id
+      AND e.language_implementation_version_id = found_version_id
+      AND e.dependencies_id = found_dependencies_id;
+
+    IF result IS NOT NULL THEN
+      -- row already exists
+      RETURN NULL;
+    ELSE
+      INSERT INTO
+        public.environment(
+          benchmark_language_id
+          , language_implementation_version_id
+          , dependencies_id
+        )
+      VALUES (found_language_id, found_version_id, found_dependencies_id)
+      RETURNING environment_id INTO NEW.environment_id;
+    END IF;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- MACHINE_VIEW_INSERT_ROW
+-- Validates the required columns, resolves (get-or-insert) cpu, gpu and os,
+-- then inserts the machine row unless a matching one already exists.
+-- NOTE(review): the existence check does not compare "mac_address", unlike
+-- "get_machine_id"; confirm that this is intended.
+CREATE OR REPLACE FUNCTION public.machine_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_cpu_id integer;
+    found_gpu_id integer;
+    found_os_id integer;
+    result integer;
+  BEGIN
+    IF (
+      NEW.machine_name IS NULL
+      OR NEW.memory_bytes IS NULL
+      OR NEW.cpu_model_name IS NULL
+      OR NEW.cpu_core_count IS NULL
+      OR NEW.cpu_thread_count IS NULL
+      OR NEW.cpu_frequency_max_Hz IS NULL
+      OR NEW.cpu_frequency_min_Hz IS NULL
+      OR NEW.cpu_L1d_cache_bytes IS NULL
+      OR NEW.cpu_L1i_cache_bytes IS NULL
+      OR NEW.cpu_L2_cache_bytes IS NULL
+      OR NEW.cpu_L3_cache_bytes IS NULL
+      OR NEW.os_name IS NULL
+      OR NEW.architecture_name IS NULL
+    )
+    THEN
+      RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. '
+        'all columns in table "gpu" will default to the empty string '''', '
+        'as will blank "os.kernel_name". This is to allow uniqueness '
+        'constraints to work. Thank you!.';
+    END IF;
+
+    SELECT public.get_cpu_id(
+      NEW.cpu_model_name
+      , NEW.cpu_core_count
+      , NEW.cpu_thread_count
+      , NEW.cpu_frequency_max_Hz
+      , NEW.cpu_frequency_min_Hz
+      , NEW.cpu_L1d_cache_bytes
+      , NEW.cpu_L1i_cache_bytes
+      , NEW.cpu_L2_cache_bytes
+      , NEW.cpu_L3_cache_bytes
+    ) INTO found_cpu_id;
+
+    SELECT public.get_gpu_id(
+      NEW.gpu_information
+      , NEW.gpu_part_number
+      , NEW.gpu_product_name
+    ) INTO found_gpu_id;
+
+    SELECT public.get_os_id(
+      NEW.os_name
+      , NEW.architecture_name
+      , NEW.kernel_name
+    ) INTO found_os_id;
+
+    SELECT machine_id INTO result FROM public.machine AS m
+    WHERE m.os_id = found_os_id
+      AND m.cpu_id = found_cpu_id
+      AND m.gpu_id = found_gpu_id
+      AND m.machine_name = NEW.machine_name
+      AND m.memory_bytes = NEW.memory_bytes
+      AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz;
+
+    IF result IS NOT NULL THEN
+      -- row already exists
+      RETURN NULL;
+    ELSE
+      INSERT INTO public.machine(
+        os_id
+        , cpu_id
+        , gpu_id
+        , machine_name
+        , mac_address
+        , memory_bytes
+        , cpu_actual_frequency_Hz
+        , machine_other_attributes
+      )
+      VALUES (
+        found_os_id
+        , found_cpu_id
+        , found_gpu_id
+        , NEW.machine_name
+        , NEW.mac_address
+        , NEW.memory_bytes
+        , NEW.cpu_actual_frequency_Hz
+        , NEW.machine_other_attributes
+      )
+      RETURNING machine_id INTO NEW.machine_id;
+    END IF;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- UNIT_VIEW_INSERT_ROW
+-- Requires non-NULL "benchmark_type" and "units". Resolves the benchmark
+-- type, then inserts the unit row unless (type, units) already exists.
+CREATE OR REPLACE FUNCTION public.unit_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_benchmark_type_id integer;
+    result integer;
+  BEGIN
+    IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL)
+    THEN
+      RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n'
+        'Further, if the "benchmark_type" has never been defined, '
+        '"lessisbetter" must be defined or there will be an error.';
+    END IF;
+
+    -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists.
+    -- The two-argument lookup compares "lessisbetter = NULL", which never
+    -- matches, so a NULL value has to go through the select-only overload.
+    IF NEW.lessisbetter IS NULL THEN
+      SELECT public.get_benchmark_type_id(NEW.benchmark_type)
+        INTO found_benchmark_type_id;
+
+      IF found_benchmark_type_id IS NULL THEN
+        RAISE EXCEPTION
+          '"benchmark_type" "%" has never been defined, so "lessisbetter" cannot be NULL.'
+          , NEW.benchmark_type;
+      END IF;
+    ELSE
+      SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter)
+        INTO found_benchmark_type_id;
+    END IF;
+
+    SELECT unit_id INTO result FROM public.unit AS u
+    WHERE u.benchmark_type_id = found_benchmark_type_id
+      AND u.units = NEW.units;
+
+    IF result IS NOT NULL THEN
+      -- row already exists
+      RETURN NULL;
+    ELSE
+      INSERT INTO public.unit (
+        benchmark_type_id
+        , units
+      )
+      VALUES (
+        found_benchmark_type_id
+        , NEW.units
+      )
+      RETURNING unit_id INTO NEW.unit_id;
+    END IF;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_VIEW_INSERT_ROW
+-- Validates the required columns, resolves the language (get-or-insert) and
+-- the unit (select-only, by units string), then inserts the benchmark row
+-- unless a matching one already exists.
+CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_benchmark_language_id integer;
+    found_units_id integer;
+    result integer;
+  BEGIN
+    IF (
+      NEW.benchmark_name IS NULL
+      OR NEW.benchmark_version IS NULL
+      OR NEW.benchmark_language IS NULL
+      OR NEW.benchmark_type IS NULL
+      OR NEW.benchmark_description IS NULL
+      OR NEW.units IS NULL
+    )
+    THEN
+      RAISE EXCEPTION 'The only nullable column in this view is '
+                      '"benchmark.parameter_names".';
+    END IF;
+
+    SELECT public.get_benchmark_language_id(
+      NEW.benchmark_language
+    ) INTO found_benchmark_language_id;
+
+    SELECT public.get_unit_id(NEW.units) INTO found_units_id;
+
+    SELECT benchmark_id INTO result FROM public.benchmark AS b
+    WHERE b.benchmark_language_id = found_benchmark_language_id
+      AND b.benchmark_name = NEW.benchmark_name
+      -- handle nullable "parameter_names"
+      AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names
+      AND b.benchmark_description = NEW.benchmark_description
+      AND b.benchmark_version = NEW.benchmark_version
+      AND b.unit_id = found_units_id;
+
+    IF result IS NOT NULL THEN
+      -- row already exists
+      RETURN NULL;
+    ELSE
+      INSERT INTO public.benchmark(
+        benchmark_language_id
+        , benchmark_name
+        , parameter_names
+        , benchmark_description
+        , benchmark_version
+        , unit_id
+      )
+      VALUES (
+        found_benchmark_language_id
+        , NEW.benchmark_name
+        , NEW.parameter_names
+        , NEW.benchmark_description
+        , NEW.benchmark_version
+        , found_units_id
+      )
+      RETURNING benchmark_id INTO NEW.benchmark_id;
+    END IF;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_RUN_VIEW_INSERT_ROW
+-- Inserts one benchmark run that refers to an existing benchmark (looked up
+-- by language/name/version) and an existing machine (looked up by mac
+-- address). Language, version and environment rows are created on demand.
+CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_benchmark_id integer;
+    found_benchmark_language_id integer;
+    found_machine_id integer;
+    found_environment_id integer;
+    found_language_implementation_version_id integer;
+  BEGIN
+    IF (
+      NEW.benchmark_name IS NULL
+      OR NEW.benchmark_version IS NULL
+      OR NEW.benchmark_language IS NULL
+      OR NEW.value IS NULL
+      OR NEW.run_timestamp IS NULL
+      OR NEW.git_commit_timestamp IS NULL
+      OR NEW.git_hash IS NULL
+      OR NEW.language_implementation_version IS NULL
+      OR NEW.mac_address IS NULL
+    )
+    THEN
+      RAISE EXCEPTION 'Only the following columns can be NULL: '
+        '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".';
+    END IF;
+
+    SELECT public.get_benchmark_id(
+      NEW.benchmark_language
+      , NEW.benchmark_name
+      , NEW.benchmark_version
+    ) INTO found_benchmark_id;
+
+    SELECT public.get_benchmark_language_id(
+      NEW.benchmark_language
+    ) INTO found_benchmark_language_id;
+
+    SELECT public.get_machine_id(
+      NEW.mac_address
+    ) INTO found_machine_id;
+
+    SELECT public.get_environment_id(
+      NEW.benchmark_language
+      , NEW.language_implementation_version
+      , NEW.dependencies
+    ) INTO found_environment_id;
+
+    SELECT public.get_language_implementation_version_id(
+      found_benchmark_language_id,
+      NEW.language_implementation_version
+    ) INTO found_language_implementation_version_id;
+
+    INSERT INTO public.benchmark_run (
+      parameter_values
+      , value
+      , git_commit_timestamp
+      , git_hash
+      , val_min
+      , val_q1
+      , val_q3
+      , val_max
+      , std_dev
+      , n_obs
+      , run_timestamp
+      , run_metadata
+      , run_notes
+      , machine_id
+      , benchmark_language_id
+      , language_implementation_version_id
+      , environment_id
+      , benchmark_id
+    )
+    VALUES (
+      COALESCE(NEW.parameter_values, '{}'::jsonb)
+      , NEW.value
+      , NEW.git_commit_timestamp
+      , NEW.git_hash
+      , NEW.val_min
+      , NEW.val_q1
+      , NEW.val_q3
+      , NEW.val_max
+      , NEW.std_dev
+      , NEW.n_obs
+      , NEW.run_timestamp
+      , NEW.run_metadata
+      , NEW.run_notes
+      , found_machine_id
+      , found_benchmark_language_id
+      , found_language_implementation_version_id
+      , found_environment_id
+      , found_benchmark_id
+    ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
+
+-- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW
+-- Inserts one benchmark run from fully denormalized data: the benchmark,
+-- machine, language, version and environment rows are all resolved with the
+-- get-or-insert helpers before the run itself is inserted.
+CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+  DECLARE
+    found_benchmark_id integer;
+    found_benchmark_language_id integer;
+    found_machine_id integer;
+    found_environment_id integer;
+    found_language_implementation_version_id integer;
+  BEGIN
+    -- "kernel_name" is deliberately not checked: as the message below
+    -- states, a NULL value is turned into '' (by "get_os_id").
+    IF (
+      NEW.value IS NULL
+      OR NEW.git_hash IS NULL
+      OR NEW.git_commit_timestamp IS NULL
+      OR NEW.run_timestamp IS NULL
+      -- benchmark
+      OR NEW.benchmark_name IS NULL
+      OR NEW.benchmark_description IS NULL
+      OR NEW.benchmark_version IS NULL
+      OR NEW.benchmark_language IS NULL
+      -- unit
+      OR NEW.benchmark_type IS NULL
+      OR NEW.units IS NULL
+      OR NEW.lessisbetter IS NULL
+      -- machine
+      OR NEW.machine_name IS NULL
+      OR NEW.memory_bytes IS NULL
+      -- os
+      OR NEW.os_name IS NULL
+      OR NEW.architecture_name IS NULL
+      -- cpu
+      OR NEW.cpu_model_name IS NULL
+      OR NEW.cpu_core_count IS NULL
+      OR NEW.cpu_thread_count IS NULL
+      OR NEW.cpu_frequency_max_Hz IS NULL
+      OR NEW.cpu_frequency_min_Hz IS NULL
+      OR NEW.cpu_L1d_cache_bytes IS NULL
+      OR NEW.cpu_L1i_cache_bytes IS NULL
+      OR NEW.cpu_L2_cache_bytes IS NULL
+      OR NEW.cpu_L3_cache_bytes IS NULL
+    )
+    THEN
+      RAISE EXCEPTION 'Only the following columns can be NULL: '
+        '"machine_other_attributes", "parameter_names", "val_min", '
+        '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". '
+        'If "gpu_information", "gpu_part_number", "gpu_product_name", or '
+        '"kernel_name" are null, they will be silently turned into an '
+        'empty string ('''').';
+    END IF;
+
+    SELECT public.get_benchmark_id(
+      NEW.benchmark_language
+      , NEW.benchmark_name
+      , NEW.parameter_names
+      , NEW.benchmark_description
+      , NEW.benchmark_version
+      , NEW.benchmark_type
+      , NEW.units
+      , NEW.lessisbetter
+    ) INTO found_benchmark_id;
+
+    SELECT public.get_benchmark_language_id(
+      NEW.benchmark_language
+    ) INTO found_benchmark_language_id;
+
+    SELECT public.get_machine_id(
+      NEW.mac_address
+      , NEW.machine_name
+      , NEW.memory_bytes
+      , NEW.cpu_actual_frequency_Hz
+      -- os
+      , NEW.os_name
+      , NEW.architecture_name
+      , NEW.kernel_name
+      -- cpu
+      , NEW.cpu_model_name
+      , NEW.cpu_core_count
+      , NEW.cpu_thread_count
+      , NEW.cpu_frequency_max_Hz
+      , NEW.cpu_frequency_min_Hz
+      , NEW.cpu_L1d_cache_bytes
+      , NEW.cpu_L1i_cache_bytes
+      , NEW.cpu_L2_cache_bytes
+      , NEW.cpu_L3_cache_bytes
+      -- gpu
+      , NEW.gpu_information
+      , NEW.gpu_part_number
+      , NEW.gpu_product_name
+      -- nullable machine attributes
+      , NEW.machine_other_attributes
+    ) INTO found_machine_id;
+
+    SELECT public.get_environment_id(
+      NEW.benchmark_language
+      , NEW.language_implementation_version
+      , NEW.dependencies
+    ) INTO found_environment_id;
+
+    SELECT public.get_language_implementation_version_id(
+      found_benchmark_language_id,
+      NEW.language_implementation_version
+    ) INTO found_language_implementation_version_id;
+
+    INSERT INTO public.benchmark_run (
+      parameter_values
+      , value
+      , git_commit_timestamp
+      , git_hash
+      , val_min
+      , val_q1
+      , val_q3
+      , val_max
+      , std_dev
+      , n_obs
+      , run_timestamp
+      , run_metadata
+      , run_notes
+      , machine_id
+      , benchmark_language_id
+      , language_implementation_version_id
+      , environment_id
+      , benchmark_id
+    )
+    VALUES (
+      -- Same NULL handling as "benchmark_run_view_insert_row".
+      COALESCE(NEW.parameter_values, '{}'::jsonb)
+      , NEW.value
+      , NEW.git_commit_timestamp
+      , NEW.git_hash
+      , NEW.val_min
+      , NEW.val_q1
+      , NEW.val_q3
+      , NEW.val_max
+      , NEW.std_dev
+      , NEW.n_obs
+      , NEW.run_timestamp
+      , NEW.run_metadata
+      , NEW.run_notes
+      , found_machine_id
+      , found_benchmark_language_id
+      , found_language_implementation_version_id
+      , found_environment_id
+      , found_benchmark_id
+    ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+    RETURN NEW;
+  END
+$$
+LANGUAGE plpgsql;
diff --git a/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
new file mode 100644
index 000000000..000c61d00
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
@@ -0,0 +1,323 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/ + + +-------------------------- IMPORT HELPERS -------------------------- +-- Load from JSON (from https://stackoverflow.com/a/48396608) +-- How to use it in the psql client: +-- \set content `cat /examples/machine.json` +-- select ingest_machine_view(:'content'::jsonb); +-- INGEST_MACHINE_VIEW +CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + INSERT INTO public.machine_view + SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb) + RETURNING machine_id INTO result; + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' {\n' + ' "mac_address": "0a:00:2d:01:02:03",\n' + ' "machine_name": "Yet-Another-Machine-Name",\n' + ' "memory_bytes": 8589934592,\n' + ' "cpu_actual_frequency_hz": 2300000000,\n' + ' "os_name": "OSX",\n' + ' "architecture_name": "x86_64",\n' + ' "kernel_name": "18.2.0",\n' + ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n' + ' "cpu_core_count": 2,\n' + ' "cpu_thread_count": 4,\n' + ' "cpu_frequency_max_hz": 2300000000,\n' + ' "cpu_frequency_min_hz": 2300000000,\n' + ' "cpu_l1d_cache_bytes": 32768,\n' + ' "cpu_l1i_cache_bytes": 32768,\n' + ' "cpu_l2_cache_bytes": 262144,\n' + ' "cpu_l3_cache_bytes": 4194304,\n' + ' "machine_other_attributes": {"just": "an example"},\n' + ' "gpu_information": "",\n' + ' "gpu_part_number": "",\n' + ' "gpu_product_name": ""\n' + ' }\n\n' + 'To identify which columns in "machine_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb) +RETURNS setof integer AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_view + SELECT * FROM 
jsonb_populate_recordset( + null::public.benchmark_view + , from_jsonb + ) + RETURNING benchmark_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS + E'The argument is a JSON array of objects. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 1",\n' + ' "parameter_names": ["arg0", "arg1", "arg2"],\n' + ' "benchmark_description": "First benchmark",\n' + ' "benchmark_type": "Time",\n' + ' "units": "miliseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "parameter_names": ["arg0", "arg1"],\n' + ' "benchmark_description": "Description 2.",\n' + ' "benchmark_type": "Time",\n' + ' "units": "nanoseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' }\n' + ' ]\n\n' + 'To identify which columns in "benchmark_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUN_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_run_view + SELECT * FROM + jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb) + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS + E'The argument is a JSON array of objects. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. 
Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 100, "arg1": 5},\n' + ' "value": 2.5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "val_min": 1,\n' + ' "val_q1": 2,\n' + ' "val_q3": 3,\n' + ' "val_max": 4,\n' + ' "std_dev": 1.41,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 1000, "arg1": 5},\n' + ' "value": 5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "std_dev": 3.14,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' }\n' + ' ]\n' + 'To identify which columns in "benchmark_run_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUNS_WITH_CONTEXT +CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + DECLARE + context_jsonb jsonb; + found_environment_id integer; + found_machine_id integer; + BEGIN + SELECT from_jsonb -> 'context' INTO context_jsonb; + + SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr) + INTO found_machine_id; + + SELECT get_environment_id( + (context_jsonb ->> 'benchmark_language')::citext + , (context_jsonb ->> 
'language_implementation_version')::citext + , context_jsonb -> 'dependencies' + ) INTO found_environment_id; + + RETURN QUERY + WITH run_datum AS ( + SELECT * + FROM jsonb_to_recordset(from_jsonb -> 'benchmarks') + AS x( + benchmark_name citext + , parameter_values jsonb + , value numeric + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric + , n_obs integer + , run_timestamp timestamp (0) with time zone + , run_metadata jsonb + , run_notes text + ) + ), benchmark_name_and_id AS ( + SELECT + key AS benchmark_name + , public.get_benchmark_id( + (context_jsonb ->> 'benchmark_language')::citext + , key::citext -- benchmark_name + , value::citext -- benchmark_version + ) AS benchmark_id + FROM jsonb_each_text(from_jsonb -> 'benchmark_version') + ) + INSERT INTO public.benchmark_run ( + benchmark_id + -- run_datum + , parameter_values + , value + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_metadata + , run_notes + -- additional context information + , git_commit_timestamp + , git_hash + , run_timestamp + -- machine + , machine_id + -- environment + , environment_id + , language_implementation_version_id + , benchmark_language_id + ) + SELECT + b.benchmark_id + -- run_datum + , run_datum.parameter_values + , run_datum.value + , run_datum.val_min + , run_datum.val_q1 + , run_datum.val_q3 + , run_datum.val_max + , run_datum.std_dev + , run_datum.n_obs + , run_datum.run_metadata + , run_datum.run_notes + -- additional context information + , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone + , context_jsonb ->> 'git_hash' + , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone + -- machine + , found_machine_id + -- environment + , e.environment_id + , e.language_implementation_version_id + , e.benchmark_language_id + FROM run_datum + JOIN public.environment AS e + ON e.environment_id = found_environment_id + JOIN benchmark_name_and_id AS b + ON b.benchmark_name 
= run_datum.benchmark_name + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'The object contains three key-value pairs::\n\n' + ' {"context": {\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 3.6",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n' + ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n' + ' "git_hash": "123456789abcde",\n' + ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n' + ' "extra stuff": "does not hurt anything and will not be added."\n' + ' },\n' + ' "benchmark_version": {\n' + ' "Benchmark Name 1": "Any string can be a version.",\n' + ' "Benchmark Name 2": "A git hash can be a version.",\n' + ' "An Unused Benchmark Name": "Will be ignored."\n' + ' },\n' + ' "benchmarks": [\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 1",\n' + ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n' + ' "value": 42,\n' + ' "val_min": 41.2,\n' + ' "val_q1": 41.5,\n' + ' "val_q3": 42.5,\n' + ' "val_max": 42.8,\n' + ' "std_dev": 0.5,\n' + ' "n_obs": 100,\n' + ' "run_metadata": {"any": "key-value pairs"},\n' + ' "run_notes": "Any relevant notes."\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 2",\n' + ' "parameter_values": {"not nullable": "Use {} if no params."},\n' + ' "value": 8,\n' + ' "std_dev": 1,\n' + ' "n_obs": 2\n' + ' }\n' + ' ]\n' + ' }\n\n' + '- The entry for "context" contains the machine, environment, and timestamp\n' + ' information common to all of the runs\n' + '- The entry for "benchmark_version" maps benchmark\n' + ' names to their version strings. 
(Which can be a git hash,\n' + ' the entire code string, a number, or any other string of your choice.)\n' + '- The entry for "benchmarks" is a list of benchmark run data\n' + ' for the given context and benchmark versions. The first example\n' + ' benchmark run entry contains all possible values, even\n' + ' nullable ones, and the second entry omits all nullable values.\n\n'; diff --git a/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql new file mode 100644 index 000000000..6b2a05790 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql @@ -0,0 +1,395 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- _DOCUMENTATION_INGESTION +CREATE OR REPLACE FUNCTION public._documentation_ingestion() +RETURNS text AS +$$ + WITH ingestion_docs AS ( + SELECT + proname || E'\n' + || rpad('', character_length(proname), '-') + || E'\n\n:code:`' + || proname || '(' + || string_agg(a.argname || ' ' || typname , ', ') + || E')`\n\n' + || description + || E'\n\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS docs + FROM pg_catalog.pg_proc + JOIN pg_catalog.pg_namespace + ON nspname='public' + AND pg_namespace.oid = pronamespace + AND proname LIKE '%ingest%' + JOIN pg_catalog.pg_description + ON pg_description.objoid=pg_proc.oid, + LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype) + JOIN pg_catalog.pg_type + ON pg_type.oid = a.argtype + GROUP BY proname, description + ) + SELECT + string_agg(docs, E'\n\n') AS docs + FROM ingestion_docs; +$$ +LANGUAGE sql STABLE; + +-- _DOCUMENTATION_VIEW_DETAILS +CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext) +RETURNS TABLE( + column_name name + , type_name name + , nullable text + , default_value text + , description text +) AS +$$ + WITH view_columns AS ( + SELECT + attname AS column_name + , attnum AS column_order + FROM pg_catalog.pg_attribute + WHERE attrelid=view_name::regclass + ) + SELECT + t.column_name + , type_name + , coalesce(nullable, '') + , coalesce(default_value, '') + , coalesce(description, '') + FROM public.summarized_tables_view AS t + JOIN view_columns AS v ON v.column_name = t.column_name + WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id' + ORDER BY column_order; +$$ +LANGUAGE sql STABLE; + + +-- _DOCUMENTATION_VIEW_PIECES +CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext) +RETURNS TABLE (rst_formatted text) +AS +$$ +DECLARE + column_length integer; + type_length integer; + nullable_length integer; + default_length integer; + description_length integer; + sep text; + border text; +BEGIN + + -- All of 
the hard-coded constants here are the string length of the table + -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description' + SELECT greatest(6, max(character_length(column_name))) + FROM public._documentation_view_details(view_name) INTO column_length; + + SELECT greatest(4, max(character_length(type_name))) + FROM public._documentation_view_details(view_name) INTO type_length; + + SELECT greatest(8, max(character_length(nullable))) + FROM public._documentation_view_details(view_name) INTO nullable_length; + + SELECT greatest(7, max(character_length(default_value))) + FROM public._documentation_view_details(view_name) INTO default_length; + + SELECT greatest(11, max(character_length(description))) + FROM public._documentation_view_details(view_name) INTO description_length; + + SELECT ' ' INTO sep; + + SELECT + concat_ws(sep + , rpad('', column_length, '=') + , rpad('', type_length, '=') + , rpad('', nullable_length, '=') + , rpad('', default_length, '=') + , rpad('', description_length, '=') + ) + INTO border; + + RETURN QUERY + SELECT + border + UNION ALL + SELECT + concat_ws(sep + , rpad('Column', column_length, ' ') + , rpad('Type', type_length, ' ') + , rpad('Nullable', nullable_length, ' ') + , rpad('Default', default_length, ' ') + , rpad('Description', description_length, ' ') + ) + UNION ALL + SELECT border + UNION ALL + SELECT + concat_ws(sep + , rpad(v.column_name, column_length, ' ') + , rpad(v.type_name, type_length, ' ') + , rpad(v.nullable, nullable_length, ' ') + , rpad(v.default_value, default_length, ' ') + , rpad(v.description, description_length, ' ') + ) + FROM public._documentation_view_details(view_name) AS v + UNION ALL + SELECT border; + +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_FOR +CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext) +RETURNS text AS +$$ + DECLARE + view_description text; + view_table_markup text; + BEGIN + SELECT description FROM pg_catalog.pg_description + WHERE 
pg_description.objoid = view_name::regclass + INTO view_description; + + SELECT + view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' || + view_description || E'\n\n' || + string_agg(rst_formatted, E'\n') + INTO view_table_markup + FROM public._documentation_view_pieces(view_name); + + RETURN view_table_markup; + END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_for(citext) +IS E'Create an ".rst"-formatted table describing a specific view.\n' + 'Example: SELECT public.documentation_for(''endpoint'');'; + + +-- DOCUMENTATION +CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text) +RETURNS TABLE (full_text text) AS +$$ + WITH v AS ( + SELECT + public.documentation_for(relname::citext) + || E'\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS view_documentation + FROM pg_catalog.pg_trigger + JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid + WHERE NOT tgisinternal + ) + SELECT + E'\n.. _benchmark-data-model:\n\n' + 'Benchmark data model\n' + '====================\n\n\n' + '.. graphviz:: ' + || dotfile_name + || E'\n\n\n.. _benchmark-ingestion:\n\n' + 'Benchmark ingestion helper functions\n' + '====================================\n\n' + || public._documentation_ingestion() + || E'\n\n\n.. 
_benchmark-views:\n\n' + 'Benchmark views\n' + '===============\n\n\n' + || string_agg(v.view_documentation, E'\n') + FROM v + GROUP BY True; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.documentation(text) +IS E'Create an ".rst"-formatted file that shows the columns in ' + 'every insertable view in the "public" schema.\n' + 'The text argument is the name of the generated dotfile to be included.\n' + 'Example: SELECT public.documentation(''data_model.dot'');'; + + +-- _DOCUMENTATION_DOTFILE_NODE_FOR +CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name) +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH node AS ( + SELECT + tablename::text AS lines + UNION ALL + SELECT + E'[label = \n' + ' <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">' + UNION ALL + -- table name + SELECT + ' <tr><td border="0"><font point-size="14">' + || tablename + || '</font></td></tr>' + UNION ALL + -- primary keys + SELECT + ' <tr><td port="' || column_name || '"><b>' + || column_name + || ' (pk)</b></td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%primary key%' + UNION ALL + -- columns + SELECT + ' <tr><td>' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END + || '</td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND (description IS NULL OR description not like '%key%') + UNION ALL + -- foreign keys + SELECT + ' <tr><td port="' || column_name || '">' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || ' (fk) </td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%foreign key%' + AND description NOT LIKE '%primary key%' + UNION ALL + SELECT + E' </table>>\n];' + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM node; + + RETURN result; +END +$$ +LANGUAGE 
plpgsql STABLE; + + +-- _DOCUMENTATION_DOTFILE_EDGES +CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges() +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH relationship AS ( + SELECT + conrelid AS fk_table_id + , confrelid AS pk_table_id + , unnest(conkey) AS fk_colnum + , unnest(confkey) AS pk_colnum + FROM pg_catalog.pg_constraint + WHERE confkey IS NOT NULL + AND connamespace='public'::regnamespace + ), all_edges AS ( + SELECT + fk_tbl.relname || ':' || fk_col.attname + || ' -> ' + || pk_tbl.relname || ':' || pk_col.attname + || ';' AS lines + FROM relationship + -- foreign key table + column + JOIN pg_catalog.pg_attribute AS fk_col + ON fk_col.attrelid = relationship.fk_table_id + AND fk_col.attnum = relationship.fk_colnum + JOIN pg_catalog.pg_class AS fk_tbl + ON fk_tbl.oid = relationship.fk_table_id + -- primary key table + column + JOIN pg_catalog.pg_attribute AS pk_col + ON pk_col.attrelid = relationship.pk_table_id + AND pk_col.attnum = relationship.pk_colnum + JOIN pg_catalog.pg_class AS pk_tbl + ON pk_tbl.oid = relationship.pk_table_id + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM all_edges; + + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_DOTFILE +CREATE OR REPLACE FUNCTION public.documentation_dotfile() +RETURNS text AS +$$ +DECLARE + schemaname name := 'public'; + result text; +BEGIN + WITH file_contents AS ( + SELECT + E'digraph database {\n concentrate = true;\n' + ' rankdir = LR;\n' + ' ratio = ".75";\n' + ' node [shape = none, fontsize="11", fontname="Helvetica"];\n' + ' edge [fontsize="8", fontname="Helvetica"];' + AS lines + UNION ALL + SELECT + E'legend\n[fontsize = "14"\nlabel =\n' + '<<table border="0" cellpadding="0">\n' + ' <tr><td align="left"><font point-size="16">Legend</font></td></tr>\n' + ' <tr><td align="left">pk = primary key</td></tr>\n' + ' <tr><td align="left">fk = foreign key</td></tr>\n' + ' <tr><td align="left">u = unique*</td></tr>\n' + ' <tr><td align="left">o = 
optional</td></tr>\n' + ' <tr><td align="left">' + '* multiple uniques in the same table are a unique group</td></tr>\n' + '</table>>\n];' + UNION ALL + SELECT + string_agg( + public._documentation_dotfile_node_for(relname), + E'\n' -- Forcing the 'env' table to the end makes a better image + ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END) + ) + FROM pg_catalog.pg_class + WHERE relkind='r' AND relnamespace = schemaname::regnamespace + UNION ALL + SELECT + public._documentation_dotfile_edges() + UNION ALL + SELECT + '}' + ) + SELECT + string_agg(lines, E'\n') AS dotfile + INTO result + FROM file_contents; + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_dotfile() +IS E'Create a Graphviz dotfile of the data model: ' + 'every table in the "public" schema.\n' + 'Example: SELECT public.documentation_dotfile();'; diff --git a/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql new file mode 100644 index 000000000..5fb0e5018 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql @@ -0,0 +1,61 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + + +-- Every trigger in this file fires INSTEAD OF INSERT, FOR EACH ROW, on one +-- public.*_view and executes that view's public.*_view_insert_row() function. +-- NOTE(review): EXECUTE FUNCTION is PostgreSQL 11+ syntax; older servers only +-- accept the equivalent EXECUTE PROCEDURE spelling -- confirm the target version. + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT +CREATE TRIGGER language_implementation_version_view_trigger_insert + INSTEAD OF INSERT ON public.language_implementation_version_view + FOR EACH ROW + EXECUTE FUNCTION public.language_implementation_version_view_insert_row(); + +-- ENVIRONMENT_VIEW_TRIGGER_INSERT +CREATE TRIGGER environment_view_trigger_insert + INSTEAD OF INSERT ON public.environment_view + FOR EACH ROW + EXECUTE FUNCTION public.environment_view_insert_row(); + +-- MACHINE_VIEW_TRIGGER_INSERT +CREATE TRIGGER machine_view_trigger_insert + INSTEAD OF INSERT ON public.machine_view + FOR EACH ROW + EXECUTE FUNCTION public.machine_view_insert_row(); + +-- UNIT_VIEW_TRIGGER_INSERT +CREATE TRIGGER unit_view_trigger_insert + INSTEAD OF INSERT ON public.unit_view + FOR EACH ROW + EXECUTE FUNCTION public.unit_view_insert_row(); + +-- BENCHMARK_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_view_insert_row(); + +-- BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_run_view_insert_row(); + +-- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER full_benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.full_benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.full_benchmark_run_view_insert_row(); diff --git a/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql new file mode 100644 index 000000000..dd72c40db --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql @@ -0,0 +1,73 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ +---------------------------- ROLES ---------------------------- +-- ARROW_WEB +-- NOTE(review): the login password below is a hard-coded default; change it +-- before exposing this database beyond local development. +CREATE ROLE arrow_web login password 'arrow'; +COMMENT ON ROLE arrow_web IS 'Anonymous login user.'; + +-- ARROW_ADMIN +CREATE ROLE arrow_admin; +COMMENT ON ROLE arrow_admin + IS 'Can select, insert, update, and delete on all public tables.'; + +-- ARROW_ANONYMOUS +CREATE ROLE arrow_anonymous; +COMMENT ON ROLE arrow_anonymous + IS 'Can select on all public tables and insert on all of them except "project".'; + +-- arrow_web logs in and inherits its privileges from arrow_anonymous. +GRANT arrow_anonymous TO arrow_web; + + +---------------------------- PRIVILEGES ---------------------------- +GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin; + +-- ARROW_ADMIN +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public + TO arrow_admin; + +-- ARROW_ANONYMOUS +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous; +-- Tables are listed first; each insertable view is listed once, after the tables. +GRANT INSERT ON + public.benchmark + , public.benchmark_language + , public.dependencies + , public.language_implementation_version + , public.benchmark_run + , public.benchmark_type + , public.cpu + , public.environment + , public.gpu + , public.machine + , 
public.os + , public.unit + --, public.project -- The only disallowed table is `project`. + , public.benchmark_run_view + , public.benchmark_view + , public.environment_view + , public.full_benchmark_run_view + , public.language_implementation_version_view + , public.machine_view + , public.unit_view +TO arrow_anonymous; diff --git a/src/arrow/dev/benchmarking/docker-compose.yml b/src/arrow/dev/benchmarking/docker-compose.yml new file mode 100644 index 000000000..ca60206bf --- /dev/null +++ b/src/arrow/dev/benchmarking/docker-compose.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +version: '3' +services: + + pg: + build: + context: . 
+ dockerfile: Dockerfile + restart: always + ports: + - '5432:5432' + environment: + - POSTGRES_PASSWORD=${PG_PASS} + - POSTGRES_USER=${PG_USER} + + graphile: + image: graphile/postgraphile + restart: always + ports: + - 5000:5000 + depends_on: + - pg + command: + - --connection + - postgres://${PG_USER}:${PG_PASS}@pg:5432/${PG_USER} + - --schema + - public + - --watch diff --git a/src/arrow/dev/benchmarking/examples/benchmark_example.json b/src/arrow/dev/benchmarking/examples/benchmark_example.json new file mode 100644 index 000000000..d6f58c286 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_example.json @@ -0,0 +1,32 @@ +[ + { + "benchmark_name": "Benchmark 1", + "parameter_names": ["arg0", "arg1", "arg2"], + "benchmark_description": "First benchmark", + "benchmark_type": "Time", + "units": "miliseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 2", + "parameter_names": ["arg0", "arg1"], + "benchmark_description": "Description 2.", + "benchmark_type": "Time", + "units": "nanoseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 3", + "parameter_names": ["arg0"], + "benchmark_description": "Third benchmark", + "benchmark_type": "Memory", + "units": "kilobytes", + "lessisbetter": true, + "benchmark_version": "1", + "benchmark_language": "Python" + } +] diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv new file mode 100644 index 000000000..eab208a1c --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv @@ -0,0 +1,6 @@ 
+benchmark_run_id,benchmark_name,benchmark_version,parameter_values,value,git_commit_timestamp,git_hash,val_min,val_q1,val_q3,val_max,std_dev,n_obs,run_timestamp,run_metadata,run_notes,mac_address,benchmark_language,language_implementation_version,dependencies
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,3,4,1.41,8,2019-02-14 02:00:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,4,6,8,3.14,8,2019-02-14 02:01:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,0.5,1,3,5,3,8,2019-02-14 02:02:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,2.5,4,4.5,1.5,8,2019-02-14 02:03:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 10}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,4,5,2,8,2019-02-14 02:03:30 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.15"", ""other_lib"": ""1.0""}"
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.json b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json new file mode 100644 index 000000000..2ded776c9 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json @@ -0,0 +1,97 @@ +[ + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:05 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 2, + "val_q1": 4, + "val_q3": 6, + "val_max": 8, + "std_dev": 3.14, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:10 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 0.5, + "val_q1": 1, + "val_q3": 3, + "val_max": 5, + "std_dev": 3, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:20 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"boost": "1.42", "numpy": "1.15"} + }, + { + "benchmark_name": 
"Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 3, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 2, + "val_q1": 2.5, + "val_q3": 4, + "val_max": 4.5, + "std_dev": 1.5, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:30 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"boost": "1.42", "numpy": "1.15"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 10}, + "value": 3, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 4, + "val_max": 5, + "std_dev": 2, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:40 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.15", "other_lib": "1.0"} + } +] diff --git a/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json new file mode 100644 index 000000000..f9e6e3130 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json @@ -0,0 +1,73 @@ +{ + "context": { + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, + "git_commit_timestamp": "2019-02-14 22:42:22 +0100", + "git_hash": "123456789abcde", + "run_timestamp": "2019-02-25 03:00:40 -0600", + "Extra stuff": "does not hurt anything and won't be added.", + "However": "all of the entries above 'Extra stuff' are required." + }, + "benchmark_version": { + "Benchmark 2": "version 0", + "Benchmark 3": "any string is a version. 
(Benchmark 3 not actually used)" + }, + "benchmarks": [ + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 1, "arg1": 5}, + "value": 2.5, + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_metadata": {"any": "json object is admissible"}, + "run_notes": "This value is an arbitrary-length string." + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 2, "arg1": 5}, + "value": 5, + "std_dev": 3.14, + "n_obs": 8 + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 3, "arg1": 5}, + "value": 2.5, + "val_min": 0.5, + "val_q1": 1, + "val_q3": 3, + "val_max": 5, + "std_dev": 3, + "n_obs": 8, + "run_notes": "The previous run in this list has the minimal set of keys." + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 4, "arg1": 5}, + "value": 3, + "val_min": 2, + "val_q1": 2.5, + "val_q3": 4, + "val_max": 4.5, + "std_dev": 1.5, + "n_obs": 8 + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 5, "arg1": 5}, + "value": 3, + "val_min": 1, + "val_q1": 2, + "val_q3": 4, + "val_max": 5, + "std_dev": 2, + "n_obs": 8 + } + ] +} diff --git a/src/arrow/dev/benchmarking/examples/example.sql b/src/arrow/dev/benchmarking/examples/example.sql new file mode 100644 index 000000000..e93269af7 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/example.sql @@ -0,0 +1,232 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- Example insert into each of the views: +INSERT INTO public.project(project_name, project_url, repo_url) +VALUES ( + 'Apache Arrow' + , 'https://arrow.apache.org/' + , 'https://github.com/apache/arrow'); + +INSERT INTO public.environment_view + (benchmark_language, language_implementation_version, dependencies) +VALUES + ('Python', 'CPython 2.7', '{"six": "", "numpy": "1.14", "other_lib": "1.0"}'), + ('Python', 'CPython 3.6', '{"boost": "1.42", "numpy": "1.15"}'); + +INSERT INTO public.dependencies(dependencies) +VALUES + ('{"boost": "1.68", "numpy": "1.14"}'), + ('{"boost": "1.42", "numpy": "1.16"}'); + +INSERT INTO public.language_implementation_version_view + (benchmark_language, language_implementation_version) +VALUES + ('Python', 'CPython 2.7'), + ('Python', 'CPython 3.6'); + +INSERT INTO public.unit_view + (benchmark_type, units, lessisbetter) +VALUES + ('Memory', 'gigabytes', True), + ('Memory', 'kilobytes', True); + + +\echo 'use \\dv to list the views views'; +\dv + + +SELECT * FROM environment_view; +SELECT * FROM unit_view; + + +INSERT INTO public.machine_view ( + mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_hz + , cpu_frequency_min_hz + , cpu_l1d_cache_bytes + , cpu_l1i_cache_bytes + , cpu_l2_cache_bytes + , cpu_l3_cache_bytes + , machine_other_attributes +) VALUES ( + '08:00:2b:01:02:03' -- mac_address + , 'My-Machine-Name' -- machine_name + , 8589934592 -- memory_bytes + -- All (?) 
standard mac address formats are allowable: + -- https://www.postgresql.org/docs/11/datatype-net-types.html + , 2300000000 -- cpu_actual_frequency_Hz + , 'OSX' -- os_name + , 'x86_64' -- architecture_name + , '18.2.0' -- kernel + , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' -- cpu_model_name + , 2 -- cpu_core_count + , 4 -- cpu_thread_count + , 2300000000 -- cpu_frequency_max_Hz + , 2300000000 -- cpu_frequency_min_Hz + , 32768 -- cpu_l1d_cache_bytes + , 32768 -- cpu_l1i_cache_bytes + , 262144 -- cpu_l2_cache_bytes + , 4194304 -- cpu_l3_cache_bytes + , '{"example": "for machine_other_attributes"}'::jsonb +); + + +INSERT INTO public.full_benchmark_run_view ( + benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_hz + , cpu_frequency_min_hz + , cpu_l1d_cache_bytes + , cpu_l1i_cache_bytes + , cpu_l2_cache_bytes + , cpu_l3_cache_bytes + , machine_other_attributes + -- environment_view + , benchmark_language + , language_implementation_version + , dependencies +) VALUES ( + 'Benchmark 3' + , '{"arg0"}'::text[] + , 'Third benchmark' + , 'Memory' + , 'kilobytes' + , TRUE + , '0' + -- datum + , '{"arg0": 10}'::jsonb + , 0.5 + , '2019-01-31 14:31:10 -0600' + , '8136c46d5c60fb' + , 0.5 + , 0.5 + , 0.5 + , 0.5 + , 0 + , 2 + , '2019-02-14 14:00:00 -0600' + , '{"ci_99": [2.7e-06, 3.1e-06]}'::jsonb + , 'Additional run_notes.' 
+ -- machine_view + , 'My-Machine-Name' + , '09-00-2c-01-02-03' + , 8589934592 + , 2300000000 + , 'OSX' + , 'x86_64' + , '18.2.0' + , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' + , 2 + , 4 + , 2300000000 + , 2300000000 + , 32768 + , 32768 + , 262144 + , 4194304 + , '{"example": "for machine_other_attributes"}'::jsonb + -- environment_view + , 'Python' + , 'CPython 2.7' + , '{"six": "", "numpy": "1.15", "other_lib": "1.0"}'::jsonb +); + + +-- Bulk load from CSV. First column is empty; serial "benchmark_run_id" will be assigned. +--\copy benchmark_run_view FROM 'examples/benchmark_run_example.csv' WITH (FORMAT csv, HEADER); + +-- Load from JSON +--\set content `cat examples/benchmark_example.json` +--SELECT ingest_benchmark_view(:'content'::jsonb); + +INSERT INTO public.benchmark_view ( + benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + , benchmark_language + ) VALUES ( + 'Benchmark 1' + , '{"arg0", "arg1", "arg2"}'::text[] + , E'Description.\nNewlines are OK in a string escaped with leading "E".' + , 'Time' + , 'miliseconds' + , TRUE + , 'Hash of code or other way to identify distinct benchmark versions.' + , 'Python' + ), ( + 'Benchmark 2' + , '{"arg0", "arg1"}'::text[] + , 'Description 2.' 
+ , 'Time' + , 'nanoseconds' + , TRUE + , 'version 0' + , 'Python' + ); + + +\x +SELECT * from benchmark_run_view; + +\x diff --git a/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json new file mode 100644 index 000000000..fec5eed0a --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json @@ -0,0 +1,12 @@ +{ + "query": "mutation ($p: CreateProjectInput!){createProject(input:$p){project{id}}}", + "variables": { + "p": { + "project": { + "projectName": "Apache Arrow", + "projectUrl": "https://www.arrow.apache.org", + "repoUrl": "https://www.github.com/apache/arrow" + } + } + } +} diff --git a/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json new file mode 100644 index 000000000..78804fa91 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json @@ -0,0 +1,3 @@ +{ + "query": "{allEnvironmentViews(orderBy: [BENCHMARK_LANGUAGE_ASC, LANGUAGE_IMPLEMENTATION_VERSION_ASC, DEPENDENCIES_ASC]) {edges {node {environmentId, benchmarkLanguage, languageImplementationVersion, dependencies}}}}" +} diff --git a/src/arrow/dev/benchmarking/examples/machine.json b/src/arrow/dev/benchmarking/examples/machine.json new file mode 100644 index 000000000..2485e2bc1 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/machine.json @@ -0,0 +1,22 @@ +{ + "mac_address": "0a:00:2d:01:02:03", + "machine_name": "Yet-Another-Machine-Name", + "memory_bytes": 8589934592, + "cpu_actual_frequency_hz": 2300000000, + "os_name": "OSX", + "architecture_name": "x86_64", + "kernel_name": "18.2.0", + "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", + "cpu_core_count": 2, + "cpu_thread_count": 4, + "cpu_frequency_max_hz": 2300000000, + "cpu_frequency_min_hz": 2300000000, + "cpu_l1d_cache_bytes": 32768, + "cpu_l1i_cache_bytes": 32768, + "cpu_l2_cache_bytes": 
262144, + "cpu_l3_cache_bytes": 4194304, + "machine_other_attributes": {"just": "an example"}, + "gpu_information": "", + "gpu_part_number": "", + "gpu_product_name": "" +} diff --git a/src/arrow/dev/benchmarking/graphql_submit.sh b/src/arrow/dev/benchmarking/graphql_submit.sh new file mode 100755 index 000000000..2eaab9cdf --- /dev/null +++ b/src/arrow/dev/benchmarking/graphql_submit.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+# Submit a JSON file to a GraphQL endpoint with curl.
+#
+# Positional arguments (all optional):
+#   $1 - option: one of the entries in OPTIONS (anything else prints help)
+#   $2 - path to the JSON file to submit (default 'machine.json')
+#   $3 - URI to submit to (default 'localhost:5000/graphql')
+
+OPTIONS=("machine" "benchmarks" "runs")
+
+option=${1-help}
+datafile=${2-machine.json}
+uri=${3-localhost:5000/graphql}
+
+# Print usage information on stdout.
+help() {
+  cat <<HELP
+  Submit data via GraphQL
+
+  Usage:
+  ${0} [option] [JSON_file] [URI]
+
+  Arguments:
+  option - $(echo "${OPTIONS[@]}" | sed 's/ /|/g')
+  JSON_file - path to the submission file (default 'machine.json')
+  URI - URI to submit to (default 'localhost:5000/graphql')
+HELP
+}
+
+# Escape stdin so it can be embedded in a JSON string literal.
+# Backslashes are doubled first; otherwise an input that already contains
+# an escaped quote (\") would end the surrounding string early.
+escape_quote() { sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'; }
+
+# Write the GraphQL request body to stdout.
+#   $1 - name of the mutation to call
+#   $2 - field selected from the mutation result
+template() {
+  cat <<TEMPLATE
+  {
+    "query": "mutation (\$jsonb: JSON!){${1}(input:{fromJsonb:\$jsonb}){${2}}}",
+    "variables": {
+      "jsonb": "$(tr -s '[:space:]' ' ' < "${datafile}" | escape_quote)"
+    }
+  }
+TEMPLATE
+}
+
+# POST the request produced by template() to ${uri}.
+#   $1, $2 - passed through to template()
+submit () {
+  # Fail early with a clear message instead of posting an empty payload.
+  if [[ ! -r "${datafile}" ]]; then
+    echo "Cannot read data file '${datafile}'" >&2
+    exit 1
+  fi
+  curl -X POST -H "Content-Type: application/json" --data @<(template "$1" "$2") "${uri}"
+}
+
+
+# Dispatch on the defaulted option so a missing argument falls through to help.
+case "${option}" in
+  machine)
+    submit ingestMachineView integer;;
+
+  benchmarks)
+    submit ingestBenchmarkView integers;;
+
+  runs)
+    # Choose the mutation by whether the first two lines of the data file
+    # mention "context". Use ${datafile}, not ${2}, so the default file is
+    # inspected when the argument is omitted.
+    if grep -q context <(head -n2 "${datafile}")
+    then
+      submit ingestBenchmarkRunsWithContext bigInts
+    else
+      submit ingestBenchmarkRunView bigInts
+    fi;;
+
+  *)
+    help
+    exit 1
+esac
diff --git a/src/arrow/dev/benchmarking/make_data_model_rst.sh b/src/arrow/dev/benchmarking/make_data_model_rst.sh
new file mode 100755
index 000000000..6a4f5f5b6
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_data_model_rst.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +DOTFILE=data_model.dot +OUTFILE=data_model.rst + +license() { + cat <<'LICENSE' > ${1} +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +LICENSE +} + +warning() { + cat <<'WARNING' >> ${1} +.. WARNING +.. This is an auto-generated file. Please do not edit. + +.. To reproduce, please run :code:`./make_data_model_rst.sh`. +.. (This requires you have the +.. `psql client <https://www.postgresql.org/download/>`_ +.. and have started the docker containers using +.. :code:`docker-compose up`). 
+ +WARNING +} + +echo "Making ${OUTFILE}" + +license ${OUTFILE} +warning ${OUTFILE} + +PGPASSWORD=arrow \ + psql --tuples-only --username=arrow_web \ + --dbname=benchmark --port=5432 --host=localhost \ + --command="select public.documentation('${DOTFILE}');" \ + | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE} diff --git a/src/arrow/dev/benchmarking/make_dotfile.sh b/src/arrow/dev/benchmarking/make_dotfile.sh new file mode 100755 index 000000000..b86dc3eb3 --- /dev/null +++ b/src/arrow/dev/benchmarking/make_dotfile.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+# Generate data_model.dot: a licence header, an "auto-generated" notice, and
+# the output of the database function public.documentation_dotfile().
+
+set -e
+OUTFILE=data_model.dot
+
+# Overwrite the file named by $1 with the licence header (a C-style comment).
+license() {
+  cat <<'LICENSE' > "${1}"
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+LICENSE
+}
+
+# Append the "auto-generated, do not edit" notice to the file named by $1.
+warning() {
+  cat <<'WARNING' >> "${1}"
+/*
+ WARNING
+ This is an auto-generated file. Please do not edit.
+
+ To reproduce, please run :code:`./make_dotfile.sh`.
+ (This requires you have the
+ `psql client <https://www.postgresql.org/download/>`_
+ and have started the docker containers using
+ :code:`docker-compose up`).
+*/
+WARNING
+}
+
+echo "Making ${OUTFILE}"
+
+license "${OUTFILE}"
+warning "${OUTFILE}"
+
+# Append the query result; the sed filters strip psql's trailing "+"
+# continuation marker and the single leading space from each output line.
+PGPASSWORD=arrow \
+  psql --tuples-only --username=arrow_web \
+  --dbname=benchmark --port=5432 --host=localhost \
+  --command="select public.documentation_dotfile();" \
+  | sed "s/ *+$//" | sed "s/^ //" >> "${OUTFILE}"
diff --git a/src/arrow/dev/benchmarking/make_machine_json.sh b/src/arrow/dev/benchmarking/make_machine_json.sh
new file mode 100755
index 000000000..09bf0ea2d
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_machine_json.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +OUTFILE=machine.json + +echo "Making ${OUTFILE}" +echo "** NOTE: This command fails on everything but OSX right now. **" +echo "* also, the intent is to make this script not suck, just not now. *" +echo "Please type GPU details here (or manually modify ${OUTFILE} later)." +read -p "GPU information string (or <enter>): " gpu_information +read -p "GPU part number (or <enter>): " gpu_part_number +read -p "GPU product name (or <enter>): " gpu_product_name + + +cat <<MACHINE_JSON > ${OUTFILE} +{ + "mac_address": "$(ifconfig en1 | awk '/ether/{print $2}')", + "machine_name": "$(uname -n)", + "memory_bytes": $(sysctl -n hw.memsize), + "cpu_actual_frequency_hz": $(sysctl -n hw.cpufrequency), + "os_name": "$(uname -s)", + "architecture_name": "$(uname -m)", + "kernel_name": "$(uname -r)", + "cpu_model_name": "$(sysctl -n machdep.cpu.brand_string)", + "cpu_core_count": $(sysctl -n hw.physicalcpu), + "cpu_thread_count": $(sysctl -n hw.logicalcpu), + "cpu_frequency_max_hz": $(sysctl -n hw.cpufrequency_max), + "cpu_frequency_min_hz": $(sysctl -n hw.cpufrequency_min), + "cpu_l1d_cache_bytes": $(sysctl -n hw.l1dcachesize), + "cpu_l1i_cache_bytes": $(sysctl -n hw.l1icachesize), + "cpu_l2_cache_bytes": $(sysctl -n hw.l2cachesize), + "cpu_l3_cache_bytes": $(sysctl -n hw.l3cachesize), + "gpu_information": "${gpu_information}", + "gpu_part_number": "${gpu_part_number}", + "gpu_product_name": 
"${gpu_product_name}" +} +MACHINE_JSON + +echo "Machine details saved in ${OUTFILE}" diff --git a/src/arrow/dev/conbench_envs/README.md b/src/arrow/dev/conbench_envs/README.md new file mode 100644 index 000000000..5a4eb58b2 --- /dev/null +++ b/src/arrow/dev/conbench_envs/README.md @@ -0,0 +1,214 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +# Benchmark Builds Env and Hooks +This directory contains: +- [benchmarks.env](benchmarks.env) - list of env vars used for building Arrow C++/Python/R/Java/JavaScript and running benchmarks using [conbench](https://ursalabs.org/blog/announcing-conbench/). +- [hooks.sh](hooks.sh) - hooks used by <b>@ursabot</b> benchmark builds that are triggered by `@ursabot please benchmark` PR comments. + +## How to add or update Arrow build and run env vars used by `@ursabot` benchmark builds +1. Create `apache/arrow` PR +2. Update or add env var value in [benchmarks.env](../../dev/conbench_envs/benchmarks.env) +3. Add `@ursabot please benchmark` comment to PR +4. 
Once benchmark builds are done, benchmark results can be viewed via compare/runs links in the PR comment where +- baseline = PR base HEAD commit with unaltered `/dev/conbench_envs/benchmarks.env` +- contender = PR branch HEAD commit with overridden `/dev/conbench_envs/benchmarks.env` + +## Why do `@ursabot` benchmark builds need `hooks.sh`? +`@ursabot` benchmark builds are maintained in Ursa's private repo. +Benchmark builds use `hooks.sh` functions as hooks to create conda env with Arrow dependencies and build Arrow C++/Python/R/Java/JavaScript from source for a specific Arrow repo's commit. + +Defining hooks in Arrow repo allows benchmark builds for a specific commit to be +compatible with the files/scripts *in that commit* which are used for installing Arrow +dependencies and building Arrow. This allows Arrow contributors to assess the performance +implications of different build options, dependency versions, etc by updating +`hooks.sh`. + +## Can other repos and services use `benchmarks.env` and `hooks.sh`? + +Yes, other repos and services are welcome to use `benchmarks.env` and `hooks.sh` as long as +- existing hooks are not removed or renamed. +- function definitions for existing hooks can only be updated in the Arrow commit where Arrow build scripts or files with dependencies have been renamed, moved or added. +- benchmark builds are run using `@ursabot please benchmark` PR comment to confirm that function definition updates do not break benchmark builds. + +## How can other repos and services use `benchmarks.env` and `hooks.sh` to set up benchmark env? +Here are the steps `@ursabot` benchmark builds use with `benchmarks.env` and `hooks.sh` to set up benchmarking env on Ubuntu: + +### 1. 
Install Arrow dependencies + sudo su + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-regex-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libgflags-dev \ + libcurl4-openssl-dev \ + libgoogle-glog-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + rapidjson-dev \ + tzdata \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + + apt-get update -y -q && \ + apt-get install -y -q \ + python3 \ + python3-pip \ + python3-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +### 2. Install Arrow dependencies for Java + sudo su + apt-get install openjdk-8-jdk + apt-get install maven + +Verify that you have at least these versions of `java`, `javac` and `maven`: + + # java -version + openjdk version "1.8.0_292" + .. + # javac -version + javac 1.8.0_292 + ... + # mvn -version + Apache Maven 3.6.3 + ... + +### 3. Install Arrow dependencies for Java Script + sudo apt update + sudo apt -y upgrade + sudo apt update + sudo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates + curl -fsSL https://deb.nodesource.com/setup_14.x | sudo -E bash - + sudo apt-get install -y nodejs + sudo apt -y install yarn + sudo apt -y install gcc g++ make + +Verify that you have at least these versions of `node` and `yarn`: + + # node --version + v14.17.2 + ... + # yarn --version + 1.22.5 + ... + +### 4. Install Conda + sudo apt install curl + curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + sudo bash Miniconda3-latest-Linux-x86_64.sh + +### 5. 
Set env vars: + export ARROW_REPO=https://github.com/apache/arrow.git + export BENCHMARKABLE=e6e9e6ea52b7a8f2682ffc4160168c936ca1d3e6 + export BENCHMARKABLE_TYPE=arrow-commit + export PYTHON_VERSION=3.8 + export CONBENCH_EMAIL=... + export CONBENCH_URL="https://conbench.ursa.dev" + export CONBENCH_PASSWORD=... + export MACHINE=... + +### 6. Use `create_conda_env_with_arrow_python` hook to create conda env and build Arrow C++ and Arrow Python + git clone "${ARROW_REPO}" + pushd arrow + git fetch -v --prune -- origin "${BENCHMARKABLE}" + git checkout -f "${BENCHMARKABLE}" + source dev/conbench_envs/hooks.sh create_conda_env_with_arrow_python + popd + +### 7. Install conbench + git clone https://github.com/ursacomputing/conbench.git + pushd conbench + pip install -r requirements-cli.txt + pip install -U PyYAML + python setup.py install + popd + +### 8. Setup benchmarks repo + git clone https://github.com/ursacomputing/benchmarks.git + pushd benchmarks + python setup.py develop + popd + +### 9. Setup conbench credentials + pushd benchmarks + touch .conbench + echo "url: $CONBENCH_URL" >> .conbench + echo "email: $CONBENCH_EMAIL" >> .conbench + echo "password: $CONBENCH_PASSWORD" >> .conbench + echo "host_name: $MACHINE" >> .conbench + popd + +### 10. Run Python benchmarks + cd benchmarks + conbench file-read ALL --iterations=3 --all=true --drop-caches=true + +### 11. Use `install_archery` hook to setup archery and run C++ benchmarks + pushd arrow + source dev/conbench_envs/hooks.sh install_archery + popd + cd benchmarks + conbench cpp-micro --iterations=1 + +### 12. Use `build_arrow_r` hook to build Arrow R and run R benchmarks + pushd arrow + source dev/conbench_envs/hooks.sh build_arrow_r + popd + R -e "remotes::install_github('ursacomputing/arrowbench')" + cd benchmarks + conbench dataframe-to-table ALL --iterations=3 --drop-caches=true --language=R + +### 13. 
Use `build_arrow_java` and `install_archery` hooks to build Arrow Java and run Java benchmarks + pushd arrow + source dev/conbench_envs/hooks.sh build_arrow_java + source dev/conbench_envs/hooks.sh install_archery + popd + cd benchmarks + conbench java-micro --iterations=1 + +### 14. Use `install_java_script_project_dependencies` hook to install Java Script dependencies and run Java Script benchmarks + pushd arrow + source dev/conbench_envs/hooks.sh install_java_script_project_dependencies + popd + cd benchmarks + conbench js-micro diff --git a/src/arrow/dev/conbench_envs/benchmarks.env b/src/arrow/dev/conbench_envs/benchmarks.env new file mode 100644 index 000000000..6c151aa7c --- /dev/null +++ b/src/arrow/dev/conbench_envs/benchmarks.env @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +ARROW_BUILD_TESTS=OFF +ARROW_BUILD_TYPE=release +ARROW_DEPENDENCY_SOURCE=AUTO +ARROW_DATASET=ON +ARROW_DEFAULT_MEMORY_POOL=mimalloc +ARROW_ENABLE_UNSAFE_MEMORY_ACCESS=true +ARROW_ENABLE_NULL_CHECK_FOR_GET=false +ARROW_FLIGHT=OFF +ARROW_GANDIVA=OFF +ARROW_HDFS=ON +ARROW_HOME=$CONDA_PREFIX +ARROW_INSTALL_NAME_RPATH=OFF +ARROW_MIMALLOC=ON +ARROW_NO_DEPRECATED_API=ON +ARROW_ORC=ON +ARROW_PARQUET=ON +ARROW_PLASMA=ON +ARROW_PYTHON=ON +ARROW_S3=ON +ARROW_USE_ASAN=OFF +ARROW_USE_CCACHE=ON +ARROW_USE_UBSAN=OFF +ARROW_WITH_BROTLI=ON +ARROW_WITH_BZ2=ON +ARROW_WITH_LZ4=ON +ARROW_WITH_SNAPPY=ON +ARROW_WITH_ZLIB=ON +ARROW_WITH_ZSTD=ON +GTest_SOURCE=BUNDLED +ORC_SOURCE=BUNDLED +PARQUET_BUILD_EXAMPLES=ON +PARQUET_BUILD_EXECUTABLES=ON +PYTHON=python +LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
\ No newline at end of file diff --git a/src/arrow/dev/conbench_envs/hooks.sh b/src/arrow/dev/conbench_envs/hooks.sh new file mode 100755 index 000000000..6bcfbe446 --- /dev/null +++ b/src/arrow/dev/conbench_envs/hooks.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +## These hooks are used by benchmark builds +# to create a conda env with Arrow dependencies and build Arrow C++, Python, etc +create_conda_env_for_benchmark_build() { + conda create -y -n "${BENCHMARKABLE_TYPE}" -c conda-forge \ + --file ci/conda_env_unix.txt \ + --file ci/conda_env_cpp.txt \ + --file ci/conda_env_python.txt \ + --file ci/conda_env_gandiva.txt \ + compilers \ + python="${PYTHON_VERSION}" \ + pandas \ + aws-sdk-cpp \ + r +} + +activate_conda_env_for_benchmark_build() { + conda init bash + conda activate "${BENCHMARKABLE_TYPE}" +} + +install_arrow_python_dependencies() { + pip install -r python/requirements-build.txt -r python/requirements-test.txt +} + +set_arrow_build_and_run_env_vars() { + set -a + source dev/conbench_envs/benchmarks.env + set +a +} + +build_arrow_cpp() { + # Ignore the error when a cache can't be created + if ! ci/scripts/cpp_build.sh $(pwd) $(pwd) 2> error.log; then + if ! 
grep -q -F "Can\'t create temporary cache file" error.log; then + cat error.log + fi + fi +} + +build_arrow_python() { + ci/scripts/python_build.sh $(pwd) $(pwd) +} + +build_arrow_r() { + cat ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site + ci/scripts/r_deps.sh $(pwd) $(pwd) + (cd r; R CMD INSTALL .;) +} + +build_arrow_java() { + ci/scripts/java_build.sh $(pwd) $(pwd) +} + +install_archery() { + pip install -e dev/archery +} + +install_java_script_project_dependencies() { + (cd js; yarn;) +} + +create_conda_env_with_arrow_python() { + create_conda_env_for_benchmark_build + activate_conda_env_for_benchmark_build + install_arrow_python_dependencies + set_arrow_build_and_run_env_vars + build_arrow_cpp + build_arrow_python +} + +"$@" diff --git a/src/arrow/dev/merge.conf.sample b/src/arrow/dev/merge.conf.sample new file mode 100644 index 000000000..c71b21161 --- /dev/null +++ b/src/arrow/dev/merge.conf.sample @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Configuration for the merge_arrow_pr.py tool +# Install a copy of this file at ~/.config/arrow/merge.conf + +[jira] +# issues.apache.org JIRA credentials. Sadly, the jira instance doesn't offer +# token credentials. Ensure that the file is properly protected. 
+username=johnsmith +password=123456 diff --git a/src/arrow/dev/merge_arrow_pr.py b/src/arrow/dev/merge_arrow_pr.py new file mode 100755 index 000000000..a8ff0b0e2 --- /dev/null +++ b/src/arrow/dev/merge_arrow_pr.py @@ -0,0 +1,613 @@ +#!/usr/bin/env python3 + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Utility for creating well-formed pull request merges and pushing them to +# Apache. +# usage: ./merge_arrow_pr.py (see config env vars below) +# +# This utility assumes you already have a local Arrow git clone and that you +# have added remotes corresponding to both (i) the GitHub Apache Arrow mirror +# and (ii) the apache git repo. +# +# There are several pieces of authorization possibly needed via environment +# variables +# +# APACHE_JIRA_USERNAME: your Apache JIRA id +# APACHE_JIRA_PASSWORD: your Apache JIRA password +# ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid +# rate limiting) + +import configparser +import os +import pprint +import re +import subprocess +import sys +import requests +import getpass + +from six.moves import input +import six + +try: + import jira.client + import jira.exceptions +except ImportError: + print("Could not find jira library. 
" + "Run 'sudo pip install jira' to install.") + print("Exiting without trying to close the associated JIRA.") + sys.exit(1) + +# Remote name which points to the GitHub site +PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache") + +# For testing to avoid accidentally pushing to apache +DEBUG = bool(int(os.environ.get("DEBUG", 0))) + + +if DEBUG: + print("**************** DEBUGGING ****************") + + +# Prefix added to temporary branches +BRANCH_PREFIX = "PR_TOOL" +JIRA_API_BASE = "https://issues.apache.org/jira" + + +def get_json(url, headers=None): + req = requests.get(url, headers=headers) + return req.json() + + +def run_cmd(cmd): + if isinstance(cmd, six.string_types): + cmd = cmd.split(' ') + + try: + output = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + # this avoids hiding the stdout / stderr of failed processes + print('Command failed: %s' % cmd) + print('With output:') + print('--------------') + print(e.output) + print('--------------') + raise e + + if isinstance(output, six.binary_type): + output = output.decode('utf-8') + return output + + +original_head = run_cmd("git rev-parse HEAD")[:8] + + +def clean_up(): + print("Restoring head pointer to %s" % original_head) + run_cmd("git checkout %s" % original_head) + + branches = run_cmd("git branch").replace(" ", "").split("\n") + + for branch in [x for x in branches + if x.startswith(BRANCH_PREFIX)]: + print("Deleting local branch %s" % branch) + run_cmd("git branch -D %s" % branch) + + +_REGEX_CI_DIRECTIVE = re.compile(r'\[[^\]]*\]') + + +def strip_ci_directives(commit_message): + # Remove things like '[force ci]', '[skip appveyor]' from the assembled + # commit message + return _REGEX_CI_DIRECTIVE.sub('', commit_message) + + +def fix_version_from_branch(branch, versions): + # Note: Assumes this is a sorted (newest->oldest) list of un-released + # versions + if branch == "master": + return versions[-1] + else: + branch_ver = branch.replace("branch-", "") + return 
[x for x in versions if x.name.startswith(branch_ver)][-1] + + +# We can merge both ARROW and PARQUET patches +SUPPORTED_PROJECTS = ['ARROW', 'PARQUET'] +PR_TITLE_REGEXEN = [(project, re.compile(r'^(' + project + r'-[0-9]+)\b.*$')) + for project in SUPPORTED_PROJECTS] + + +class JiraIssue(object): + + def __init__(self, jira_con, jira_id, project, cmd): + self.jira_con = jira_con + self.jira_id = jira_id + self.project = project + self.cmd = cmd + + try: + self.issue = jira_con.issue(jira_id) + except Exception as e: + self.cmd.fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) + + @property + def current_fix_versions(self): + return self.issue.fields.fixVersions + + def get_candidate_fix_versions(self, merge_branches=('master',)): + # Only suggest versions starting with a number, like 0.x but not JS-0.x + all_versions = self.jira_con.project_versions(self.project) + unreleased_versions = [x for x in all_versions + if not x.raw['released']] + + unreleased_versions = sorted(unreleased_versions, + key=lambda x: x.name, reverse=True) + + mainline_versions = self._filter_mainline_versions(unreleased_versions) + + mainline_non_patch_versions = [] + for v in mainline_versions: + (major, minor, patch) = v.name.split(".") + if patch == "0": + mainline_non_patch_versions.append(v) + + if len(mainline_versions) > len(mainline_non_patch_versions): + # If there is a non-patch release, suggest that instead + mainline_versions = mainline_non_patch_versions + + default_fix_versions = [ + fix_version_from_branch(x, mainline_versions).name + for x in merge_branches] + + return all_versions, default_fix_versions + + def _filter_mainline_versions(self, versions): + if self.project == 'PARQUET': + mainline_regex = re.compile(r'cpp-\d.*') + else: + mainline_regex = re.compile(r'\d.*') + + return [x for x in versions if mainline_regex.match(x.name)] + + def resolve(self, fix_versions, comment): + fields = self.issue.fields + cur_status = fields.status.name + + if cur_status == 
"Resolved" or cur_status == "Closed": + self.cmd.fail("JIRA issue %s already has status '%s'" + % (self.jira_id, cur_status)) + + if DEBUG: + print("JIRA issue %s untouched" % (self.jira_id)) + return + + resolve = [x for x in self.jira_con.transitions(self.jira_id) + if x['name'] == "Resolve Issue"][0] + + # ARROW-6915: do not overwrite existing fix versions corresponding to + # point releases + fix_versions = list(fix_versions) + fix_version_names = set(x['name'] for x in fix_versions) + for version in self.current_fix_versions: + major, minor, patch = version.name.split('.') + if patch != '0' and version.name not in fix_version_names: + fix_versions.append(version.raw) + + self.jira_con.transition_issue(self.jira_id, resolve["id"], + comment=comment, + fixVersions=fix_versions) + + print("Successfully resolved %s!" % (self.jira_id)) + + self.issue = self.jira_con.issue(self.jira_id) + self.show() + + def show(self): + fields = self.issue.fields + print(format_jira_output(self.jira_id, fields.status.name, + fields.summary, fields.assignee, + fields.components)) + + +def format_jira_output(jira_id, status, summary, assignee, components): + if assignee is None: + assignee = "NOT ASSIGNED!!!" + else: + assignee = assignee.displayName + + if len(components) == 0: + components = 'NO COMPONENTS!!!' 
+ else: + components = ', '.join((x.name for x in components)) + + return """=== JIRA {} === +Summary\t\t{} +Assignee\t{} +Components\t{} +Status\t\t{} +URL\t\t{}/{}""".format(jira_id, summary, assignee, components, status, + '/'.join((JIRA_API_BASE, 'browse')), jira_id) + + +class GitHubAPI(object): + + def __init__(self, project_name): + self.github_api = ("https://api.github.com/repos/apache/{0}" + .format(project_name)) + + token = os.environ.get('ARROW_GITHUB_API_TOKEN', None) + if token: + self.headers = {'Authorization': 'token {0}'.format(token)} + else: + self.headers = None + + def get_pr_data(self, number): + return get_json("%s/pulls/%s" % (self.github_api, number), + headers=self.headers) + + +class CommandInput(object): + """ + Interface to input(...) to enable unit test mocks to be created + """ + + def fail(self, msg): + clean_up() + raise Exception(msg) + + def prompt(self, prompt): + return input(prompt) + + def getpass(self, prompt): + return getpass.getpass(prompt) + + def continue_maybe(self, prompt): + while True: + result = input("\n%s (y/n): " % prompt) + if result.lower() == "y": + return + elif result.lower() == "n": + self.fail("Okay, exiting") + else: + prompt = "Please input 'y' or 'n'" + + +class PullRequest(object): + + def __init__(self, cmd, github_api, git_remote, jira_con, number): + self.cmd = cmd + self.git_remote = git_remote + self.con = jira_con + self.number = number + self._pr_data = github_api.get_pr_data(number) + try: + self.url = self._pr_data["url"] + self.title = self._pr_data["title"] + self.body = self._pr_data["body"] + self.target_ref = self._pr_data["base"]["ref"] + self.user_login = self._pr_data["user"]["login"] + self.base_ref = self._pr_data["head"]["ref"] + except KeyError: + pprint.pprint(self._pr_data) + raise + self.description = "%s/%s" % (self.user_login, self.base_ref) + + self.jira_issue = self._get_jira() + + def show(self): + print("\n=== Pull Request #%s ===" % self.number) + 
print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" + % (self.title, self.description, self.target_ref, self.url)) + if self.jira_issue is not None: + self.jira_issue.show() + else: + print("Minor PR. Please ensure it meets guidelines for minor.\n") + + @property + def is_merged(self): + return bool(self._pr_data["merged"]) + + @property + def is_mergeable(self): + return bool(self._pr_data["mergeable"]) + + def _get_jira(self): + if self.title.startswith("MINOR:"): + return None + + jira_id = None + for project, regex in PR_TITLE_REGEXEN: + m = regex.search(self.title) + if m: + jira_id = m.group(1) + break + + if jira_id is None: + options = ' or '.join('{0}-XXX'.format(project) + for project in SUPPORTED_PROJECTS) + self.cmd.fail("PR title should be prefixed by a jira id " + "{0}, but found {1}".format(options, self.title)) + + return JiraIssue(self.con, jira_id, project, self.cmd) + + def merge(self): + """ + merge the requested PR and return the merge hash + """ + pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, self.number) + target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, + self.number, + self.target_ref.upper()) + run_cmd("git fetch %s pull/%s/head:%s" % (self.git_remote, + self.number, + pr_branch_name)) + run_cmd("git fetch %s %s:%s" % (self.git_remote, self.target_ref, + target_branch_name)) + run_cmd("git checkout %s" % target_branch_name) + + had_conflicts = False + try: + run_cmd(['git', 'merge', pr_branch_name, '--ff', '--squash']) + except Exception as e: + msg = ("Error merging: %s\nWould you like to " + "manually fix-up this merge?" % e) + self.cmd.continue_maybe(msg) + msg = ("Okay, please fix any conflicts and 'git add' " + "conflicting files... 
Finished?") + self.cmd.continue_maybe(msg) + had_conflicts = True + + commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, + '--pretty=format:%an <%ae>']).split("\n") + distinct_authors = sorted(set(commit_authors), + key=lambda x: commit_authors.count(x), + reverse=True) + + for i, author in enumerate(distinct_authors): + print("Author {}: {}".format(i + 1, author)) + + if len(distinct_authors) > 1: + primary_author, distinct_authors = get_primary_author( + self.cmd, distinct_authors) + else: + # If there is only one author, do not prompt for a lead author + primary_author = distinct_authors[0] + + merge_message_flags = [] + + merge_message_flags += ["-m", self.title] + if self.body is not None: + merge_message_flags += ["-m", self.body] + + committer_name = run_cmd("git config --get user.name").strip() + committer_email = run_cmd("git config --get user.email").strip() + + authors = ("Authored-by:" if len(distinct_authors) == 1 + else "Lead-authored-by:") + authors += " %s" % (distinct_authors.pop(0)) + if len(distinct_authors) > 0: + authors += "\n" + "\n".join(["Co-authored-by: %s" % a + for a in distinct_authors]) + authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name, + committer_email) + + if had_conflicts: + committer_name = run_cmd("git config --get user.name").strip() + committer_email = run_cmd("git config --get user.email").strip() + message = ("This patch had conflicts when merged, " + "resolved by\nCommitter: %s <%s>" % + (committer_name, committer_email)) + merge_message_flags += ["-m", message] + + # The string "Closes #%s" string is required for GitHub to correctly + # close the PR + merge_message_flags += [ + "-m", + "Closes #%s from %s" + % (self.number, self.description)] + merge_message_flags += ["-m", authors] + + if DEBUG: + print("\n".join(merge_message_flags)) + + run_cmd(['git', 'commit', + '--no-verify', # do not run commit hooks + '--author="%s"' % primary_author] + + merge_message_flags) + + 
self.cmd.continue_maybe("Merge complete (local ref %s). Push to %s?" + % (target_branch_name, self.git_remote)) + + try: + push_cmd = ('git push %s %s:%s' % (self.git_remote, + target_branch_name, + self.target_ref)) + if DEBUG: + print(push_cmd) + else: + run_cmd(push_cmd) + except Exception as e: + clean_up() + self.cmd.fail("Exception while pushing: %s" % e) + + merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8] + clean_up() + print("Pull request #%s merged!" % self.number) + print("Merge hash: %s" % merge_hash) + return merge_hash + + +def get_primary_author(cmd, distinct_authors): + author_pat = re.compile(r'(.*) <(.*)>') + + while True: + primary_author = cmd.prompt( + "Enter primary author in the format of " + "\"name <email>\" [%s]: " % distinct_authors[0]) + + if primary_author == "": + return distinct_authors[0], distinct_authors + + if author_pat.match(primary_author): + break + print('Bad author "{}", please try again'.format(primary_author)) + + # When primary author is specified manually, de-dup it from + # author list and put it at the head of author list. 
+ distinct_authors = [x for x in distinct_authors + if x != primary_author] + distinct_authors = [primary_author] + distinct_authors + return primary_author, distinct_authors + + +def prompt_for_fix_version(cmd, jira_issue): + (all_versions, + default_fix_versions) = jira_issue.get_candidate_fix_versions() + + default_fix_versions = ",".join(default_fix_versions) + + issue_fix_versions = cmd.prompt("Enter comma-separated " + "fix version(s) [%s]: " + % default_fix_versions) + if issue_fix_versions == "": + issue_fix_versions = default_fix_versions + issue_fix_versions = issue_fix_versions.replace(" ", "").split(",") + + def get_version_json(version_str): + return [x for x in all_versions if x.name == version_str][0].raw + + return [get_version_json(v) for v in issue_fix_versions] + + +CONFIG_FILE = "~/.config/arrow/merge.conf" + + +def load_configuration(): + config = configparser.ConfigParser() + config.read(os.path.expanduser(CONFIG_FILE)) + return config + + +def get_credentials(cmd): + username, password = None, None + + config = load_configuration() + if "jira" in config.sections(): + username = config["jira"].get("username") + password = config["jira"].get("password") + + # Fallback to environment variables + if not username: + username = os.environ.get("APACHE_JIRA_USERNAME") + + if not password: + password = os.environ.get("APACHE_JIRA_PASSWORD") + + # Fallback to user tty prompt + if not username: + username = cmd.prompt("Env APACHE_JIRA_USERNAME not set, " + "please enter your JIRA username:") + + if not password: + password = cmd.getpass("Env APACHE_JIRA_PASSWORD not set, " + "please enter your JIRA password:") + + return (username, password) + + +def connect_jira(cmd): + try: + return jira.client.JIRA(options={'server': JIRA_API_BASE}, + basic_auth=get_credentials(cmd)) + except jira.exceptions.JIRAError as e: + if "CAPTCHA_CHALLENGE" in e.text: + print("") + print("It looks like you need to answer a captcha challenge for " + "this account (probably due 
to a login attempt with an " + "incorrect password). Please log in at " + "https://issues.apache.org/jira and complete the captcha " + "before running this tool again.") + print("Exiting.") + sys.exit(1) + raise e + + +def get_pr_num(): + if len(sys.argv) == 2: + return sys.argv[1] + + return input("Which pull request would you like to merge? (e.g. 34): ") + + +def cli(): + # Location of your Arrow git clone + ARROW_HOME = os.path.abspath(os.path.dirname(__file__)) + PROJECT_NAME = os.environ.get('ARROW_PROJECT_NAME') or 'arrow' + print("ARROW_HOME = " + ARROW_HOME) + print("PROJECT_NAME = " + PROJECT_NAME) + + cmd = CommandInput() + + pr_num = get_pr_num() + + os.chdir(ARROW_HOME) + + github_api = GitHubAPI(PROJECT_NAME) + + jira_con = connect_jira(cmd) + pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num) + + if pr.is_merged: + print("Pull request %s has already been merged") + sys.exit(0) + + if not pr.is_mergeable: + msg = ("Pull request %s is not mergeable in its current form.\n" + % pr_num + "Continue? (experts only!)") + cmd.continue_maybe(msg) + + pr.show() + + cmd.continue_maybe("Proceed with merging pull request #%s?" % pr_num) + + # merged hash not used + pr.merge() + + if pr.jira_issue is None: + print("Minor PR. 
No JIRA issue to update.\n") + return + + cmd.continue_maybe("Would you like to update the associated JIRA?") + jira_comment = ( + "Issue resolved by pull request %s\n[%s/%s]" + % (pr_num, + "https://github.com/apache/" + PROJECT_NAME + "/pull", + pr_num)) + + fix_versions_json = prompt_for_fix_version(cmd, pr.jira_issue) + pr.jira_issue.resolve(fix_versions_json, jira_comment) + + +if __name__ == '__main__': + try: + cli() + except Exception: + raise diff --git a/src/arrow/dev/merge_arrow_pr.sh b/src/arrow/dev/merge_arrow_pr.sh new file mode 100755 index 000000000..5b55780dc --- /dev/null +++ b/src/arrow/dev/merge_arrow_pr.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env sh + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Wrapper script that automatically creates a Python virtual environment +# and runs merge_arrow_pr.py inside it. 
+ +set -e + +PYTHON=$(which python3) +PYVER=$($PYTHON -c "import sys; print('.'.join(map(str, sys.version_info[:2])))") + +GIT_ROOT=$(git rev-parse --show-toplevel) +ENV_DIR=$GIT_ROOT/dev/.venv$PYVER + +ENV_PYTHON=$ENV_DIR/bin/python3 +ENV_PIP="$ENV_PYTHON -m pip --no-input" + +check_venv() { + [ -x $ENV_PYTHON ] || { + echo "Virtual environment broken: $ENV_PYTHON not an executable" + exit 1 + } +} + +create_venv() { + echo "" + echo "Creating Python virtual environment in $ENV_DIR ..." + echo "" + $PYTHON -m venv $ENV_DIR + $ENV_PIP install -q -r $GIT_ROOT/dev/requirements_merge_arrow_pr.txt || { + echo "Failed to setup virtual environment" + echo "Please delete directory '$ENV_DIR' and try again" + # "$?" would be the status of the echo above (0), so exit non-zero explicitly + exit 1 + } +} + +[ -d $ENV_DIR ] || create_venv +check_venv + +$ENV_PYTHON $GIT_ROOT/dev/merge_arrow_pr.py "$@" diff --git a/src/arrow/dev/release/.env.example b/src/arrow/dev/release/.env.example new file mode 100644 index 000000000..50c8ec8e6 --- /dev/null +++ b/src/arrow/dev/release/.env.example @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# The GPG key ID to sign artifacts. 
The GPG key ID must be registered +# in both of the following: +# +# * https://dist.apache.org/repos/dist/dev/arrow/KEYS +# * https://dist.apache.org/repos/dist/release/arrow/KEYS +# +# See these files for how to import your GPG key ID into them. +# +# You must set this. +#GPG_KEY_ID=08D3564B7C6A9CAFBFF6A66791D18FCF079F8007 + +# The Artifactory API key to upload artifacts to Artifactory. +# +# You must set this. +#ARTIFACTORY_API_KEY=secret diff --git a/src/arrow/dev/release/.gitignore b/src/arrow/dev/release/.gitignore new file mode 100644 index 000000000..f3d708a6a --- /dev/null +++ b/src/arrow/dev/release/.gitignore @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +/.env +/binary/id_rsa +/binary/id_rsa.pub +/binary/tmp/ diff --git a/src/arrow/dev/release/01-prepare-test.rb b/src/arrow/dev/release/01-prepare-test.rb new file mode 100644 index 000000000..51665ec02 --- /dev/null +++ b/src/arrow/dev/release/01-prepare-test.rb @@ -0,0 +1,586 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class PrepareTest < Test::Unit::TestCase + include GitRunnable + include VersionDetectable + + def setup + @current_commit = git_current_commit + detect_versions + + top_dir = Pathname(__dir__).parent.parent + @original_git_repository = top_dir + ".git" + Dir.mktmpdir do |dir| + @test_git_repository = Pathname(dir) + "arrow" + git("clone", @original_git_repository.to_s, @test_git_repository.to_s) + Dir.chdir(@test_git_repository) do + @tag_name = "apache-arrow-#{@release_version}" + @release_branch = "testing-release-#{@release_version}-rc0" + git("checkout", "-b", @release_branch, @current_commit) + yield + end + FileUtils.rm_rf(@test_git_repository) + end + end + + def omit_on_release_branch + omit("Not for release branch") if on_release_branch? 
+ end + + def prepare(*targets) + if targets.last.is_a?(Hash) + additional_env = targets.pop + else + additional_env = {} + end + env = { "PREPARE_DEFAULT" => "0" } + targets.each do |target| + env["PREPARE_#{target}"] = "1" + end + env = env.merge(additional_env) + sh(env, "dev/release/01-prepare.sh", @release_version, @next_version, "0") + end + + def bump_versions(*targets) + env = { "BUMP_DEFAULT" => "0" } + targets.each do |target| + env["BUMP_#{target}"] = "1" + end + sh(env, "dev/release/post-12-bump-versions.sh", @release_version, + @next_version) + end + + def parse_patch(patch) + diffs = [] + in_hunk = false + patch.each_line do |line| + case line + when /\A--- a\// + path = $POSTMATCH.chomp + diffs << { path: path, hunks: [] } + in_hunk = false + when /\A@@/ + in_hunk = true + diffs.last[:hunks] << [] + when /\A[-+]/ + next unless in_hunk + diffs.last[:hunks].last << line.chomp + end + end + diffs.sort_by do |diff| + diff[:path] + end + end + + def test_linux_packages + user = "Arrow Developers" + email = "dev@arrow.apache.org" + prepare("LINUX_PACKAGES", "DEBFULLNAME" => user, "DEBEMAIL" => email) + changes = parse_patch(git("log", "-n", "1", "-p")) + sampled_changes = changes.collect do |change| + { + path: change[:path], + sampled_hunks: change[:hunks].collect(&:first), + } + end + base_dir = "dev/tasks/linux-packages" + today = Time.now.utc.strftime("%a %b %d %Y") + expected_changes = [ + { + path: "#{base_dir}/apache-arrow-apt-source/debian/changelog", + sampled_hunks: [ + "+apache-arrow-apt-source (#{@release_version}-1) " + + "unstable; urgency=low", + ], + }, + { + path: "#{base_dir}/apache-arrow-release/yum/apache-arrow-release.spec.in", + sampled_hunks: [ + "+* #{today} #{user} <#{email}> - #{@release_version}-1", + ], + }, + { + path: "#{base_dir}/apache-arrow/debian/changelog", + sampled_hunks: [ + "+apache-arrow (#{@release_version}-1) unstable; urgency=low", + ], + }, + { + path: "#{base_dir}/apache-arrow/yum/arrow.spec.in", + 
sampled_hunks: [ + "+* #{today} #{user} <#{email}> - #{@release_version}-1", + ], + }, + ] + assert_equal(expected_changes, sampled_changes) + end + + def test_version_pre_tag + omit_on_release_branch + prepare("VERSION_PRE_TAG") + assert_equal([ + { + path: "c_glib/meson.build", + hunks: [ + ["-version = '#{@snapshot_version}'", + "+version = '#{@release_version}'"], + ], + }, + { + path: "ci/scripts/PKGBUILD", + hunks: [ + ["-pkgver=#{@previous_version}.9000", + "+pkgver=#{@release_version}"], + ], + }, + { + path: "cpp/CMakeLists.txt", + hunks: [ + ["-set(ARROW_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_VERSION \"#{@release_version}\")"], + ], + }, + { + path: "cpp/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@release_version}\","], + ], + }, + { + path: "csharp/Directory.Build.props", + hunks: [ + ["- <Version>#{@snapshot_version}</Version>", + "+ <Version>#{@release_version}</Version>"], + ], + }, + { + path: "dev/tasks/homebrew-formulae/apache-arrow.rb", + hunks: [ + ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\""], + ], + }, + { + path: "dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb", + hunks: [ + ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@previous_version}.9000/apache-arrow-#{@previous_version}.9000.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\""], + ], + }, + { + path: "java/adapter/avro/pom.xml", + hunks: [ + ["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"], + ], + }, + { + hunks: [ + ["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"], + ], + path: 
"java/adapter/jdbc/pom.xml", + }, + { + hunks: [ + ["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"], + ], + path: "java/adapter/orc/pom.xml", + }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/algorithm/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/c/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/compression/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/dataset/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/flight/flight-core/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/flight/flight-grpc/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@release_version}</version>"]], + path: "java/format/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/gandiva/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/memory/memory-core/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/memory/memory-netty/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/memory/memory-unsafe/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/memory/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ 
<version>#{@release_version}</version>"], + ["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/performance/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/plasma/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@release_version}</version>"]], + path: "java/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/tools/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@release_version}</version>"]], + path: "java/vector/pom.xml" }, + { + path: "js/package.json", + hunks: [ + ["- \"version\": \"#{@snapshot_version}\"", + "+ \"version\": \"#{@release_version}\""], + ], + }, + { + path: "matlab/CMakeLists.txt", + hunks: [ + ["-set(MLARROW_VERSION \"#{@snapshot_version}\")", + "+set(MLARROW_VERSION \"#{@release_version}\")"], + ], + }, + { + path: "python/setup.py", + hunks: [ + ["-default_version = '#{@snapshot_version}'", + "+default_version = '#{@release_version}'"], + ], + }, + { + path: "r/DESCRIPTION", + hunks: [ + ["-Version: #{@previous_version}.9000", + "+Version: #{@release_version}"], + ], + }, + { + path: "r/NEWS.md", + hunks: [ + ["-\# arrow #{@previous_version}.9000", + "+\# arrow #{@release_version}"], + ], + }, + { + path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-arrow-dataset/lib/arrow-dataset/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-arrow/lib/arrow/version.rb", + hunks: [ 
+ ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-gandiva/lib/gandiva/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-parquet/lib/parquet/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + { + path: "ruby/red-plasma/lib/plasma/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@release_version}\""], + ], + }, + ], + parse_patch(git("log", "-n", "1", "-p"))) + end + + def test_version_post_tag + omit_on_release_branch + bump_versions("VERSION_POST_TAG") + assert_equal([ + { + path: "c_glib/meson.build", + hunks: [ + ["-version = '#{@snapshot_version}'", + "+version = '#{@next_snapshot_version}'"], + ], + }, + { + path: "ci/scripts/PKGBUILD", + hunks: [ + ["-pkgver=#{@previous_version}.9000", + "+pkgver=#{@release_version}.9000"], + ], + }, + { + path: "cpp/CMakeLists.txt", + hunks: [ + ["-set(ARROW_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"], + ], + }, + { + path: "cpp/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, + { + path: "csharp/Directory.Build.props", + hunks: [ + ["- <Version>#{@snapshot_version}</Version>", + "+ <Version>#{@next_snapshot_version}</Version>"], + ], + }, + { + path: "dev/tasks/homebrew-formulae/apache-arrow.rb", + hunks: [ + ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""], + ], + }, + { + path: "dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb", + hunks: [ + ["- url 
\"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@previous_version}.9000/apache-arrow-#{@previous_version}.9000.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}.9000/apache-arrow-#{@release_version}.9000.tar.gz\""], + ], + }, + { path: "java/adapter/avro/pom.xml", + hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]] }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/adapter/jdbc/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/adapter/orc/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/algorithm/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/c/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/compression/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/dataset/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/flight/flight-core/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/flight/flight-grpc/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/format/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/gandiva/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ 
<version>#{@next_snapshot_version}</version>"]], + path: "java/memory/memory-core/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/memory/memory-netty/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/memory/memory-unsafe/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/memory/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"], + ["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/performance/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/plasma/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/tools/pom.xml" }, + { hunks: [["- <version>#{@snapshot_version}</version>", + "+ <version>#{@next_snapshot_version}</version>"]], + path: "java/vector/pom.xml" }, + { + path: "js/package.json", + hunks: [ + ["- \"version\": \"#{@snapshot_version}\"", + "+ \"version\": \"#{@next_snapshot_version}\""], + ], + }, + { + path: "matlab/CMakeLists.txt", + hunks: [ + ["-set(MLARROW_VERSION \"#{@snapshot_version}\")", + "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], + ], + }, + { + path: "python/setup.py", + hunks: [ + ["-default_version = '#{@snapshot_version}'", + "+default_version = '#{@next_snapshot_version}'"], + ], + }, + { + path: "r/DESCRIPTION", + hunks: [ + ["-Version: #{@previous_version}.9000", + "+Version: #{@release_version}.9000"], + ], + }, + { + path: 
"r/NEWS.md", + hunks: [ + ["-# arrow #{@previous_version}.9000", + "+# arrow #{@release_version}.9000", + "+", + "+# arrow #{@release_version}",], + ], + }, + { + path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-arrow-dataset/lib/arrow-dataset/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-arrow/lib/arrow/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-gandiva/lib/gandiva/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-parquet/lib/parquet/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + { + path: "ruby/red-plasma/lib/plasma/version.rb", + hunks: [ + ["- VERSION = \"#{@snapshot_version}\"", + "+ VERSION = \"#{@next_snapshot_version}\""], + ], + }, + ], + parse_patch(git("log", "-n", "1", "-p"))) + end + + def test_deb_package_names + bump_versions("DEB_PACKAGE_NAMES") + changes = parse_patch(git("log", "-n", "1", "-p")) + sampled_changes = changes.collect do |change| + first_hunk = change[:hunks][0] + first_removed_line = first_hunk.find { |line| line.start_with?("-") } + first_added_line = first_hunk.find { |line| line.start_with?("+") } + { + sampled_diff: [first_removed_line, first_added_line], + path: change[:path], + } + end + expected_changes = [ + { + sampled_diff: [ + "-dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@so_version}.install", + 
"+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@next_so_version}.install", + ], + path: "dev/release/rat_exclude_files.txt", + }, + { + sampled_diff: [ + "-Package: libarrow#{@so_version}", + "+Package: libarrow#{@next_so_version}", + ], + path: "dev/tasks/linux-packages/apache-arrow/debian/control.in", + }, + { + sampled_diff: [ + "- - libarrow-dataset-glib#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + "+ - libarrow-dataset-glib#{@next_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + ], + path: "dev/tasks/tasks.yml", + }, + ] + assert_equal(expected_changes, sampled_changes) + end +end diff --git a/src/arrow/dev/release/01-prepare.sh b/src/arrow/dev/release/01-prepare.sh new file mode 100755 index 000000000..b1e917390 --- /dev/null +++ b/src/arrow/dev/release/01-prepare.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +set -ue + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 3 ]; then + echo "Usage: $0 <version> <next_version> <rc-num>" + exit 1 +fi + +. 
$SOURCE_DIR/utils-prepare.sh + +version=$1 +next_version=$2 +next_version_snapshot="${next_version}-SNAPSHOT" +rc_number=$3 + +release_tag="apache-arrow-${version}" +release_branch="release-${version}" +release_candidate_branch="release-${version}-rc${rc_number}" + +: ${PREPARE_DEFAULT:=1} +: ${PREPARE_CHANGELOG:=${PREPARE_DEFAULT}} +: ${PREPARE_LINUX_PACKAGES:=${PREPARE_DEFAULT}} +: ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}} +: ${PREPARE_BRANCH:=${PREPARE_DEFAULT}} +: ${PREPARE_TAG:=${PREPARE_DEFAULT}} + +if [ ${PREPARE_TAG} -gt 0 ]; then + if [ $(git tag -l "${release_tag}") ]; then + echo "Delete existing git tag $release_tag" + git tag -d "${release_tag}" + fi +fi + +if [ ${PREPARE_BRANCH} -gt 0 ]; then + if [[ $(git branch -l "${release_candidate_branch}") ]]; then + next_rc_number=$(($rc_number+1)) + echo "Branch ${release_candidate_branch} already exists, so create a new release candidate:" + echo "1. Checkout the master branch for major releases and maint-<version> for patch releases." + echo "2. Execute the script again with bumped RC number." 
+ echo "Commands:" + echo " git checkout master" + echo " dev/release/01-prepare.sh ${version} ${next_version} ${next_rc_number}" + exit 1 + fi + + echo "Create local branch ${release_candidate_branch} for release candidate ${rc_number}" + git checkout -b ${release_candidate_branch} +fi + +############################## Pre-Tag Commits ############################## + +if [ ${PREPARE_CHANGELOG} -gt 0 ]; then + echo "Updating changelog for $version" + # Update changelog + archery release changelog add $version + git add ${SOURCE_DIR}/../../CHANGELOG.md + git commit -m "[Release] Update CHANGELOG.md for $version" +fi + +if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then + echo "Updating .deb/.rpm changelogs for $version" + cd $SOURCE_DIR/../tasks/linux-packages + rake \ + version:update \ + ARROW_RELEASE_TIME="$(date +%Y-%m-%dT%H:%M:%S%z)" \ + ARROW_VERSION=${version} + git add */debian*/changelog */yum/*.spec.in + git commit -m "[Release] Update .deb/.rpm changelogs for $version" + cd - +fi + +if [ ${PREPARE_VERSION_PRE_TAG} -gt 0 ]; then + echo "Prepare release ${version} on tag ${release_tag} then reset to version ${next_version_snapshot}" + + update_versions "${version}" "${next_version}" "release" + git commit -m "[Release] Update versions for ${version}" +fi + +############################## Tag the Release ############################## + +if [ ${PREPARE_TAG} -gt 0 ]; then + git tag -a "${release_tag}" -m "[Release] Apache Arrow Release ${version}" +fi diff --git a/src/arrow/dev/release/02-source-test.rb b/src/arrow/dev/release/02-source-test.rb new file mode 100644 index 000000000..652d4c07f --- /dev/null +++ b/src/arrow/dev/release/02-source-test.rb @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class SourceTest < Test::Unit::TestCase + include GitRunnable + include VersionDetectable + + def setup + @current_commit = git_current_commit + detect_versions + @tag_name = "apache-arrow-#{@release_version}" + @script = File.expand_path("dev/release/02-source.sh") + + Dir.mktmpdir do |dir| + Dir.chdir(dir) do + yield + end + end + end + + def source(*targets) + env = { + "SOURCE_DEFAULT" => "0", + "release_hash" => @current_commit, + } + targets.each do |target| + env["SOURCE_#{target}"] = "1" + end + output = sh(env, @script, @release_version, "0") + sh("tar", "xf", "#{@tag_name}.tar.gz") + output + end + + def test_symbolic_links + source + Dir.chdir(@tag_name) do + assert_equal([], + Find.find(".").find_all {|path| File.symlink?(path)}) + end + end + + def test_csharp_git_commit_information + source + Dir.chdir("#{@tag_name}/csharp") do + FileUtils.mv("dummy.git", "../.git") + sh("dotnet", "pack", "-c", "Release") + FileUtils.mv("../.git", "dummy.git") + Dir.chdir("artifacts/Apache.Arrow/Release") do + sh("unzip", "Apache.Arrow.#{@snapshot_version}.nupkg") + FileUtils.chmod(0400, "Apache.Arrow.nuspec") + nuspec = REXML::Document.new(File.read("Apache.Arrow.nuspec")) + nuspec_repository = nuspec.elements["package/metadata/repository"] + attributes = {} + nuspec_repository.attributes.each do |key, value| + attributes[key] = value + end + assert_equal({ + "type" => "git", + "url" => 
"https://github.com/apache/arrow", + "commit" => @current_commit, + }, + attributes) + end + end + end + + def test_python_version + source + Dir.chdir("#{@tag_name}/python") do + sh("python3", "setup.py", "sdist") + if on_release_branch? + pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" + else + pyarrow_source_archive = "dist/pyarrow-#{@release_version}a0.tar.gz" + end + assert_equal([pyarrow_source_archive], + Dir.glob("dist/pyarrow-*.tar.gz")) + end + end + + def test_vote + jira_url = "https://issues.apache.org/jira" + jql_conditions = [ + "project = ARROW", + "status in (Resolved, Closed)", + "fixVersion = #{@release_version}", + ] + jql = jql_conditions.join(" AND ") + n_resolved_issues = nil + search_url = URI("#{jira_url}/rest/api/2/search?jql=#{CGI.escape(jql)}") + search_url.open do |response| + n_resolved_issues = JSON.parse(response.read)["total"] + end + output = source("VOTE") + assert_equal(<<-VOTE.strip, output[/^-+$(.+?)^-+$/m, 1].strip) +To: dev@arrow.apache.org +Subject: [VOTE] Release Apache Arrow #{@release_version} - RC0 + +Hi, + +I would like to propose the following release candidate (RC0) of Apache +Arrow version #{@release_version}. This is a release consisting of #{n_resolved_issues} +resolved JIRA issues[1]. + +This release candidate is based on commit: +#{@current_commit} [2] + +The source release rc0 is hosted at [3]. +The binary artifacts are hosted at [4][5][6][7][8][9]. +The changelog is located at [10]. + +Please download, verify checksums and signatures, run the unit tests, +and vote on the release. See [11] for how to validate a release candidate. + +The vote will be open for at least 72 hours. + +[ ] +1 Release this as Apache Arrow #{@release_version} +[ ] +0 +[ ] -1 Do not release this as Apache Arrow #{@release_version} because... 
+ +[1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20#{@release_version} +[2]: https://github.com/apache/arrow/tree/#{@current_commit} +[3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0 +[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ +[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ +[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ +[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{@release_version}-rc0 +[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0 +[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[10]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md +[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates + VOTE + end +end diff --git a/src/arrow/dev/release/02-source.sh b/src/arrow/dev/release/02-source.sh new file mode 100755 index 000000000..156eccc1b --- /dev/null +++ b/src/arrow/dev/release/02-source.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +set -e + +: ${SOURCE_DEFAULT:=1} +: ${SOURCE_RAT:=${SOURCE_DEFAULT}} +: ${SOURCE_UPLOAD:=${SOURCE_DEFAULT}} +: ${SOURCE_VOTE:=${SOURCE_DEFAULT}} + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc=$2 + +tag=apache-arrow-${version} +tagrc=${tag}-rc${rc} +rc_url="https://dist.apache.org/repos/dist/dev/arrow/${tagrc}" + +echo "Preparing source for tag ${tag}" + +: ${release_hash:=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag})} + +if [ ${SOURCE_UPLOAD} -gt 0 ]; then + if [ -z "$release_hash" ]; then + echo "Cannot continue: unknown git tag: $tag" + exit + fi +fi + +echo "Using commit $release_hash" + +tarball=${tag}.tar.gz + +rm -rf ${tag} +# be conservative and use the release hash, even though git produces the same +# archive (identical hashes) using the scm tag +(cd "${SOURCE_TOP_DIR}" && \ + git archive ${release_hash} --prefix ${tag}/) | \ + tar xf - + +# Resolve all hard and symbolic links +rm -rf ${tag}.tmp +mv ${tag} ${tag}.tmp +cp -R -L ${tag}.tmp ${tag} +rm -rf ${tag}.tmp + +# Create a dummy .git/ directory to download the source files from GitHub with Source Link in C#. 
+dummy_git=${tag}/csharp/dummy.git +mkdir ${dummy_git} +pushd ${dummy_git} +echo ${release_hash} > HEAD +echo '[remote "origin"] url = https://github.com/apache/arrow.git' >> config +mkdir objects refs +popd + +# Create new tarball from modified source directory +tar czf ${tarball} ${tag} +rm -rf ${tag} + +if [ ${SOURCE_RAT} -gt 0 ]; then + "${SOURCE_DIR}/run-rat.sh" ${tarball} +fi + +if [ ${SOURCE_UPLOAD} -gt 0 ]; then + # sign the archive + gpg --armor --output ${tarball}.asc --detach-sig ${tarball} + shasum -a 256 $tarball > ${tarball}.sha256 + shasum -a 512 $tarball > ${tarball}.sha512 + + # check out the arrow RC folder + svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow tmp + + # add the release candidate for the tag + mkdir -p tmp/${tagrc} + + # copy the rc tarball into the tmp dir + cp ${tarball}* tmp/${tagrc} + + # commit to svn + svn add tmp/${tagrc} + svn ci -m "Apache Arrow ${version} RC${rc}" tmp/${tagrc} + + # clean up + rm -rf tmp + + echo "Success! The release candidate is available here:" + echo " ${rc_url}" + echo "" + echo "Commit SHA1: ${release_hash}" + echo "" +fi + +if [ ${SOURCE_VOTE} -gt 0 ]; then + echo "The following draft email has been created to send to the" + echo "dev@arrow.apache.org mailing list" + echo "" + echo "---------------------------------------------------------" + jira_url="https://issues.apache.org/jira" + jql="project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20${version}" + n_resolved_issues=$(curl "${jira_url}/rest/api/2/search/?jql=${jql}" | jq ".total") + cat <<MAIL +To: dev@arrow.apache.org +Subject: [VOTE] Release Apache Arrow ${version} - RC${rc} + +Hi, + +I would like to propose the following release candidate (RC${rc}) of Apache +Arrow version ${version}. This is a release consisting of ${n_resolved_issues} +resolved JIRA issues[1]. + +This release candidate is based on commit: +${release_hash} [2] + +The source release rc${rc} is hosted at [3]. 
+The binary artifacts are hosted at [4][5][6][7][8][9]. +The changelog is located at [10]. + +Please download, verify checksums and signatures, run the unit tests, +and vote on the release. See [11] for how to validate a release candidate. + +The vote will be open for at least 72 hours. + +[ ] +1 Release this as Apache Arrow ${version} +[ ] +0 +[ ] -1 Do not release this as Apache Arrow ${version} because... + +[1]: ${jira_url}/issues/?jql=${jql} +[2]: https://github.com/apache/arrow/tree/${release_hash} +[3]: ${rc_url} +[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ +[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ +[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ +[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/${version}-rc${rc} +[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc} +[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[10]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md +[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates +MAIL + echo "---------------------------------------------------------" +fi diff --git a/src/arrow/dev/release/03-binary-submit.sh b/src/arrow/dev/release/03-binary-submit.sh new file mode 100755 index 000000000..b22a54bfd --- /dev/null +++ b/src/arrow/dev/release/03-binary-submit.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc_number=$2 +version_with_rc="${version}-rc${rc_number}" +crossbow_job_prefix="release-${version_with_rc}" +release_tag="apache-arrow-${version}" + +: ${ARROW_REPOSITORY:="apache/arrow"} +: ${ARROW_BRANCH:=$release_tag} + +# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there +# are jobs submitted with the same prefix (the integer at the end is auto +# incremented) +archery crossbow submit \ + --no-fetch \ + --job-prefix ${crossbow_job_prefix} \ + --arrow-version ${version_with_rc} \ + --arrow-remote "https://github.com/${ARROW_REPOSITORY}" \ + --arrow-branch ${ARROW_BRANCH} \ + --group packaging diff --git a/src/arrow/dev/release/04-binary-download.sh b/src/arrow/dev/release/04-binary-download.sh new file mode 100755 index 000000000..b433a3f9c --- /dev/null +++ b/src/arrow/dev/release/04-binary-download.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc_number=$2 +version_with_rc="${version}-rc${rc_number}" +crossbow_job_prefix="release-${version_with_rc}" + +# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there +# are jobs submitted with the same prefix (the integer at the end is auto +# incremented) +: ${CROSSBOW_JOB_NUMBER:="0"} +: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"} + +archery crossbow download-artifacts ${CROSSBOW_JOB_ID} --no-fetch diff --git a/src/arrow/dev/release/05-binary-upload.sh b/src/arrow/dev/release/05-binary-upload.sh new file mode 100755 index 000000000..5a30fc8bd --- /dev/null +++ b/src/arrow/dev/release/05-binary-upload.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -u +set -o pipefail + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc=$2 + +version_with_rc="${version}-rc${rc}" +crossbow_job_prefix="release-${version_with_rc}" +crossbow_package_dir="${SOURCE_DIR}/../../packages" + +: ${CROSSBOW_JOB_NUMBER:="0"} +: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"} +artifact_dir="${crossbow_package_dir}/${CROSSBOW_JOB_ID}" + +if [ ! -e "$artifact_dir" ]; then + echo "$artifact_dir does not exist" + exit 1 +fi + +if [ ! -d "$artifact_dir" ]; then + echo "$artifact_dir is not a directory" + exit 1 +fi + +cd "${SOURCE_DIR}" + +if [ ! -f .env ]; then + echo "You must create $(pwd)/.env" + echo "You can use $(pwd)/.env.example as template" + exit 1 +fi +. .env + +. utils-binary.sh + +# By default upload all artifacts. +# To deactivate one category, deactivate the category and all of its dependents. +# To explicitly select one category, set UPLOAD_DEFAULT=0 UPLOAD_X=1. 
+: ${UPLOAD_DEFAULT:=1} +: ${UPLOAD_ALMALINUX:=${UPLOAD_DEFAULT}} +: ${UPLOAD_AMAZON_LINUX:=${UPLOAD_DEFAULT}} +: ${UPLOAD_CENTOS:=${UPLOAD_DEFAULT}} +: ${UPLOAD_DEBIAN:=${UPLOAD_DEFAULT}} +: ${UPLOAD_NUGET:=${UPLOAD_DEFAULT}} +: ${UPLOAD_PYTHON:=${UPLOAD_DEFAULT}} +: ${UPLOAD_UBUNTU:=${UPLOAD_DEFAULT}} + +rake_tasks=() +apt_targets=() +yum_targets=() +if [ ${UPLOAD_ALMALINUX} -gt 0 ]; then + rake_tasks+=(yum:rc) + yum_targets+=(almalinux) +fi +if [ ${UPLOAD_AMAZON_LINUX} -gt 0 ]; then + rake_tasks+=(yum:rc) + yum_targets+=(amazon-linux) +fi +if [ ${UPLOAD_CENTOS} -gt 0 ]; then + rake_tasks+=(yum:rc) + yum_targets+=(centos) +fi +if [ ${UPLOAD_DEBIAN} -gt 0 ]; then + rake_tasks+=(apt:rc) + apt_targets+=(debian) +fi +if [ ${UPLOAD_NUGET} -gt 0 ]; then + rake_tasks+=(nuget:rc) +fi +if [ ${UPLOAD_PYTHON} -gt 0 ]; then + rake_tasks+=(python:rc) +fi +if [ ${UPLOAD_UBUNTU} -gt 0 ]; then + rake_tasks+=(apt:rc) + apt_targets+=(ubuntu) +fi +rake_tasks+=(summary:rc) + +tmp_dir=binary/tmp +mkdir -p "${tmp_dir}" +source_artifacts_dir="${tmp_dir}/artifacts" +rm -rf "${source_artifacts_dir}" +cp -a "${artifact_dir}" "${source_artifacts_dir}" + +docker_run \ + ./runner.sh \ + rake \ + "${rake_tasks[@]}" \ + APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \ + ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \ + ARTIFACTS_DIR="${tmp_dir}/artifacts" \ + RC=${rc} \ + STAGING=${STAGING:-no} \ + VERSION=${version} \ + YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}") diff --git a/src/arrow/dev/release/README.md b/src/arrow/dev/release/README.md new file mode 100644 index 000000000..0a9cc3e04 --- /dev/null +++ b/src/arrow/dev/release/README.md @@ -0,0 +1,24 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +## Release management scripts + +To learn more, see the project wiki: + +https://cwiki.apache.org/confluence/display/ARROW/Release+Management+Guide diff --git a/src/arrow/dev/release/Rakefile b/src/arrow/dev/release/Rakefile new file mode 100644 index 000000000..ff57bad5e --- /dev/null +++ b/src/arrow/dev/release/Rakefile @@ -0,0 +1,37 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "binary-task" + +if File.exist?(".env") + File.open(".env") do |env| + env.each_line do |line| + case line.strip + when /\A#/ + when /\A([^=]+)=(.*)\z/ + key = $1 + value = $2 + ENV[key] ||= value + end + end + end +end + +binary_task = BinaryTask.new +binary_task.define diff --git a/src/arrow/dev/release/VERIFY.md b/src/arrow/dev/release/VERIFY.md new file mode 100644 index 000000000..5b441ac13 --- /dev/null +++ b/src/arrow/dev/release/VERIFY.md @@ -0,0 +1,76 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Verifying Arrow releases + +## Windows + +We've provided a convenience script for verifying the C++ and Python builds on +Windows. Read the comments in `verify-release-candidate.bat` for instructions. + +## Linux and macOS + +We've provided a convenience script for verifying the C++, Python, C +GLib, Java and JavaScript builds on Linux and macOS. Read the comments in +`verify-release-candidate.sh` for instructions. 
+
+### C GLib
+
+You need the following to verify the C GLib build:
+
+  * GLib
+  * GObject Introspection
+  * Ruby (a version that has not reached EOL is required)
+  * gobject-introspection gem
+  * test-unit gem
+
+You can install them with the following commands on Debian GNU/Linux and Ubuntu:
+
+```console
+% sudo apt install -y -V libgirepository1.0-dev ruby-dev
+% sudo gem install gobject-introspection test-unit
+```
+
+You can install them with the following commands on CentOS:
+
+```console
+% sudo yum install -y gobject-introspection-devel
+% git clone https://github.com/sstephenson/rbenv.git ~/.rbenv
+% git clone https://github.com/sstephenson/ruby-build.git ~/.rbenv/plugins/ruby-build
+% echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bash_profile
+% echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
+% exec ${SHELL} --login
+% sudo yum install -y gcc make patch openssl-devel readline-devel zlib-devel
+% rbenv install 2.4.2
+% rbenv global 2.4.2
+% gem install gobject-introspection test-unit
+```
+
+You can install them with the following commands on macOS:
+
+```console
+% brew install -y gobject-introspection
+% gem install gobject-introspection test-unit
+```
+
+You need to set `PKG_CONFIG_PATH` to find libffi on macOS:
+
+```console
+% export PKG_CONFIG_PATH=$(brew --prefix libffi)/lib/pkgconfig:$PKG_CONFIG_PATH
+```
diff --git a/src/arrow/dev/release/binary-task.rb b/src/arrow/dev/release/binary-task.rb
new file mode 100644
index 000000000..5f88e477e
--- /dev/null
+++ b/src/arrow/dev/release/binary-task.rb
@@ -0,0 +1,1910 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "cgi/util" +require "digest/sha2" +require "io/console" +require "json" +require "net/http" +require "pathname" +require "tempfile" +require "thread" +require "time" + +begin + require "apt-dists-merge" +rescue LoadError + warn("apt-dists-merge is needed for apt:* tasks") +end + +class BinaryTask + include Rake::DSL + + class ThreadPool + def initialize(use_case, &worker) + @n_workers = choose_n_workers(use_case) + @worker = worker + @jobs = Thread::Queue.new + @workers = @n_workers.times.collect do + Thread.new do + loop do + job = @jobs.pop + break if job.nil? + @worker.call(job) + end + end + end + end + + def <<(job) + @jobs << job + end + + def join + @n_workers.times do + @jobs << nil + end + @workers.each(&:join) + end + + private + def choose_n_workers(use_case) + case use_case + when :artifactory + # Too many workers cause Artifactory error. + 6 + when :gpg + # Too many workers cause gpg-agent error. + 2 + else + raise "Unknown use case: #{use_case}" + end + end + end + + class ProgressReporter + def initialize(label, count_max=0) + @label = label + @count_max = count_max + + @mutex = Thread::Mutex.new + + @time_start = Time.now + @time_previous = Time.now + @count_current = 0 + @count_previous = 0 + end + + def advance + @mutex.synchronize do + @count_current += 1 + + return if @count_max.zero? 
+ + time_current = Time.now + if time_current - @time_previous <= 1 + return + end + + show_progress(time_current) + end + end + + def increment_max + @mutex.synchronize do + @count_max += 1 + show_progress(Time.now) if @count_max == 1 + end + end + + def finish + @mutex.synchronize do + return if @count_max.zero? + show_progress(Time.now) + $stderr.puts + end + end + + private + def show_progress(time_current) + n_finishes = @count_current - @count_previous + throughput = n_finishes.to_f / (time_current - @time_previous) + @time_previous = time_current + @count_previous = @count_current + + message = build_message(time_current, throughput) + $stderr.print("\r#{message}") if message + end + + def build_message(time_current, throughput) + percent = (@count_current / @count_max.to_f) * 100 + formatted_count = "[%s/%s]" % [ + format_count(@count_current), + format_count(@count_max), + ] + elapsed_second = time_current - @time_start + if throughput.zero? + rest_second = 0 + else + rest_second = (@count_max - @count_current) / throughput + end + separator = " - " + progress = "%5.1f%% %s %s %s %s" % [ + percent, + formatted_count, + format_time_interval(elapsed_second), + format_time_interval(rest_second), + format_throughput(throughput), + ] + label = @label + + width = guess_terminal_width + return "#{label}#{separator}#{progress}" if width.nil? + + return nil if progress.size > width + + label_width = width - progress.size - separator.size + if label.size > label_width + ellipsis = "..." 
+ shorten_label_width = label_width - ellipsis.size + if shorten_label_width < 1 + return progress + else + label = label[0, shorten_label_width] + ellipsis + end + end + "#{label}#{separator}#{progress}" + end + + def format_count(count) + "%d" % count + end + + def format_time_interval(interval) + if interval < 60 + "00:00:%02d" % interval + elsif interval < (60 * 60) + minute, second = interval.divmod(60) + "00:%02d:%02d" % [minute, second] + elsif interval < (60 * 60 * 24) + minute, second = interval.divmod(60) + hour, minute = minute.divmod(60) + "%02d:%02d:%02d" % [hour, minute, second] + else + minute, second = interval.divmod(60) + hour, minute = minute.divmod(60) + day, hour = hour.divmod(24) + "%dd %02d:%02d:%02d" % [day, hour, minute, second] + end + end + + def format_throughput(throughput) + "%2d/s" % throughput + end + + def guess_terminal_width + guess_terminal_width_from_io || + guess_terminal_width_from_command || + guess_terminal_width_from_env || + 80 + end + + def guess_terminal_width_from_io + if IO.respond_to?(:console) and IO.console + IO.console.winsize[1] + elsif $stderr.respond_to?(:winsize) + begin + $stderr.winsize[1] + rescue SystemCallError + nil + end + else + nil + end + end + + def guess_terminal_width_from_command + IO.pipe do |input, output| + begin + pid = spawn("tput", "cols", {:out => output, :err => output}) + rescue SystemCallError + return nil + end + + output.close + _, status = Process.waitpid2(pid) + return nil unless status.success? + + result = input.read.chomp + begin + Integer(result, 10) + rescue ArgumentError + nil + end + end + end + + def guess_terminal_width_from_env + env = ENV["COLUMNS"] || ENV["TERM_WIDTH"] + return nil if env.nil? 
+ + begin + Integer(env, 10) + rescue ArgumentError + nil + end + end + end + + class ArtifactoryClient + class Error < StandardError + attr_reader :request + attr_reader :response + def initialize(request, response, message) + @request = request + @response = response + super(message) + end + end + + def initialize(prefix, api_key) + @prefix = prefix + @api_key = api_key + @http = nil + restart + end + + def restart + close + @http = start_http(build_url("")) + end + + private def start_http(url, &block) + http = Net::HTTP.new(url.host, url.port) + http.set_debug_output($stderr) if ENV["DEBUG"] + http.use_ssl = true + if block_given? + http.start(&block) + else + http + end + end + + def close + return if @http.nil? + @http.finish if @http.started? + @http = nil + end + + def request(method, headers, url, body: nil, &block) + request = build_request(method, url, headers, body: body) + if ENV["DRY_RUN"] + case request + when Net::HTTP::Get, Net::HTTP::Head + else + p [method, url] + return + end + end + request_internal(@http, request, &block) + end + + private def request_internal(http, request, &block) + http.request(request) do |response| + case response + when Net::HTTPSuccess, + Net::HTTPNotModified + if block_given? + return yield(response) + else + response.read_body + return response + end + when Net::HTTPRedirection + redirected_url = URI(response["Location"]) + redirected_request = Net::HTTP::Get.new(redirected_url, {}) + start_http(redirected_url) do |redirected_http| + request_internal(redirected_http, redirected_request, &block) + end + else + message = "failed to request: " + message << "#{request.uri}: #{request.method}: " + message << "#{response.message} #{response.code}" + if response.body + message << "\n" + message << response.body + end + raise Error.new(request, response, message) + end + end + end + + def files + _files = [] + directories = [""] + until directories.empty? 
+ directory = directories.shift + list(directory).each do |path| + resolved_path = "#{directory}#{path}" + case path + when "../" + when /\/\z/ + directories << resolved_path + else + _files << resolved_path + end + end + end + _files + end + + def list(path) + url = build_url(path) + with_retry(3, url) do + begin + request(:get, {}, url) do |response| + response.body.scan(/<a href="(.+?)"/).flatten + end + rescue Error => error + case error.response + when Net::HTTPNotFound + return [] + else + raise + end + end + end + end + + def head(path) + url = build_url(path) + with_retry(3, url) do + request(:head, {}, url) + end + end + + def exist?(path) + begin + head(path) + true + rescue Error => error + case error.response + when Net::HTTPNotFound + false + else + raise + end + end + end + + def upload(path, destination_path) + destination_url = build_url(destination_path) + with_retry(3, destination_url) do + sha1 = Digest::SHA1.file(path).hexdigest + sha256 = Digest::SHA256.file(path).hexdigest + headers = { + "X-Artifactory-Last-Modified" => File.mtime(path).rfc2822, + "X-Checksum-Deploy" => "false", + "X-Checksum-Sha1" => sha1, + "X-Checksum-Sha256" => sha256, + "Content-Length" => File.size(path).to_s, + "Content-Type" => "application/octet-stream", + } + File.open(path, "rb") do |input| + request(:put, headers, destination_url, body: input) + end + end + end + + def download(path, output_path) + url = build_url(path) + with_retry(5, url) do + begin + begin + headers = {} + if File.exist?(output_path) + headers["If-Modified-Since"] = File.mtime(output_path).rfc2822 + end + request(:get, headers, url) do |response| + case response + when Net::HTTPNotModified + else + File.open(output_path, "wb") do |output| + response.read_body do |chunk| + output.write(chunk) + end + end + last_modified = response["Last-Modified"] + if last_modified + FileUtils.touch(output_path, + mtime: Time.rfc2822(last_modified)) + end + end + end + rescue Error => error + case 
error.response + when Net::HTTPNotFound + $stderr.puts(error.message) + return + else + raise + end + end + end + rescue + FileUtils.rm_f(output_path) + raise + end + end + + def delete(path) + url = build_url(path) + with_retry(3, url) do + request(:delete, {}, url) + end + end + + def copy(source, destination) + uri = build_api_url("copy/arrow/#{source}", + "to" => "/arrow/#{destination}") + with_read_timeout(300) do + request(:post, {}, uri) + end + end + + private + def build_url(path) + uri_string = "https://apache.jfrog.io/artifactory/arrow" + uri_string << "/#{@prefix}" unless @prefix.nil? + uri_string << "/#{path}" + URI(uri_string) + end + + def build_api_url(path, parameters) + uri_string = "https://apache.jfrog.io/artifactory/api/#{path}" + unless parameters.empty? + uri_string << "?" + escaped_parameters = parameters.collect do |key, value| + "#{CGI.escape(key)}=#{CGI.escape(value)}" + end + uri_string << escaped_parameters.join("&") + end + URI(uri_string) + end + + def build_request(method, url, headers, body: nil) + need_auth = false + case method + when :head + request = Net::HTTP::Head.new(url, headers) + when :get + request = Net::HTTP::Get.new(url, headers) + when :post + need_auth = true + request = Net::HTTP::Post.new(url, headers) + when :put + need_auth = true + request = Net::HTTP::Put.new(url, headers) + when :delete + need_auth = true + request = Net::HTTP::Delete.new(url, headers) + else + raise "unsupported HTTP method: #{method.inspect}" + end + request["Connection"] = "Keep-Alive" + request["X-JFrog-Art-Api"] = @api_key if need_auth + if body + if body.is_a?(String) + request.body = body + else + request.body_stream = body + end + end + request + end + + def with_retry(max_n_retries, target) + n_retries = 0 + begin + yield + rescue Net::OpenTimeout, + OpenSSL::OpenSSLError, + SocketError, + SystemCallError, + Timeout::Error => error + n_retries += 1 + if n_retries <= max_n_retries + $stderr.puts + $stderr.puts("Retry #{n_retries}: 
#{target}: " + + "#{error.class}: #{error.message}") + restart + retry + else + raise + end + end + end + + def with_read_timeout(timeout) + current_timeout = @http.read_timeout + begin + @http.read_timeout = timeout + yield + ensure + @http.read_timeout = current_timeout + end + end + end + + class ArtifactoryClientPool + class << self + def open(prefix, api_key) + pool = new(prefix, api_key) + begin + yield(pool) + ensure + pool.close + end + end + end + + def initialize(prefix, api_key) + @prefix = prefix + @api_key = api_key + @mutex = Thread::Mutex.new + @clients = [] + end + + def pull + client = @mutex.synchronize do + if @clients.empty? + ArtifactoryClient.new(@prefix, @api_key) + else + @clients.pop + end + end + begin + yield(client) + ensure + release(client) + end + end + + def release(client) + @mutex.synchronize do + @clients << client + end + end + + def close + @clients.each(&:close) + end + end + + module ArtifactoryPath + private + def base_path + path = @distribution + path += "-staging" if @staging + path += "-rc" if @rc + path + end + end + + class ArtifactoryDownloader + include ArtifactoryPath + + def initialize(api_key:, + destination:, + distribution:, + list: nil, + pattern: nil, + prefix: nil, + rc: nil, + staging: false) + @api_key = api_key + @destination = destination + @distribution = distribution + @list = list + @pattern = pattern + @prefix = prefix + @rc = rc + @staging = staging + end + + def download + progress_label = "Downloading: #{base_path}" + progress_reporter = ProgressReporter.new(progress_label) + prefix = [base_path, @prefix].compact.join("/") + ArtifactoryClientPool.open(prefix, @api_key) do |client_pool| + thread_pool = ThreadPool.new(:artifactory) do |path, output_path| + client_pool.pull do |client| + client.download(path, output_path) + end + progress_reporter.advance + end + files = client_pool.pull do |client| + if @list + list_output_path = "#{@destination}/#{@list}" + client.download(@list, list_output_path) + 
File.readlines(list_output_path, chomp: true) + else + client.files + end + end + files.each do |path| + output_path = "#{@destination}/#{path}" + if @pattern + next unless @pattern.match?(path) + end + yield(output_path) + output_dir = File.dirname(output_path) + FileUtils.mkdir_p(output_dir) + progress_reporter.increment_max + thread_pool << [path, output_path] + end + thread_pool.join + end + progress_reporter.finish + end + end + + class ArtifactoryUploader + include ArtifactoryPath + + def initialize(api_key:, + destination_prefix: nil, + distribution:, + rc: nil, + source:, + staging: false, + sync: false, + sync_pattern: nil) + @api_key = api_key + @destination_prefix = destination_prefix + @distribution = distribution + @rc = rc + @source = source + @staging = staging + @sync = sync + @sync_pattern = sync_pattern + end + + def upload + progress_label = "Uploading: #{base_path}" + progress_reporter = ProgressReporter.new(progress_label) + prefix = base_path + prefix += "/#{@destination_prefix}" if @destination_prefix + ArtifactoryClientPool.open(prefix, @api_key) do |client_pool| + if @sync + existing_files = client_pool.pull do |client| + client.files + end + else + existing_files = [] + end + + thread_pool = ThreadPool.new(:artifactory) do |path, relative_path| + client_pool.pull do |client| + client.upload(path, relative_path) + end + progress_reporter.advance + end + + source = Pathname(@source) + source.glob("**/*") do |path| + next if path.directory? 
+ destination_path = path.relative_path_from(source) + progress_reporter.increment_max + existing_files.delete(destination_path.to_s) + thread_pool << [path, destination_path] + end + thread_pool.join + + if @sync + thread_pool = ThreadPool.new(:artifactory) do |path| + client_pool.pull do |client| + client.delete(path) + end + progress_reporter.advance + end + existing_files.each do |path| + if @sync_pattern + next unless @sync_pattern.match?(path) + end + progress_reporter.increment_max + thread_pool << path + end + thread_pool.join + end + end + progress_reporter.finish + end + end + + def define + define_apt_tasks + define_yum_tasks + define_python_tasks + define_nuget_tasks + define_summary_tasks + end + + private + def env_value(name) + value = ENV[name] + value = yield(name) if value.nil? and block_given? + raise "Specify #{name} environment variable" if value.nil? + value + end + + def verbose? + ENV["VERBOSE"] == "yes" + end + + def default_output + if verbose? + $stdout + else + IO::NULL + end + end + + def gpg_key_id + env_value("GPG_KEY_ID") + end + + def shorten_gpg_key_id(id) + id[-8..-1] + end + + def rpm_gpg_key_package_name(id) + "gpg-pubkey-#{shorten_gpg_key_id(id).downcase}" + end + + def artifactory_api_key + env_value("ARTIFACTORY_API_KEY") + end + + def artifacts_dir + env_value("ARTIFACTS_DIR") + end + + def version + env_value("VERSION") + end + + def rc + env_value("RC") + end + + def staging? 
+ ENV["STAGING"] == "yes" + end + + def full_version + "#{version}-rc#{rc}" + end + + def valid_sign?(path, sign_path) + IO.pipe do |input, output| + begin + sh({"LANG" => "C"}, + "gpg", + "--verify", + sign_path, + path, + out: default_output, + err: output, + verbose: false) + rescue + return false + end + output.close + /Good signature/ === input.read + end + end + + def sign(source_path, destination_path) + if File.exist?(destination_path) + return if valid_sign?(source_path, destination_path) + rm(destination_path, verbose: false) + end + sh("gpg", + "--detach-sig", + "--local-user", gpg_key_id, + "--output", destination_path, + source_path, + out: default_output, + verbose: verbose?) + end + + def sha512(source_path, destination_path) + if File.exist?(destination_path) + sha512 = File.read(destination_path).split[0] + return if Digest::SHA512.file(source_path).hexdigest == sha512 + end + absolute_destination_path = File.expand_path(destination_path) + Dir.chdir(File.dirname(source_path)) do + sh("shasum", + "--algorithm", "512", + File.basename(source_path), + out: absolute_destination_path, + verbose: verbose?) + end + end + + def sign_dir(label, dir) + progress_label = "Signing: #{label}" + progress_reporter = ProgressReporter.new(progress_label) + + target_paths = [] + Pathname(dir).glob("**/*") do |path| + next if path.directory? + case path.extname + when ".asc", ".sha512" + next + end + progress_reporter.increment_max + target_paths << path.to_s + end + target_paths.each do |path| + sign(path, "#{path}.asc") + sha512(path, "#{path}.sha512") + progress_reporter.advance + end + progress_reporter.finish + end + + def download_distribution(distribution, + destination, + target, + list: nil, + pattern: nil, + prefix: nil) + mkdir_p(destination, verbose: verbose?) unless File.exist?(destination) + existing_paths = {} + Pathname(destination).glob("**/*") do |path| + next if path.directory? 
+ existing_paths[path.to_s] = true + end + options = { + api_key: artifactory_api_key, + destination: destination, + distribution: distribution, + list: list, + pattern: pattern, + prefix: prefix, + staging: staging?, + } + options[:rc] = rc if target == :rc + downloader = ArtifactoryDownloader.new(**options) + downloader.download do |output_path| + existing_paths.delete(output_path) + end + existing_paths.each_key do |path| + rm_f(path, verbose: verbose?) + end + end + + def same_content?(path1, path2) + File.exist?(path1) and + File.exist?(path2) and + Digest::SHA256.file(path1) == Digest::SHA256.file(path2) + end + + def copy_artifact(source_path, + destination_path, + progress_reporter) + return if same_content?(source_path, destination_path) + progress_reporter.increment_max + destination_dir = File.dirname(destination_path) + unless File.exist?(destination_dir) + mkdir_p(destination_dir, verbose: verbose?) + end + cp(source_path, destination_path, verbose: verbose?) + progress_reporter.advance + end + + def prepare_staging(base_path) + client = ArtifactoryClient.new(nil, artifactory_api_key) + ["", "-rc"].each do |suffix| + path = "#{base_path}#{suffix}" + progress_reporter = ProgressReporter.new("Preparing staging for #{path}") + progress_reporter.increment_max + begin + staging_path = "#{base_path}-staging#{suffix}" + if client.exist?(staging_path) + client.delete(staging_path) + end + if client.exist?(path) + client.copy(path, staging_path) + end + ensure + progress_reporter.advance + progress_reporter.finish + end + end + end + + def delete_staging(base_path) + client = ArtifactoryClient.new(nil, artifactory_api_key) + ["", "-rc"].each do |suffix| + path = "#{base_path}#{suffix}" + progress_reporter = ProgressReporter.new("Deleting staging for #{path}") + progress_reporter.increment_max + begin + staging_path = "#{base_path}-staging#{suffix}" + if client.exist?(staging_path) + client.delete(staging_path) + end + ensure + progress_reporter.advance + 
progress_reporter.finish + end + end + end + + def uploaded_files_name + "uploaded-files.txt" + end + + def write_uploaded_files(dir) + dir = Pathname(dir) + uploaded_files = [] + dir.glob("**/*") do |path| + next if path.directory? + uploaded_files << path.relative_path_from(dir).to_s + end + File.open("#{dir}/#{uploaded_files_name}", "w") do |output| + output.puts(uploaded_files.sort) + end + end + + def tmp_dir + "binary/tmp" + end + + def rc_dir + "#{tmp_dir}/rc" + end + + def release_dir + "#{tmp_dir}/release" + end + + def apt_repository_label + "Apache Arrow" + end + + def apt_repository_description + "Apache Arrow packages" + end + + def apt_rc_repositories_dir + "#{rc_dir}/apt/repositories" + end + + def apt_release_repositories_dir + "#{release_dir}/apt/repositories" + end + + def available_apt_targets + [ + ["debian", "buster", "main"], + ["debian", "bullseye", "main"], + ["debian", "bookworm", "main"], + ["ubuntu", "bionic", "main"], + ["ubuntu", "focal", "main"], + ["ubuntu", "hirsute", "main"], + ["ubuntu", "impish", "main"], + ] + end + + def apt_targets + env_apt_targets = (ENV["APT_TARGETS"] || "").split(",") + if env_apt_targets.empty? + available_apt_targets + else + available_apt_targets.select do |distribution, code_name, component| + env_apt_targets.any? do |env_apt_target| + if env_apt_target.include?("-") + env_apt_target.start_with?("#{distribution}-#{code_name}") + else + env_apt_target == distribution + end + end + end + end + end + + def apt_distributions + apt_targets.collect(&:first).uniq + end + + def apt_architectures + [ + "amd64", + "arm64", + ] + end + + def generate_apt_release(dists_dir, code_name, component, architecture) + dir = "#{dists_dir}/#{component}/" + if architecture == "source" + dir << architecture + else + dir << "binary-#{architecture}" + end + + mkdir_p(dir, verbose: verbose?) 
+ File.open("#{dir}/Release", "w") do |release| + release.puts(<<-RELEASE) +Archive: #{code_name} +Component: #{component} +Origin: #{apt_repository_label} +Label: #{apt_repository_label} +Architecture: #{architecture} + RELEASE + end + end + + def generate_apt_ftp_archive_generate_conf(code_name, component) + conf = <<-CONF +Dir::ArchiveDir "."; +Dir::CacheDir "."; +TreeDefault::Directory "pool/#{code_name}/#{component}"; +TreeDefault::SrcDirectory "pool/#{code_name}/#{component}"; +Default::Packages::Extensions ".deb"; +Default::Packages::Compress ". gzip xz"; +Default::Sources::Compress ". gzip xz"; +Default::Contents::Compress "gzip"; + CONF + + apt_architectures.each do |architecture| + conf << <<-CONF + +BinDirectory "dists/#{code_name}/#{component}/binary-#{architecture}" { + Packages "dists/#{code_name}/#{component}/binary-#{architecture}/Packages"; + Contents "dists/#{code_name}/#{component}/Contents-#{architecture}"; + SrcPackages "dists/#{code_name}/#{component}/source/Sources"; +}; + CONF + end + + conf << <<-CONF + +Tree "dists/#{code_name}" { + Sections "#{component}"; + Architectures "#{apt_architectures.join(" ")} source"; +}; + CONF + + conf + end + + def generate_apt_ftp_archive_release_conf(code_name, component) + <<-CONF +APT::FTPArchive::Release::Origin "#{apt_repository_label}"; +APT::FTPArchive::Release::Label "#{apt_repository_label}"; +APT::FTPArchive::Release::Architectures "#{apt_architectures.join(" ")}"; +APT::FTPArchive::Release::Codename "#{code_name}"; +APT::FTPArchive::Release::Suite "#{code_name}"; +APT::FTPArchive::Release::Components "#{component}"; +APT::FTPArchive::Release::Description "#{apt_repository_description}"; + CONF + end + + def apt_update(base_dir, incoming_dir, merged_dir) + apt_targets.each do |distribution, code_name, component| + distribution_dir = "#{incoming_dir}/#{distribution}" + pool_dir = "#{distribution_dir}/pool/#{code_name}" + next unless File.exist?(pool_dir) + dists_dir = 
"#{distribution_dir}/dists/#{code_name}" + rm_rf(dists_dir, verbose: verbose?) + generate_apt_release(dists_dir, code_name, component, "source") + apt_architectures.each do |architecture| + generate_apt_release(dists_dir, code_name, component, architecture) + end + + generate_conf_file = Tempfile.new("apt-ftparchive-generate.conf") + File.open(generate_conf_file.path, "w") do |conf| + conf.puts(generate_apt_ftp_archive_generate_conf(code_name, + component)) + end + cd(distribution_dir, verbose: verbose?) do + sh("apt-ftparchive", + "generate", + generate_conf_file.path, + out: default_output, + verbose: verbose?) + end + + Dir.glob("#{dists_dir}/Release*") do |release| + rm_f(release, verbose: verbose?) + end + Dir.glob("#{distribution_dir}/*.db") do |db| + rm_f(db, verbose: verbose?) + end + release_conf_file = Tempfile.new("apt-ftparchive-release.conf") + File.open(release_conf_file.path, "w") do |conf| + conf.puts(generate_apt_ftp_archive_release_conf(code_name, + component)) + end + release_file = Tempfile.new("apt-ftparchive-release") + sh("apt-ftparchive", + "-c", release_conf_file.path, + "release", + dists_dir, + out: release_file.path, + verbose: verbose?) + mv(release_file.path, "#{dists_dir}/Release", verbose: verbose?) + + base_dists_dir = "#{base_dir}/#{distribution}/dists/#{code_name}" + merged_dists_dir = "#{merged_dir}/#{distribution}/dists/#{code_name}" + rm_rf(merged_dists_dir) + merger = APTDistsMerge::Merger.new(base_dists_dir, + dists_dir, + merged_dists_dir) + merger.merge + + in_release_path = "#{merged_dists_dir}/InRelease" + release_path = "#{merged_dists_dir}/Release" + signed_release_path = "#{release_path}.gpg" + sh("gpg", + "--sign", + "--detach-sign", + "--armor", + "--local-user", gpg_key_id, + "--output", signed_release_path, + release_path, + out: default_output, + verbose: verbose?) + sh("gpg", + "--clear-sign", + "--local-user", gpg_key_id, + "--output", in_release_path, + release_path, + out: default_output, + verbose: verbose?) 
+ end + end + + def define_apt_staging_tasks + namespace :apt do + namespace :staging do + desc "Prepare staging environment for APT repositories" + task :prepare do + apt_distributions.each do |distribution| + prepare_staging(distribution) + end + end + + desc "Delete staging environment for APT repositories" + task :delete do + apt_distributions.each do |distribution| + delete_staging(distribution) + end + end + end + end + end + + def define_apt_rc_tasks + namespace :apt do + namespace :rc do + base_dir = "#{apt_rc_repositories_dir}/base" + incoming_dir = "#{apt_rc_repositories_dir}/incoming" + merged_dir = "#{apt_rc_repositories_dir}/merged" + upload_dir = "#{apt_rc_repositories_dir}/upload" + + desc "Copy .deb packages" + task :copy do + apt_targets.each do |distribution, code_name, component| + progress_label = "Copying: #{distribution} #{code_name}" + progress_reporter = ProgressReporter.new(progress_label) + + distribution_dir = "#{incoming_dir}/#{distribution}" + pool_dir = "#{distribution_dir}/pool/#{code_name}" + rm_rf(pool_dir, verbose: verbose?) + mkdir_p(pool_dir, verbose: verbose?) 
+ source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{code_name}" + Dir.glob("#{source_dir_prefix}*/**/*") do |path| + next if File.directory?(path) + base_name = File.basename(path) + if base_name.start_with?("apache-arrow-apt-source") + package_name = "apache-arrow-apt-source" + else + package_name = "apache-arrow" + end + destination_path = [ + pool_dir, + component, + package_name[0], + package_name, + base_name, + ].join("/") + copy_artifact(path, + destination_path, + progress_reporter) + case base_name + when /\A[^_]+-apt-source_.*\.deb\z/ + latest_apt_source_package_path = [ + distribution_dir, + "#{package_name}-latest-#{code_name}.deb" + ].join("/") + copy_artifact(path, + latest_apt_source_package_path, + progress_reporter) + end + end + progress_reporter.finish + end + end + + desc "Download dists/ for RC APT repositories" + task :download do + apt_distributions.each do |distribution| + not_checksum_pattern = /.+(?<!\.asc|\.sha512)\z/ + base_distribution_dir = "#{base_dir}/#{distribution}" + pattern = /\Adists\/#{not_checksum_pattern}/ + download_distribution(distribution, + base_distribution_dir, + :base, + pattern: pattern) + end + end + + desc "Sign .deb packages" + task :sign do + apt_distributions.each do |distribution| + distribution_dir = "#{incoming_dir}/#{distribution}" + Dir.glob("#{distribution_dir}/**/*.dsc") do |path| + begin + sh({"LANG" => "C"}, + "gpg", + "--verify", + path, + out: IO::NULL, + err: IO::NULL, + verbose: false) + rescue + sh("debsign", + "--no-re-sign", + "-k#{gpg_key_id}", + path, + out: default_output, + verbose: verbose?) 
+ end + end + sign_dir(distribution, distribution_dir) + end + end + + desc "Update RC APT repositories" + task :update do + apt_update(base_dir, incoming_dir, merged_dir) + apt_targets.each do |distribution, code_name, component| + dists_dir = "#{merged_dir}/#{distribution}/dists/#{code_name}" + next unless File.exist?(dists_dir) + sign_dir("#{distribution} #{code_name}", + dists_dir) + end + end + + desc "Upload .deb packages and RC APT repositories" + task :upload do + apt_distributions.each do |distribution| + upload_distribution_dir = "#{upload_dir}/#{distribution}" + incoming_distribution_dir = "#{incoming_dir}/#{distribution}" + merged_dists_dir = "#{merged_dir}/#{distribution}/dists" + + rm_rf(upload_distribution_dir, verbose: verbose?) + mkdir_p(upload_distribution_dir, verbose: verbose?) + Dir.glob("#{incoming_distribution_dir}/*") do |path| + next if File.basename(path) == "dists" + cp_r(path, + upload_distribution_dir, + preserve: true, + verbose: verbose?) + end + cp_r(merged_dists_dir, + upload_distribution_dir, + preserve: true, + verbose: verbose?) + write_uploaded_files(upload_distribution_dir) + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + rc: rc, + source: upload_distribution_dir, + staging: staging?) + uploader.upload + end + end + end + + desc "Release RC APT repositories" + apt_rc_tasks = [ + "apt:rc:copy", + "apt:rc:download", + "apt:rc:sign", + "apt:rc:update", + "apt:rc:upload", + ] + apt_rc_tasks.unshift("apt:staging:prepare") if staging? 
+ task :rc => apt_rc_tasks + end + end + + def define_apt_release_tasks + directory apt_release_repositories_dir + + namespace :apt do + namespace :release do + desc "Download RC APT repositories" + task :download => apt_release_repositories_dir do + apt_distributions.each do |distribution| + distribution_dir = "#{apt_release_repositories_dir}/#{distribution}" + download_distribution(distribution, + distribution_dir, + :rc, + list: uploaded_files_name) + end + end + + desc "Upload release APT repositories" + task :upload => apt_release_repositories_dir do + apt_distributions.each do |distribution| + distribution_dir = "#{apt_release_repositories_dir}/#{distribution}" + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + source: distribution_dir, + staging: staging?) + uploader.upload + end + end + end + + desc "Release APT repositories" + apt_release_tasks = [ + "apt:release:download", + "apt:release:upload", + ] + task :release => apt_release_tasks + end + end + + def define_apt_tasks + define_apt_staging_tasks + define_apt_rc_tasks + define_apt_release_tasks + end + + def yum_rc_repositories_dir + "#{rc_dir}/yum/repositories" + end + + def yum_release_repositories_dir + "#{release_dir}/yum/repositories" + end + + def available_yum_targets + [ + ["almalinux", "8"], + ["amazon-linux", "2"], + ["centos", "7"], + ["centos", "8"], + ] + end + + def yum_targets + env_yum_targets = (ENV["YUM_TARGETS"] || "").split(",") + if env_yum_targets.empty? + available_yum_targets + else + available_yum_targets.select do |distribution, distribution_version| + env_yum_targets.any? 
do |env_yum_target| + if /\d/.match?(env_yum_target) + env_yum_target.start_with?("#{distribution}-#{distribution_version}") + else + env_yum_target == distribution + end + end + end + end + end + + def yum_distributions + yum_targets.collect(&:first).uniq + end + + def yum_architectures + [ + "aarch64", + "x86_64", + ] + end + + def signed_rpm?(rpm) + IO.pipe do |input, output| + system("rpm", "--checksig", rpm, out: output) + output.close + signature = input.gets.sub(/\A#{Regexp.escape(rpm)}: /, "") + signature.split.include?("signatures") + end + end + + def sign_rpms(directory) + thread_pool = ThreadPool.new(:gpg) do |rpm| + unless signed_rpm?(rpm) + sh("rpm", + "-D", "_gpg_name #{gpg_key_id}", + "-D", "__gpg /usr/bin/gpg", + "-D", "__gpg_check_password_cmd /bin/true true", + "--resign", + rpm, + out: default_output, + verbose: verbose?) + end + end + Dir.glob("#{directory}/**/*.rpm") do |rpm| + thread_pool << rpm + end + thread_pool.join + end + + def rpm_sign(directory) + unless system("rpm", "-q", + rpm_gpg_key_package_name(gpg_key_id), + out: IO::NULL) + gpg_key = Tempfile.new(["apache-arrow-binary", ".asc"]) + sh("gpg", + "--armor", + "--export", gpg_key_id, + out: gpg_key.path, + verbose: verbose?) + sh("rpm", + "--import", gpg_key.path, + out: default_output, + verbose: verbose?) + gpg_key.close! + end + + yum_targets.each do |distribution, distribution_version| + source_dir = [ + directory, + distribution, + distribution_version, + ].join("/") + sign_rpms(source_dir) + end + end + + def yum_update(base_dir, incoming_dir) + yum_targets.each do |distribution, distribution_version| + target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" + target_dir = Pathname(target_dir) + next unless target_dir.directory? + Dir.glob("#{target_dir}/**/repodata") do |repodata| + rm_rf(repodata, verbose: verbose?) + end + target_dir.glob("*") do |arch_dir| + next unless arch_dir.directory? 
+ base_repodata_dir = [ + base_dir, + distribution, + distribution_version, + File.basename(arch_dir), + "repodata", + ].join("/") + if File.exist?(base_repodata_dir) + cp_r(base_repodata_dir, + arch_dir.to_s, + preserve: true, + verbose: verbose?) + end + packages = Tempfile.new("createrepo-c-packages") + Pathname.glob("#{arch_dir}/*/*.rpm") do |rpm| + relative_rpm = rpm.relative_path_from(arch_dir) + packages.puts(relative_rpm.to_s) + end + packages.close + sh("createrepo_c", + "--pkglist", packages.path, + "--recycle-pkglist", + "--retain-old-md-by-age=0", + "--skip-stat", + "--update", + arch_dir.to_s, + out: default_output, + verbose: verbose?) + end + end + end + + def define_yum_staging_tasks + namespace :yum do + namespace :staging do + desc "Prepare staging environment for Yum repositories" + task :prepare do + yum_distributions.each do |distribution| + prepare_staging(distribution) + end + end + + desc "Delete staging environment for Yum repositories" + task :delete do + yum_distributions.each do |distribution| + delete_staging(distribution) + end + end + end + end + end + + def define_yum_rc_tasks + namespace :yum do + namespace :rc do + base_dir = "#{yum_rc_repositories_dir}/base" + incoming_dir = "#{yum_rc_repositories_dir}/incoming" + upload_dir = "#{yum_rc_repositories_dir}/upload" + + desc "Copy RPM packages" + task :copy do + yum_targets.each do |distribution, distribution_version| + progress_label = "Copying: #{distribution} #{distribution_version}" + progress_reporter = ProgressReporter.new(progress_label) + + destination_prefix = [ + incoming_dir, + distribution, + distribution_version, + ].join("/") + rm_rf(destination_prefix, verbose: verbose?) 
+ source_dir_prefix = + "#{artifacts_dir}/#{distribution}-#{distribution_version}" + Dir.glob("#{source_dir_prefix}*/**/*") do |path| + next if File.directory?(path) + base_name = File.basename(path) + type = base_name.split(".")[-2] + destination_paths = [] + case type + when "src" + destination_paths << [ + destination_prefix, + "Source", + "SPackages", + base_name, + ].join("/") + when "noarch" + yum_architectures.each do |architecture| + destination_paths << [ + destination_prefix, + architecture, + "Packages", + base_name, + ].join("/") + end + else + destination_paths << [ + destination_prefix, + type, + "Packages", + base_name, + ].join("/") + end + destination_paths.each do |destination_path| + copy_artifact(path, + destination_path, + progress_reporter) + end + case base_name + when /\A(apache-arrow-release)-.*\.noarch\.rpm\z/ + package_name = $1 + latest_release_package_path = [ + destination_prefix, + "#{package_name}-latest.rpm" + ].join("/") + copy_artifact(path, + latest_release_package_path, + progress_reporter) + end + end + + progress_reporter.finish + end + end + + desc "Download repodata for RC Yum repositories" + task :download do + yum_distributions.each do |distribution| + distribution_dir = "#{base_dir}/#{distribution}" + download_distribution(distribution, + distribution_dir, + :base, + pattern: /\/repodata\//) + end + end + + desc "Sign RPM packages" + task :sign do + rpm_sign(incoming_dir) + yum_targets.each do |distribution, distribution_version| + source_dir = [ + incoming_dir, + distribution, + distribution_version, + ].join("/") + sign_dir("#{distribution}-#{distribution_version}", + source_dir) + end + end + + desc "Update RC Yum repositories" + task :update do + yum_update(base_dir, incoming_dir) + yum_targets.each do |distribution, distribution_version| + target_dir = [ + incoming_dir, + distribution, + distribution_version, + ].join("/") + target_dir = Pathname(target_dir) + next unless target_dir.directory? 
+ target_dir.glob("*") do |arch_dir| + next unless arch_dir.directory? + sign_label = + "#{distribution}-#{distribution_version} #{arch_dir.basename}" + sign_dir(sign_label, + arch_dir.to_s) + end + end + end + + desc "Upload RC Yum repositories" + task :upload => yum_rc_repositories_dir do + yum_distributions.each do |distribution| + incoming_target_dir = "#{incoming_dir}/#{distribution}" + upload_target_dir = "#{upload_dir}/#{distribution}" + + rm_rf(upload_target_dir, verbose: verbose?) + mkdir_p(upload_target_dir, verbose: verbose?) + cp_r(Dir.glob("#{incoming_target_dir}/*"), + upload_target_dir.to_s, + preserve: true, + verbose: verbose?) + write_uploaded_files(upload_target_dir) + + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + rc: rc, + source: upload_target_dir, + staging: staging?, + sync: true, + sync_pattern: /\/repodata\//) + uploader.upload + end + end + end + + desc "Release RC Yum packages" + yum_rc_tasks = [ + "yum:rc:copy", + "yum:rc:download", + "yum:rc:sign", + "yum:rc:update", + "yum:rc:upload", + ] + yum_rc_tasks.unshift("yum:staging:prepare") if staging? 
+ task :rc => yum_rc_tasks + end + end + + def define_yum_release_tasks + directory yum_release_repositories_dir + + namespace :yum do + namespace :release do + desc "Download RC Yum repositories" + task :download => yum_release_repositories_dir do + yum_distributions.each do |distribution| + distribution_dir = "#{yum_release_repositories_dir}/#{distribution}" + download_distribution(distribution, + distribution_dir, + :rc, + list: uploaded_files_name) + end + end + + desc "Upload release Yum repositories" + task :upload => yum_release_repositories_dir do + yum_distributions.each do |distribution| + distribution_dir = "#{yum_release_repositories_dir}/#{distribution}" + uploader = + ArtifactoryUploader.new(api_key: artifactory_api_key, + distribution: distribution, + source: distribution_dir, + staging: staging?, + sync: true, + sync_pattern: /\/repodata\//) + uploader.upload + end + end + end + + desc "Release Yum packages" + yum_release_tasks = [ + "yum:release:download", + "yum:release:upload", + ] + task :release => yum_release_tasks + end + end + + def define_yum_tasks + define_yum_staging_tasks + define_yum_rc_tasks + define_yum_release_tasks + end + + def define_generic_data_rc_tasks(label, + id, + rc_dir, + target_files_glob) + directory rc_dir + + namespace id do + namespace :rc do + desc "Copy #{label} packages" + task :copy => rc_dir do + progress_label = "Copying: #{label}" + progress_reporter = ProgressReporter.new(progress_label) + + Pathname(artifacts_dir).glob(target_files_glob) do |path| + next if path.directory? 
+ destination_path = [ + rc_dir, + path.basename.to_s, + ].join("/") + copy_artifact(path, destination_path, progress_reporter) + end + + progress_reporter.finish + end + + desc "Sign #{label} packages" + task :sign => rc_dir do + sign_dir(label, rc_dir) + end + + desc "Upload #{label} packages" + task :upload do + uploader = + ArtifactoryUploader.new(api_key: artifactory_api_key, + destination_prefix: full_version, + distribution: id.to_s, + rc: rc, + source: rc_dir, + staging: staging?) + uploader.upload + end + end + + desc "Release RC #{label} packages" + rc_tasks = [ + "#{id}:rc:copy", + "#{id}:rc:sign", + "#{id}:rc:upload", + ] + task :rc => rc_tasks + end + end + + def define_generic_data_release_tasks(label, id, release_dir) + directory release_dir + + namespace id do + namespace :release do + desc "Download RC #{label} packages" + task :download => release_dir do + download_distribution(id.to_s, + release_dir, + :rc, + prefix: "#{full_version}") + end + + desc "Upload release #{label} packages" + task :upload => release_dir do + uploader = ArtifactoryUploader.new(api_key: artifactory_api_key, + destination_prefix: version, + distribution: id.to_s, + source: release_dir, + staging: staging?) 
+ uploader.upload + end + end + + desc "Release #{label} packages" + release_tasks = [ + "#{id}:release:download", + "#{id}:release:upload", + ] + task :release => release_tasks + end + end + + def define_generic_data_tasks(label, + id, + rc_dir, + release_dir, + target_files_glob) + define_generic_data_rc_tasks(label, id, rc_dir, target_files_glob) + define_generic_data_release_tasks(label, id, release_dir) + end + + def define_python_tasks + define_generic_data_tasks("Python", + :python, + "#{rc_dir}/python/#{full_version}", + "#{release_dir}/python/#{full_version}", + "{python-sdist,wheel-*}/**/*") + end + + def define_nuget_tasks + define_generic_data_tasks("NuGet", + :nuget, + "#{rc_dir}/nuget/#{full_version}", + "#{release_dir}/nuget/#{full_version}", + "nuget/**/*") + end + + def define_summary_tasks + namespace :summary do + desc "Show RC summary" + task :rc do + suffix = "" + suffix << "-staging" if staging? + puts(<<-SUMMARY) +Success! The release candidate binaries are available here: + https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}-rc/ + https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}-rc/ + https://apache.jfrog.io/artifactory/arrow/centos#{suffix}-rc/ + https://apache.jfrog.io/artifactory/arrow/debian#{suffix}-rc/ + https://apache.jfrog.io/artifactory/arrow/nuget#{suffix}-rc/#{full_version} + https://apache.jfrog.io/artifactory/arrow/python#{suffix}-rc/#{full_version} + https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}-rc/ + SUMMARY + end + + desc "Show release summary" + task :release do + suffix = "" + suffix << "-staging" if staging? + puts(<<-SUMMARY) +Success! 
The release binaries are available here: + https://apache.jfrog.io/artifactory/arrow/almalinux#{suffix}/ + https://apache.jfrog.io/artifactory/arrow/amazon-linux#{suffix}/ + https://apache.jfrog.io/artifactory/arrow/centos#{suffix}/ + https://apache.jfrog.io/artifactory/arrow/debian#{suffix}/ + https://apache.jfrog.io/artifactory/arrow/nuget#{suffix}/#{version} + https://apache.jfrog.io/artifactory/arrow/python#{suffix}/#{version} + https://apache.jfrog.io/artifactory/arrow/ubuntu#{suffix}/ + SUMMARY + end + end + end +end diff --git a/src/arrow/dev/release/binary/.dockerignore b/src/arrow/dev/release/binary/.dockerignore new file mode 100644 index 000000000..f2c46d8ce --- /dev/null +++ b/src/arrow/dev/release/binary/.dockerignore @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +/tmp/ diff --git a/src/arrow/dev/release/binary/Dockerfile b/src/arrow/dev/release/binary/Dockerfile new file mode 100644 index 000000000..a21b32dd7 --- /dev/null +++ b/src/arrow/dev/release/binary/Dockerfile @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM debian:bullseye + +ENV DEBIAN_FRONTEND noninteractive + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + apt-utils \ + createrepo-c \ + devscripts \ + gpg \ + locales \ + openssh-server \ + rake \ + rpm \ + ruby \ + sudo && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* + +RUN gem install apt-dists-merge -v ">= 1.0.2" + +RUN locale-gen en_US.UTF-8 + +RUN mkdir -p /run/sshd +RUN echo "StreamLocalBindUnlink yes" >> /etc/ssh/sshd_config + +ENV ARROW_USER arrow +ENV ARROW_UID 10000 + +RUN \ + groupadd --gid ${ARROW_UID} ${ARROW_USER} && \ + useradd --uid ${ARROW_UID} --gid ${ARROW_UID} --create-home ${ARROW_USER} && \ + mkdir -p /home/arrow/.gnupg /home/arrow/.ssh && \ + chown -R arrow: /home/arrow/.gnupg /home/arrow/.ssh && \ + chmod -R og-rwx /home/arrow/.gnupg /home/arrow/.ssh && \ + echo "${ARROW_USER} ALL=(ALL:ALL) NOPASSWD:ALL" | \ + EDITOR=tee visudo -f /etc/sudoers.d/arrow + +COPY id_rsa.pub /home/arrow/.ssh/authorized_keys +RUN \ + chown -R arrow: /home/arrow/.ssh && \ + chmod -R og-rwx /home/arrow/.ssh + +COPY runner.sh /home/arrow/runner.sh +RUN \ + chown -R arrow: /home/arrow/runner.sh && \ + chmod +x /home/arrow/runner.sh + +EXPOSE 22 diff --git a/src/arrow/dev/release/binary/runner.sh b/src/arrow/dev/release/binary/runner.sh new file mode 100755 index 000000000..465d60d62 --- /dev/null +++ 
b/src/arrow/dev/release/binary/runner.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -u + +export LANG=C + +target_dir=/host/binary/tmp +original_owner=$(stat --format=%u ${target_dir}) +original_group=$(stat --format=%g ${target_dir}) + +sudo -H chown -R ${USER}: ${target_dir} +restore_owner() { + sudo -H chown -R ${original_owner}:${original_group} ${target_dir} +} +trap restore_owner EXIT + +cd /host + +"$@" diff --git a/src/arrow/dev/release/check-rat-report.py b/src/arrow/dev/release/check-rat-report.py new file mode 100644 index 000000000..a5718103a --- /dev/null +++ b/src/arrow/dev/release/check-rat-report.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +############################################################################## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +############################################################################## +import fnmatch +import re +import sys +import xml.etree.ElementTree as ET + +if len(sys.argv) != 3: + sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % + sys.argv[0]) + sys.exit(1) + +exclude_globs_filename = sys.argv[1] +xml_filename = sys.argv[2] + +globs = [line.strip() for line in open(exclude_globs_filename, "r")] + +tree = ET.parse(xml_filename) +root = tree.getroot() +resources = root.findall('resource') + +all_ok = True +for r in resources: + approvals = r.findall('license-approval') + if not approvals or approvals[0].attrib['name'] == 'true': + continue + clean_name = re.sub('^[^/]+/', '', r.attrib['name']) + excluded = False + for g in globs: + if fnmatch.fnmatch(clean_name, g): + excluded = True + break + if not excluded: + sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % ( + clean_name, r.attrib['name'], approvals[0].attrib['name'])) + all_ok = False + +if not all_ok: + sys.exit(1) + +print('OK') +sys.exit(0) diff --git a/src/arrow/dev/release/download_rc_binaries.py b/src/arrow/dev/release/download_rc_binaries.py new file mode 100755 index 000000000..3e3d0f7d3 --- /dev/null +++ b/src/arrow/dev/release/download_rc_binaries.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import argparse +import concurrent.futures as cf +import functools +import os +import subprocess +import urllib.request + + +ARTIFACTORY_ROOT = "https://apache.jfrog.io/artifactory/arrow" +DEFAULT_PARALLEL_DOWNLOADS = 8 + + +class Artifactory: + + def get_file_list(self, prefix): + def traverse(directory, files, directories): + url = f'{ARTIFACTORY_ROOT}/{directory}' + response = urllib.request.urlopen(url).read().decode() + paths = re.findall('<a href="(.+?)"', response) + for path in paths: + if path == '../': + continue + resolved_path = f'{directory}{path}' + if path.endswith('/'): + directories.append(resolved_path) + else: + files.append(resolved_path) + files = [] + if not prefix.endswith('/'): + prefix += '/' + directories = [prefix] + while len(directories) > 0: + directory = directories.pop() + traverse(directory, files, directories) + return files + + def download_files(self, files, dest=None, num_parallel=None, + re_match=None): + """ + Download files from Bintray in parallel. If file already exists, will + overwrite if the checksum does not match what Bintray says it should be + + Parameters + ---------- + files : List[Dict] + File listing from Bintray + dest : str, default None + Defaults to current working directory + num_parallel : int, default 8 + Number of files to download in parallel. 
If set to None, uses + default + """ + if dest is None: + dest = os.getcwd() + if num_parallel is None: + num_parallel = DEFAULT_PARALLEL_DOWNLOADS + + if re_match is not None: + regex = re.compile(re_match) + files = [x for x in files if regex.match(x)] + + if num_parallel == 1: + for path in files: + self._download_file(dest, path) + else: + parallel_map_terminate_early( + functools.partial(self._download_file, dest), + files, + num_parallel + ) + + def _download_file(self, dest, path): + base, filename = os.path.split(path) + + dest_dir = os.path.join(dest, base) + os.makedirs(dest_dir, exist_ok=True) + + dest_path = os.path.join(dest_dir, filename) + + print("Downloading {} to {}".format(path, dest_path)) + + url = f'{ARTIFACTORY_ROOT}/{path}' + + cmd = [ + 'curl', '--fail', '--location', '--retry', '5', + '--output', dest_path, url + ] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + if proc.returncode != 0: + raise Exception("Downloading {} failed\nstdout: {}\nstderr: {}" + .format(path, stdout, stderr)) + + +def parallel_map_terminate_early(f, iterable, num_parallel): + tasks = [] + with cf.ProcessPoolExecutor(num_parallel) as pool: + for v in iterable: + tasks.append(pool.submit(functools.partial(f, v))) + + for task in cf.as_completed(tasks): + if task.exception() is not None: + e = task.exception() + for task in tasks: + task.cancel() + raise e + + +ARROW_REPOSITORY_PACKAGE_TYPES = ['centos', 'debian', 'ubuntu'] +ARROW_STANDALONE_PACKAGE_TYPES = ['nuget', 'python'] +ARROW_PACKAGE_TYPES = \ + ARROW_REPOSITORY_PACKAGE_TYPES + \ + ARROW_STANDALONE_PACKAGE_TYPES + + +def download_rc_binaries(version, rc_number, re_match=None, dest=None, + num_parallel=None, target_package_type=None): + artifactory = Artifactory() + + version_string = '{}-rc{}'.format(version, rc_number) + if target_package_type: + package_types = [target_package_type] + else: + package_types = ARROW_PACKAGE_TYPES + for 
package_type in package_types: + if package_type in ARROW_REPOSITORY_PACKAGE_TYPES: + prefix = f'{package_type}-rc' + else: + prefix = f'{package_type}-rc/{version_string}' + files = artifactory.get_file_list(prefix) + if package_type in ARROW_REPOSITORY_PACKAGE_TYPES: + version_pattern = re.compile(r'\d+\.\d+\.\d+') + + def is_old_release(path): + match = version_pattern.search(path) + if not match: + return False + return match[0] != version + files = [x for x in files if not is_old_release(x)] + artifactory.download_files(files, re_match=re_match, dest=dest, + num_parallel=num_parallel) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Download release candidate binaries' + ) + parser.add_argument('version', type=str, help='The version number') + parser.add_argument('rc_number', type=int, + help='The release candidate number, e.g. 0, 1, etc') + parser.add_argument('-e', '--regexp', type=str, default=None, + help=('Regular expression to match on file names ' + 'to only download certain files')) + parser.add_argument('--dest', type=str, default=os.getcwd(), + help='The output folder for the downloaded files') + parser.add_argument('--num_parallel', type=int, default=8, + help='The number of concurrent downloads to do') + parser.add_argument('--package_type', type=str, default=None, + help='The package type to be downloaded') + args = parser.parse_args() + + download_rc_binaries(args.version, args.rc_number, dest=args.dest, + re_match=args.regexp, num_parallel=args.num_parallel, + target_package_type=args.package_type) diff --git a/src/arrow/dev/release/post-01-upload.sh b/src/arrow/dev/release/post-01-upload.sh new file mode 100755 index 000000000..5671c3746 --- /dev/null +++ b/src/arrow/dev/release/post-01-upload.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +set -e +set -u + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc=$2 + +tmp_dir=tmp-apache-arrow-dist + +echo "Recreate temporary directory: ${tmp_dir}" +rm -rf ${tmp_dir} +mkdir -p ${tmp_dir} + +echo "Clone dev dist repository" +svn \ + co \ + https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-${version}-rc${rc} \ + ${tmp_dir}/dev + +echo "Clone release dist repository" +svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release + +echo "Copy ${version}-rc${rc} to release working copy" +release_version=arrow-${version} +mkdir -p ${tmp_dir}/release/${release_version} +cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/ +svn add ${tmp_dir}/release/${release_version} + +echo "Keep only the three most recent versions" +old_releases=$( + svn ls ${tmp_dir}/release/ | \ + grep -E '^arrow-[0-9\.]+' | \ + sort --version-sort --reverse | \ + tail -n +4 +) +for old_release_version in $old_releases; do + echo "Remove old release ${old_release_version}" + svn delete ${tmp_dir}/release/${old_release_version} +done + +echo "Commit release" +svn ci -m "Apache Arrow ${version}" ${tmp_dir}/release + +echo "Clean up" +rm -rf ${tmp_dir} + +echo "Success! 
The release is available here:" +echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}" diff --git a/src/arrow/dev/release/post-02-binary.sh b/src/arrow/dev/release/post-02-binary.sh new file mode 100755 index 000000000..b1b41f9fb --- /dev/null +++ b/src/arrow/dev/release/post-02-binary.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -o pipefail + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + exit +fi + +version=$1 +rc=$2 + +cd "${SOURCE_DIR}" + +if [ ! -f .env ]; then + echo "You must create $(pwd)/.env" + echo "You can use $(pwd)/.env.example as template" + exit 1 +fi +. .env + +. utils-binary.sh + +# By default deploy all artifacts. +# To deactivate one category, deactivate the category and all of its dependents. +# To explicitly select one category, set DEPLOY_DEFAULT=0 DEPLOY_X=1. 
+: ${DEPLOY_DEFAULT:=1} +: ${DEPLOY_ALMALINUX:=${DEPLOY_DEFAULT}} +: ${DEPLOY_AMAZON_LINUX:=${DEPLOY_DEFAULT}} +: ${DEPLOY_CENTOS:=${DEPLOY_DEFAULT}} +: ${DEPLOY_DEBIAN:=${DEPLOY_DEFAULT}} +: ${DEPLOY_NUGET:=${DEPLOY_DEFAULT}} +: ${DEPLOY_PYTHON:=${DEPLOY_DEFAULT}} +: ${DEPLOY_UBUNTU:=${DEPLOY_DEFAULT}} + +rake_tasks=() +apt_targets=() +yum_targets=() +if [ ${DEPLOY_ALMALINUX} -gt 0 ]; then + rake_tasks+=(yum:release) + yum_targets+=(almalinux) +fi +if [ ${DEPLOY_AMAZON_LINUX} -gt 0 ]; then + rake_tasks+=(yum:release) + yum_targets+=(amazon-linux) +fi +if [ ${DEPLOY_CENTOS} -gt 0 ]; then + rake_tasks+=(yum:release) + yum_targets+=(centos) +fi +if [ ${DEPLOY_DEBIAN} -gt 0 ]; then + rake_tasks+=(apt:release) + apt_targets+=(debian) +fi +if [ ${DEPLOY_NUGET} -gt 0 ]; then + rake_tasks+=(nuget:release) +fi +if [ ${DEPLOY_PYTHON} -gt 0 ]; then + rake_tasks+=(python:release) +fi +if [ ${DEPLOY_UBUNTU} -gt 0 ]; then + rake_tasks+=(apt:release) + apt_targets+=(ubuntu) +fi +rake_tasks+=(summary:release) + +tmp_dir=binary/tmp +mkdir -p "${tmp_dir}" + +docker_run \ + ./runner.sh \ + rake \ + --trace \ + "${rake_tasks[@]}" \ + APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \ + ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \ + ARTIFACTS_DIR="${tmp_dir}/artifacts" \ + RC=${rc} \ + STAGING=${STAGING:-no} \ + VERSION=${version} \ + YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}") diff --git a/src/arrow/dev/release/post-03-website.sh b/src/arrow/dev/release/post-03-website.sh new file mode 100755 index 000000000..7aceeaf59 --- /dev/null +++ b/src/arrow/dev/release/post-03-website.sh @@ -0,0 +1,266 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -u + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ARROW_DIR="${SOURCE_DIR}/../.." +ARROW_SITE_DIR="${ARROW_DIR}/../arrow-site" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <previous-version> <version>" + exit 1 +fi + +previous_version=$1 +version=$2 + +branch_name=release-note-${version} +release_dir="${ARROW_SITE_DIR}/_release" +announce_file="${release_dir}/${version}.md" +versions_yml="${ARROW_SITE_DIR}/_data/versions.yml" + +pushd "${ARROW_SITE_DIR}" +git checkout master +git checkout -b ${branch_name} +popd + +pushd "${ARROW_DIR}" + +release_date=$(LANG=C date "+%-d %B %Y") +previous_tag_date=$(git log -n 1 --pretty=%aI apache-arrow-${previous_version}) +rough_previous_release_date=$(date --date "${previous_tag_date}" +%s) +rough_release_date=$(date +%s) +rough_n_development_months=$(( + (${rough_release_date} - ${rough_previous_release_date}) / (60 * 60 * 24 * 30) +)) + +git_tag=apache-arrow-${version} +git_range=apache-arrow-${previous_version}..${git_tag} + +committers_command_line="git shortlog -csn ${git_range}" +contributors_command_line="git shortlog -sn ${git_range}" + +committers=$(${committers_command_line}) +contributors=$(${contributors_command_line}) + +n_commits=$(git log --pretty=oneline ${git_range} | wc -l) +n_contributors=$(${contributors_command_line} | wc -l) + +git_tag_hash=$(git log -n 1 --pretty=%H ${git_tag}) + +popd + +pushd 
"${ARROW_SITE_DIR}" + +# Add announce for the current version +cat <<ANNOUNCE > "${announce_file}" +--- +layout: default +title: Apache Arrow ${version} Release +permalink: /release/${version}.html +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +# Apache Arrow ${version} (${release_date}) + +This is a major release covering more than ${rough_n_development_months} months of development. + +## Download + +* [**Source Artifacts**][1] +* **Binary Artifacts** + * [For CentOS][2] + * [For Debian][3] + * [For Python][4] + * [For Ubuntu][5] +* [Git tag][6] + +## Contributors + +This release includes ${n_commits} commits from ${n_contributors} distinct contributors. + +\`\`\`console +$ ${contributors_command_line} +ANNOUNCE + +echo "${contributors}" >> "${announce_file}" + +cat <<ANNOUNCE >> "${announce_file}" +\`\`\` + +## Patch Committers + +The following Apache committers merged contributed patches to the repository. 
+ +\`\`\`console +$ ${committers_command_line} +ANNOUNCE + +echo "${committers}" >> "${announce_file}" + +cat <<ANNOUNCE >> "${announce_file}" +\`\`\` + +## Changelog + +ANNOUNCE + +archery release changelog generate ${version} | \ + sed -e 's/^#/##/g' >> "${announce_file}" + +cat <<ANNOUNCE >> "${announce_file}" +[1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/ +[2]: https://apache.jfrog.io/artifactory/arrow/centos/ +[3]: https://apache.jfrog.io/artifactory/arrow/debian/ +[4]: https://apache.jfrog.io/artifactory/arrow/python/${version}/ +[5]: https://apache.jfrog.io/artifactory/arrow/ubuntu/ +[6]: https://github.com/apache/arrow/releases/tag/apache-arrow-${version} +ANNOUNCE +git add "${announce_file}" + + +# Update index +pushd "${release_dir}" + +index_file=index.md +rm -f ${index_file} +announce_files="$(ls | sort --version-sort --reverse)" +cat <<INDEX > ${index_file} +--- +layout: default +title: Releases +permalink: /release/index.html +--- +<!-- +{% comment %} +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to you under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +{% endcomment %} +--> + +# Apache Arrow Releases + +Navigate to the release page for downloads and the changelog. 
+ +INDEX + +i=0 +for md_file in ${announce_files}; do + i=$((i + 1)) + title=$(grep '^# Apache Arrow' ${md_file} | sed -e 's/^# Apache Arrow //') + echo "* [${title}][${i}]" >> ${index_file} +done +echo >> ${index_file} + +i=0 +for md_file in ${announce_files}; do + i=$((i + 1)) + html_file=$(echo ${md_file} | sed -e 's/md$/html/') + echo "[${i}]: {{ site.baseurl }}/release/${html_file}" >> ${index_file} +done + +git add ${index_file} + +popd + + +# Update versions.yml +pinned_version=$(echo ${version} | sed -e 's/\.[^.]*$/.*/') + +apache_download_url=https://downloads.apache.org + +cat <<YAML > "${versions_yml}" +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Database of the current version +# +current: + number: '${version}' + pinned_number: '${pinned_version}' + date: '${release_date}' + git-tag: '${git_tag_hash}' + github-tag-link: 'https://github.com/apache/arrow/releases/tag/${git_tag}' + release-notes: 'https://arrow.apache.org/release/${version}.html' + mirrors: 'https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/' + tarball-name: 'apache-arrow-${version}.tar.gz' + tarball-url: 'https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz' + java-artifacts: 'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%22${version}%22' + asc: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.asc' + sha256: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256' + sha512: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha512' +YAML +git add "${versions_yml}" + +git commit -m "[Website] Add release note for ${version}" +git push -u origin ${branch_name} + +github_url=$(git remote get-url origin | \ + sed \ + -e 's,^git@github.com:,https://github.com/,' \ + -e 's,\.git$,,') + +echo "Success!" +echo "Create a pull request:" +echo " ${github_url}/pull/new/${branch_name}" + +popd diff --git a/src/arrow/dev/release/post-04-ruby.sh b/src/arrow/dev/release/post-04-ruby.sh new file mode 100755 index 000000000..edcb54c13 --- /dev/null +++ b/src/arrow/dev/release/post-04-ruby.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +set -e +set -o pipefail + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <version>" + exit +fi + +version=$1 +archive_name=apache-arrow-${version} +tar_gz=${archive_name}.tar.gz + +echo "NOTE: We should release RubyGems after Homebrew and MSYS2 packages are updated!!!" + +echo "Checking Homebrew package..." +homebrew_version=$( + curl \ + --fail \ + --no-progress-meter \ + https://raw.githubusercontent.com/Homebrew/homebrew-core/master/Formula/apache-arrow-glib.rb | \ + grep url | \ + grep -o "[0-9]*\.[0-9]*\.[0-9]*" | \ + head -n 1) +echo "Homebrew package version: ${homebrew_version}" +if [ "${version}" = "${homebrew_version}" ]; then + echo "OK!" +else + echo "Different!" + exit 1 +fi + + +echo "Checking MSYS2 package..." +msys2_version=$( + curl \ + --fail \ + --no-progress-meter \ + https://packages.msys2.org/base/mingw-w64-arrow | \ + grep -A 1 ">Version:<" | \ + grep -o "[0-9]*\.[0-9]*\.[0-9]*") +echo "MSYS2 package version: ${msys2_version}" +if [ "${version}" = "${msys2_version}" ]; then + echo "OK!" +else + echo "Different!" 
+ exit 1 +fi + + +rm -f ${tar_gz} +curl \ + --remote-name \ + --fail \ + https://downloads.apache.org/arrow/arrow-${version}/${tar_gz} +rm -rf ${archive_name} +tar xf ${tar_gz} +modules=() +for module in ${archive_name}/ruby/red-*; do + pushd ${module} + rake release + modules+=($(basename ${module})) + popd +done +rm -rf ${archive_name} +rm -f ${tar_gz} + +echo "Success! The released RubyGems are available here:" +for module in ${modules[@]}; do + echo " https://rubygems.org/gems/${module}/versions/${version}" +done diff --git a/src/arrow/dev/release/post-05-js.sh b/src/arrow/dev/release/post-05-js.sh new file mode 100755 index 000000000..edc5fe20b --- /dev/null +++ b/src/arrow/dev/release/post-05-js.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+set -e
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <version>"
+  exit 1  # wrong usage is a failure; a bare "exit" would report success
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+rm -f "${tar_gz}"  # drop any stale download before fetching
+curl \
+  --remote-name \
+  --fail \
+  "https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}"
+rm -rf "${archive_name}"
+tar xf "${tar_gz}"
+pushd "${archive_name}/js"
+./npm-release.sh  # release script shipped inside the extracted source archive
+popd
+rm -rf "${archive_name}"
+rm -f "${tar_gz}"
+
+echo "Success! The released npm packages are available here:"
+echo " https://www.npmjs.com/package/apache-arrow/v/${version}"
diff --git a/src/arrow/dev/release/post-06-csharp.sh b/src/arrow/dev/release/post-06-csharp.sh
new file mode 100755
index 000000000..d2968a5d5
--- /dev/null
+++ b/src/arrow/dev/release/post-06-csharp.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -eu  # no -x: tracing would print NUGET_API_KEY from the push command below
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <version>"
+  exit 1  # wrong usage is a failure; a bare "exit" would report success
+fi
+
+version=$1
+
+if [ -z "${NUGET_API_KEY:-}" ]; then  # ":-" keeps "set -u" from aborting before this message
+  echo "NUGET_API_KEY is empty"
+  exit 1
+fi
+
+base_names=()
+base_names+=("Apache.Arrow.${version}")
+base_names+=("Apache.Arrow.Flight.${version}")
+base_names+=("Apache.Arrow.Flight.AspNetCore.${version}")
+for base_name in "${base_names[@]}"; do
+  for extension in nupkg snupkg; do  # .snupkg is only downloaded here; presumably the push picks it up - TODO confirm
+    path=${base_name}.${extension}
+    rm -f "${path}"
+    curl \
+      --fail \
+      --location \
+      --remote-name \
+      "https://apache.jfrog.io/artifactory/arrow/nuget/${version}/${path}"
+  done
+  dotnet nuget push \
+    "${base_name}.nupkg" \
+    -k "${NUGET_API_KEY}" \
+    -s https://api.nuget.org/v3/index.json
+  rm -f "${base_name}".{nupkg,snupkg}
+done
+
+echo "Success! The released NuGet package is available here:"
+echo " https://www.nuget.org/packages/Apache.Arrow/${version}"
diff --git a/src/arrow/dev/release/post-08-remove-rc.sh b/src/arrow/dev/release/post-08-remove-rc.sh
new file mode 100755
index 000000000..8e02b7e95
--- /dev/null
+++ b/src/arrow/dev/release/post-08-remove-rc.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+set -u
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <version>"
+  exit 1  # wrong usage is a failure; a bare "exit" would report success
+fi
+
+version=$1
+
+base_url=https://dist.apache.org/repos/dist/dev/arrow
+pattern="^apache-arrow-${version}-rc"
+rc_paths=$(svn ls "${base_url}" | grep "${pattern}" || :)  # list once; "|| :" tolerates "no match" under -e/pipefail
+if [ -n "${rc_paths}" ]; then
+  # Turn every matching directory name into its full repository URL.
+  rc_urls=()
+  for rc_path in ${rc_paths}; do
+    rc_urls+=("${base_url}/${rc_path}")
+  done
+  svn rm --message "Remove RC for ${version}" "${rc_urls[@]}"
+  echo "Removed RC artifacts:"
+  for rc_url in "${rc_urls[@]}"; do
+    echo " ${rc_url}"
+  done
+else
+  echo "No RC artifacts at ${base_url}"
+fi
diff --git a/src/arrow/dev/release/post-09-docs.sh b/src/arrow/dev/release/post-09-docs.sh
new file mode 100755
index 000000000..9c0b77bb5
--- /dev/null
+++ b/src/arrow/dev/release/post-09-docs.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ARROW_DIR="${SOURCE_DIR}/../.."
+ARROW_SITE_DIR="${ARROW_DIR}/../arrow-site" + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <version>" + exit 1 +fi + +version=$1 +release_tag="apache-arrow-${version}" +branch_name=release-docs-${version} + +pushd "${ARROW_SITE_DIR}" +git checkout asf-site +git checkout -b ${branch_name} +rm -rf docs/* +git checkout docs/c_glib/index.html +popd + +pushd "${ARROW_DIR}" +git checkout "${release_tag}" + +UBUNTU=20.10 archery docker run \ + -v "${ARROW_SITE_DIR}/docs:/build/docs" \ + -e ARROW_DOCS_VERSION="${version}" \ + ubuntu-docs + +: ${PUSH:=1} + +if [ ${PUSH} -gt 0 ]; then + pushd "${ARROW_SITE_DIR}" + git add docs + git commit -m "[Website] Update documentations for ${version}" + git push -u origin ${branch_name} + github_url=$(git remote get-url origin | \ + sed \ + -e 's,^git@github.com:,https://github.com/,' \ + -e 's,\.git$,,') + popd + + echo "Success!" + echo "Create a pull request:" + echo " ${github_url}/pull/new/${branch_name}" +fi diff --git a/src/arrow/dev/release/post-10-python.sh b/src/arrow/dev/release/post-10-python.sh new file mode 100755 index 000000000..a014239ea --- /dev/null +++ b/src/arrow/dev/release/post-10-python.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+set -ex
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+: ${TEST_PYPI:=0}  # set TEST_PYPI=1 to upload to test.pypi.org instead of the default index
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>"
+  exit 1  # wrong usage is a failure; a bare "exit" would report success
+fi
+
+version=$1
+rc=$2
+
+tmp=$(mktemp -d -t "arrow-post-python.XXXXX")  # scratch directory for the downloaded RC packages
+${PYTHON:-python} \
+  "${SOURCE_DIR}/download_rc_binaries.py" \
+  "${version}" \
+  "${rc}" \
+  --dest="${tmp}" \
+  --package_type=python \
+  --regex=".*\.(whl|tar\.gz)$"
+
+if [ ${TEST_PYPI} -gt 0 ]; then
+  TWINE_ARGS="--repository-url https://test.pypi.org/legacy/"
+fi
+
+twine upload ${TWINE_ARGS} "${tmp}/python-rc/${version}-rc${rc}"/*.{whl,tar.gz}  # TWINE_ARGS stays unquoted so it splits into separate options
+
+rm -rf "${tmp}"
+
+echo "Success! The released PyPI packages are available here:"
+echo " https://pypi.org/project/pyarrow/${version}"
diff --git a/src/arrow/dev/release/post-11-java.sh b/src/arrow/dev/release/post-11-java.sh
new file mode 100755
index 000000000..86e6e9b57
--- /dev/null
+++ b/src/arrow/dev/release/post-11-java.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <version>"
+  exit 1  # wrong usage is a failure; a bare "exit" would report success
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+rm -f "${tar_gz}"  # drop any stale download before fetching
+curl \
+  --remote-name \
+  --fail \
+  "https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}"
+rm -rf "${archive_name}"
+tar xf "${tar_gz}"
+
+pushd "${archive_name}"
+
+# Clone the testing data into the appropriate directories
+git clone https://github.com/apache/arrow-testing.git testing
+git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing
+
+# Build the JNI bindings the same way 01-perform.sh does
+mkdir -p cpp/java-build
+pushd cpp/java-build
+cmake \
+  -DARROW_DATASET=ON \
+  -DARROW_FILESYSTEM=ON \
+  -DARROW_GANDIVA_JAVA=ON \
+  -DARROW_GANDIVA=ON \
+  -DARROW_JNI=ON \
+  -DARROW_ORC=ON \
+  -DARROW_PARQUET=ON \
+  -DCMAKE_BUILD_TYPE=release \
+  -G Ninja \
+  ..
+ninja
+popd
+
+# Go into the java subfolder
+pushd java
+# Stage the artifacts using both the apache-release and arrow-jni profiles
+# Note: on ORC checkstyle failure use -Dcheckstyle.skip=true until https://issues.apache.org/jira/browse/ARROW-12552 gets resolved
+mvn -Papache-release,arrow-jni -Darrow.cpp.build.dir="$(realpath ../cpp/java-build/release)" deploy
+popd
+
+popd
+
+echo "Success! The maven artifacts have been staged. Proceed with the following steps:"
+echo "1. Login to the apache repository: https://repository.apache.org/#stagingRepositories"
+echo "2. Select the arrow staging repository you just created: orgapachearrow-100x"
+echo "3. Click the \"close\" button"
+echo "4. Once validation has passed, click the \"release\" button"
+echo ""
+echo "Note, that you must set up Maven to be able to publish to Apache's repositories."
+echo "Read more at https://www.apache.org/dev/publishing-maven-artifacts.html."
diff --git a/src/arrow/dev/release/post-12-bump-versions.sh b/src/arrow/dev/release/post-12-bump-versions.sh new file mode 100755 index 000000000..8474f03d2 --- /dev/null +++ b/src/arrow/dev/release/post-12-bump-versions.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +set -ue + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <next_version>" + exit 1 +fi + +: ${BUMP_DEFAULT:=1} +: ${BUMP_VERSION_POST_TAG:=${BUMP_DEFAULT}} +: ${BUMP_DEB_PACKAGE_NAMES:=${BUMP_DEFAULT}} + +. 
$SOURCE_DIR/utils-prepare.sh + +version=$1 +next_version=$2 +next_version_snapshot="${next_version}-SNAPSHOT" + +if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then + echo "Updating versions for ${next_version_snapshot}" + update_versions "${version}" "${next_version}" "snapshot" + git commit -m "[Release] Update versions for ${next_version_snapshot}" +fi + +if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ]; then + echo "Updating .deb package names for ${next_version}" + so_version() { + local version=$1 + local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/') + local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') + expr ${major_version} \* 100 + ${minor_version} + } + deb_lib_suffix=$(so_version $version) + next_deb_lib_suffix=$(so_version $next_version) + if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then + cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow + for target in debian*/lib*${deb_lib_suffix}.install; do + git mv \ + ${target} \ + $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/") + done + deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control* + rm -f debian*/control*.bak + git add debian*/control* + cd - + cd $SOURCE_DIR/../tasks/ + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml + rm -f tasks.yml.bak + git add tasks.yml + cd - + cd $SOURCE_DIR + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt + rm -f rat_exclude_files.txt.bak + git add rat_exclude_files.txt + git commit -m "[Release] Update .deb package names for $next_version" + cd - + fi +fi diff --git a/src/arrow/dev/release/post-13-go.sh b/src/arrow/dev/release/post-13-go.sh new file mode 100644 index 000000000..7c6034837 --- /dev/null +++ b/src/arrow/dev/release/post-13-go.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +set -ue + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <version>" + exit +fi + +version=$1 +version_tag="apache-arrow-${version}" +go_arrow_tag="go/v${version}" + +git tag "${go_arrow_tag}" "${version_tag}" +git push apache "${go_arrow_tag}" diff --git a/src/arrow/dev/release/rat_exclude_files.txt b/src/arrow/dev/release/rat_exclude_files.txt new file mode 100644 index 000000000..47fcf618f --- /dev/null +++ b/src/arrow/dev/release/rat_exclude_files.txt @@ -0,0 +1,208 @@ +*.npmrc +*.gitignore +.gitmodules +*_generated.h +*_generated.js +*_generated.ts +*.csv +*.json +*.snap +.github/ISSUE_TEMPLATE/question.md +ci/etc/rprofile +ci/etc/*.patch +ci/vcpkg/*.patch +CHANGELOG.md +cpp/CHANGELOG_PARQUET.md +cpp/src/arrow/io/mman.h +cpp/src/arrow/util/random.h +cpp/src/arrow/status.cc +cpp/src/arrow/status.h +cpp/src/arrow/vendored/* +cpp/build-support/asan_symbolize.py +cpp/build-support/cpplint.py +cpp/build-support/lint_exclusions.txt +cpp/build-support/iwyu/* +cpp/cmake_modules/FindPythonLibsNew.cmake +cpp/cmake_modules/SnappyCMakeLists.txt +cpp/cmake_modules/SnappyConfig.h +cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake 
+cpp/src/parquet/.parquetcppversion +cpp/src/generated/parquet_constants.cpp +cpp/src/generated/parquet_constants.h +cpp/src/generated/parquet_types.cpp +cpp/src/generated/parquet_types.h +cpp/src/plasma/thirdparty/ae/ae.c +cpp/src/plasma/thirdparty/ae/ae.h +cpp/src/plasma/thirdparty/ae/ae_epoll.c +cpp/src/plasma/thirdparty/ae/ae_evport.c +cpp/src/plasma/thirdparty/ae/ae_kqueue.c +cpp/src/plasma/thirdparty/ae/ae_select.c +cpp/src/plasma/thirdparty/ae/config.h +cpp/src/plasma/thirdparty/ae/zmalloc.h +cpp/src/plasma/thirdparty/dlmalloc.c +cpp/thirdparty/flatbuffers/include/flatbuffers/base.h +cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h +cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h +dev/requirements*.txt +dev/archery/MANIFEST.in +dev/archery/requirements*.txt +dev/archery/archery/tests/fixtures/* +dev/archery/archery/crossbow/tests/fixtures/* +dev/release/rat_exclude_files.txt +dev/tasks/homebrew-formulae/apache-arrow.rb +dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install +dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat +dev/tasks/linux-packages/apache-arrow-apt-source/debian/control +dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules +dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format +dev/tasks/linux-packages/apache-arrow/debian/compat +dev/tasks/linux-packages/apache-arrow/debian/control.in +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-flight-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install +dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install 
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install 
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install +dev/tasks/linux-packages/apache-arrow/debian/patches/series +dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install +dev/tasks/linux-packages/apache-arrow/debian/rules +dev/tasks/linux-packages/apache-arrow/debian/source/format 
+dev/tasks/linux-packages/apache-arrow/debian/watch +dev/tasks/requirements*.txt +dev/tasks/conda-recipes/* +docs/requirements.txt +go/arrow/flight/Flight_grpc.pb.go +go/go.sum +go/arrow/Gopkg.lock +go/arrow/flight/Flight.pb.go +go/arrow/flight/Flight_grpc.pb.go +go/arrow/internal/cpu/* +go/arrow/type_string.go +go/arrow/cdata/test/go.sum +go/*.tmpldata +go/*.s +go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go +go/parquet/internal/gen-go/parquet/parquet-consts.go +go/parquet/internal/gen-go/parquet/parquet.go +js/.npmignore +js/closure-compiler-scripts/* +js/src/fb/*.ts +js/yarn.lock +js/.eslintignore +python/cmake_modules +python/cmake_modules/FindPythonLibsNew.cmake +python/cmake_modules/SnappyCMakeLists.txt +python/cmake_modules/SnappyConfig.h +python/MANIFEST.in +python/manylinux1/.dockerignore +python/pyarrow/includes/__init__.pxd +python/pyarrow/tests/__init__.py +python/pyarrow/vendored/* +python/requirements*.txt +pax_global_header +MANIFEST.in +__init__.pxd +__init__.py +requirements.txt +csharp/.gitattributes +csharp/dummy.git/* +csharp/src/Apache.Arrow/Flatbuf/* +csharp/Apache.Arrow.sln +csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj +csharp/examples/Examples.sln +csharp/src/Apache.Arrow/Apache.Arrow.csproj +csharp/src/Apache.Arrow/Properties/Resources.Designer.cs +csharp/src/Apache.Arrow/Properties/Resources.resx +csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj +csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj +csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj +csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +csharp/test/Apache.Arrow.Tests/app.config +*.html +*.sgml +*.css +*.png +*.ico +*.svg +*.devhelp2 +*.scss +r/R/arrowExports.R 
+r/src/arrowExports.cpp +r/DESCRIPTION +r/LICENSE.md +r/NAMESPACE +r/.Rbuildignore +r/arrow.Rproj +r/README.md +r/README.Rmd +r/man/*.Rd +r/cran-comments.md +r/vignettes/*.Rmd +r/tests/testthat/test-*.txt +r/inst/include/cpp11.hpp +r/inst/include/cpp11/*.hpp +.gitattributes +ruby/red-arrow/.yardopts +julia/Arrow/Project.toml +julia/Arrow/README.md +julia/Arrow/docs/Manifest.toml +julia/Arrow/docs/Project.toml +julia/Arrow/docs/make.jl +julia/Arrow/docs/mkdocs.yml +julia/Arrow/docs/src/index.md +julia/Arrow/docs/src/manual.md +julia/Arrow/docs/src/reference.md diff --git a/src/arrow/dev/release/run-rat.sh b/src/arrow/dev/release/run-rat.sh new file mode 100755 index 000000000..2596a284c --- /dev/null +++ b/src/arrow/dev/release/run-rat.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +RAT_VERSION=0.13 + +# download apache rat +if [ ! 
-f apache-rat-${RAT_VERSION}.jar ]; then + curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar +fi + +RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " + +RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) + +# generate the rat report +$RAT $1 > rat.txt +python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt +cat filtered_rat.txt +UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l` + +if [ "0" -eq "${UNAPPROVED}" ]; then + echo "No unapproved licenses" +else + echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt" + exit 1 +fi diff --git a/src/arrow/dev/release/run-test.rb b/src/arrow/dev/release/run-test.rb new file mode 100755 index 000000000..90df39b13 --- /dev/null +++ b/src/arrow/dev/release/run-test.rb @@ -0,0 +1,31 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +$VERBOSE = true + +require "pathname" + +test_dir = Pathname.new(__dir__) + +require "test-unit" +require_relative "test-helper" + +ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] = "10000" + +exit(Test::Unit::AutoRunner.run(true, test_dir.to_s)) diff --git a/src/arrow/dev/release/setup-gpg-agent.sh b/src/arrow/dev/release/setup-gpg-agent.sh new file mode 100644 index 000000000..9ff84f6f0 --- /dev/null +++ b/src/arrow/dev/release/setup-gpg-agent.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# source me +eval $(gpg-agent --daemon --allow-preset-passphrase) +gpg --use-agent -s LICENSE.txt +rm -rf LICENSE.txt.gpg diff --git a/src/arrow/dev/release/test-helper.rb b/src/arrow/dev/release/test-helper.rb new file mode 100644 index 000000000..8a272ddfe --- /dev/null +++ b/src/arrow/dev/release/test-helper.rb @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "English" +require "cgi/util" +require "fileutils" +require "find" +require "json" +require "open-uri" +require "rexml/document" +require "tempfile" +require "tmpdir" + +module CommandRunnable + class Error < StandardError + end + + def sh(*command_line, check_result: true) + if command_line[0].is_a?(Hash) + env = command_line.shift + else + env = {} + end + stdout = Tempfile.new("command-stdout.log") + stderr = Tempfile.new("command-stderr.log") + success = system(env, *command_line, out: stdout.path, err: stderr.path) + if check_result + unless success + message = "Failed to run: #{command_line.join(" ")}\n" + message << "stdout:\n #{stdout.read}\n" + message << "stderr:\n #{stderr.read}" + raise Error, message + end + end + stdout.read + end +end + +module GitRunnable + include CommandRunnable + + def git(*args) + if args[0].is_a?(Hash) + env = args.shift + else + env = {} + end + sh(env, "git", *args) + end + + def git_current_commit + git("rev-parse", "HEAD").chomp + end + + def git_tags + git("tags").lines(chomp: true) + end +end + +module VersionDetectable + def detect_versions + top_dir = Pathname(__dir__).parent.parent + cpp_cmake_lists = top_dir + "cpp" + "CMakeLists.txt" + @snapshot_version = cpp_cmake_lists.read[/ARROW_VERSION "(.+?)"/, 1] + @release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "") + @so_version = compute_so_version(@release_version) + @next_version = 
@release_version.gsub(/\A\d+/) {|major| major.succ} + @next_snapshot_version = "#{@next_version}-SNAPSHOT" + @next_so_version = compute_so_version(@next_version) + r_description = top_dir + "r" + "DESCRIPTION" + @previous_version = r_description.read[/^Version: (.+?)\.9000$/, 1] + end + + def compute_so_version(version) + major, minor, _patch = version.split(".") + Integer(major, 10) * 100 + Integer(minor, 10) + end + + def on_release_branch? + @snapshot_version == @release_version + end +end diff --git a/src/arrow/dev/release/utils-binary.sh b/src/arrow/dev/release/utils-binary.sh new file mode 100644 index 000000000..31ebcd8e9 --- /dev/null +++ b/src/arrow/dev/release/utils-binary.sh @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +docker_image_name=apache-arrow/release-binary +gpg_agent_extra_socket="$(gpgconf --list-dirs agent-extra-socket)" +if [ $(uname) = "Darwin" ]; then + docker_uid=10000 + docker_gid=10000 +else + docker_uid=$(id -u) + docker_gid=$(id -g) +fi +docker_ssh_key="${SOURCE_DIR}/binary/id_rsa" + +if [ ! 
-f "${docker_ssh_key}" ]; then + ssh-keygen -N "" -f "${docker_ssh_key}" +fi + +docker_gpg_ssh() { + local ssh_port=$1 + shift + local known_hosts_file=$(mktemp -t "arrow-binary-gpg-ssh-known-hosts.XXXXX") + local exit_code= + if ssh \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=${known_hosts_file} \ + -i "${docker_ssh_key}" \ + -p ${ssh_port} \ + -R "/home/arrow/.gnupg/S.gpg-agent:${gpg_agent_extra_socket}" \ + arrow@127.0.0.1 \ + "$@"; then + exit_code=$?; + else + exit_code=$?; + fi + rm -f ${known_hosts_file} + return ${exit_code} +} + +docker_run() { + local container_id_dir=$(mktemp -d -t "arrow-binary-gpg-container.XXXXX") + local container_id_file=${container_id_dir}/id + docker \ + run \ + --cidfile ${container_id_file} \ + --detach \ + --publish-all \ + --rm \ + --volume "$PWD":/host \ + ${docker_image_name} \ + bash -c " +if [ \$(id -u) -ne ${docker_uid} ]; then + usermod --uid ${docker_uid} arrow + chown -R arrow: ~arrow +fi +/usr/sbin/sshd -D +" + local container_id=$(cat ${container_id_file}) + local ssh_port=$(docker port ${container_id} | grep -E -o '[0-9]+$' | head -n 1) + # Wait for sshd available + while ! docker_gpg_ssh ${ssh_port} : > /dev/null 2>&1; do + sleep 0.1 + done + gpg --export ${GPG_KEY_ID} | docker_gpg_ssh ${ssh_port} gpg --import + docker_gpg_ssh ${ssh_port} "$@" + docker kill ${container_id} + rm -rf ${container_id_dir} +} + +docker build -t ${docker_image_name} "${SOURCE_DIR}/binary" + +chmod go-rwx "${docker_ssh_key}" diff --git a/src/arrow/dev/release/utils-prepare.sh b/src/arrow/dev/release/utils-prepare.sh new file mode 100644 index 000000000..7ba786a75 --- /dev/null +++ b/src/arrow/dev/release/utils-prepare.sh @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARROW_DIR="${SOURCE_DIR}/../.." + +update_versions() { + local base_version=$1 + local next_version=$2 + local type=$3 + + case ${type} in + release) + local version=${base_version} + local r_version=${base_version} + ;; + snapshot) + local version=${next_version}-SNAPSHOT + local r_version=${base_version}.9000 + ;; + esac + + pushd "${ARROW_DIR}/c_glib" + sed -i.bak -E -e \ + "s/^version = '.+'/version = '${version}'/" \ + meson.build + rm -f meson.build.bak + git add meson.build + popd + + pushd "${ARROW_DIR}/ci/scripts" + sed -i.bak -E -e \ + "s/^pkgver=.+/pkgver=${r_version}/" \ + PKGBUILD + rm -f PKGBUILD.bak + git add PKGBUILD + popd + + pushd "${ARROW_DIR}/cpp" + sed -i.bak -E -e \ + "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + + sed -i.bak -E -e \ + "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \ + vcpkg.json + rm -f vcpkg.json.bak + git add vcpkg.json + popd + + pushd "${ARROW_DIR}/java" + mvn versions:set -DnewVersion=${version} + find . 
-type f -name pom.xml.versionsBackup -delete + git add "pom.xml" + git add "**/pom.xml" + popd + + pushd "${ARROW_DIR}/csharp" + sed -i.bak -E -e \ + "s/^ <Version>.+<\/Version>/ <Version>${version}<\/Version>/" \ + Directory.Build.props + rm -f Directory.Build.props.bak + git add Directory.Build.props + popd + + pushd "${ARROW_DIR}/dev/tasks/homebrew-formulae" + sed -i.bak -E -e \ + "s/arrow-[0-9.]+[0-9]+/arrow-${r_version}/g" \ + autobrew/apache-arrow.rb + rm -f autobrew/apache-arrow.rb.bak + git add autobrew/apache-arrow.rb + sed -i.bak -E -e \ + "s/arrow-[0-9.\-]+[0-9SNAPHOT]+/arrow-${version}/g" \ + apache-arrow.rb + rm -f apache-arrow.rb.bak + git add apache-arrow.rb + popd + + pushd "${ARROW_DIR}/js" + sed -i.bak -E -e \ + "s/^ \"version\": \".+\"/ \"version\": \"${version}\"/" \ + package.json + rm -f package.json.bak + git add package.json + popd + + pushd "${ARROW_DIR}/matlab" + sed -i.bak -E -e \ + "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + popd + + pushd "${ARROW_DIR}/python" + sed -i.bak -E -e \ + "s/^default_version = '.+'/default_version = '${version}'/" \ + setup.py + rm -f setup.py.bak + git add setup.py + popd + + pushd "${ARROW_DIR}/r" + sed -i.bak -E -e \ + "s/^Version: .+/Version: ${r_version}/" \ + DESCRIPTION + rm -f DESCRIPTION.bak + git add DESCRIPTION + # Replace dev version with release version + sed -i.bak -E -e \ + "0,/^# arrow /s/^# arrow .+/# arrow ${base_version}/" \ + NEWS.md + if [ ${type} = "snapshot" ]; then + # Add a news entry for the new dev version + sed -i.bak -E -e \ + "0,/^# arrow /s/^(# arrow .+)/# arrow ${r_version}\n\n\1/" \ + NEWS.md + fi + rm -f NEWS.md.bak + git add NEWS.md + popd + + pushd "${ARROW_DIR}/ruby" + sed -i.bak -E -e \ + "s/^ VERSION = \".+\"/ VERSION = \"${version}\"/g" \ + */*/*/version.rb + rm -f */*/*/version.rb.bak + git add */*/*/version.rb + popd +} diff --git a/src/arrow/dev/release/verify-apt.sh 
b/src/arrow/dev/release/verify-apt.sh new file mode 100755 index 000000000..3773e27fa --- /dev/null +++ b/src/arrow/dev/release/verify-apt.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -exu + +if [ $# -lt 2 ]; then + echo "Usage: $0 VERSION rc" + echo " $0 VERSION staging-rc" + echo " $0 VERSION release" + echo " $0 VERSION staging-release" + echo " $0 VERSION local" + echo " e.g.: $0 0.13.0 rc # Verify 0.13.0 RC" + echo " e.g.: $0 0.13.0 staging-rc # Verify 0.13.0 RC on staging" + echo " e.g.: $0 0.13.0 release # Verify 0.13.0" + echo " e.g.: $0 0.13.0 staging-release # Verify 0.13.0 on staging" + echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local" + exit 1 +fi + +VERSION="$1" +TYPE="$2" + +local_prefix="/arrow/dev/tasks/linux-packages" + + +echo "::group::Prepare repository" + +export DEBIAN_FRONTEND=noninteractive + +APT_INSTALL="apt install -y -V --no-install-recommends" + +apt update +${APT_INSTALL} \ + ca-certificates \ + curl \ + lsb-release + +code_name="$(lsb_release --codename --short)" +distribution="$(lsb_release --id --short | tr 'A-Z' 'a-z')" +artifactory_base_url="https://apache.jfrog.io/artifactory/arrow/${distribution}" +case "${TYPE}" 
in + rc|staging-rc|staging-release) + suffix=${TYPE%-release} + artifactory_base_url+="-${suffix}" + ;; +esac + +have_flight=yes +have_plasma=yes +workaround_missing_packages=() +case "${distribution}-${code_name}" in + debian-*) + sed \ + -i"" \ + -e "s/ main$/ main contrib non-free/g" \ + /etc/apt/sources.list + ;; +esac +if [ "$(arch)" = "aarch64" ]; then + have_plasma=no +fi + +if [ "${TYPE}" = "local" ]; then + case "${VERSION}" in + *-dev*) + package_version="$(echo "${VERSION}" | sed -e 's/-dev\(.*\)$/~dev\1/g')" + ;; + *-rc*) + package_version="$(echo "${VERSION}" | sed -e 's/-rc.*$//g')" + ;; + *) + package_version="${VERSION}" + ;; + esac + package_version+="-1" + apt_source_path="${local_prefix}/apt/repositories" + apt_source_path+="/${distribution}/pool/${code_name}/main" + apt_source_path+="/a/apache-arrow-apt-source" + apt_source_path+="/apache-arrow-apt-source_${package_version}_all.deb" + ${APT_INSTALL} "${apt_source_path}" +else + package_version="${VERSION}-1" + apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb" + curl \ + --output "${apt_source_base_name}" \ + "${artifactory_base_url}/${apt_source_base_name}" + ${APT_INSTALL} "./${apt_source_base_name}" +fi + +if [ "${TYPE}" = "local" ]; then + sed \ + -i"" \ + -e "s,^URIs: .*$,URIs: file://${local_prefix}/apt/repositories/${distribution},g" \ + /etc/apt/sources.list.d/apache-arrow.sources + keys="${local_prefix}/KEYS" + if [ -f "${keys}" ]; then + gpg \ + --no-default-keyring \ + --keyring /usr/share/keyrings/apache-arrow-apt-source.gpg \ + --import "${keys}" + fi +else + case "${TYPE}" in + rc|staging-rc|staging-release) + suffix=${TYPE%-release} + sed \ + -i"" \ + -e "s,^URIs: \\(.*\\)/,URIs: \\1-${suffix}/,g" \ + /etc/apt/sources.list.d/apache-arrow.sources + ;; + esac +fi + +apt update + +echo "::endgroup::" + + +echo "::group::Test Apache Arrow C++" +${APT_INSTALL} libarrow-dev=${package_version} +required_packages=() +required_packages+=(cmake) +required_packages+=(g++) 
+required_packages+=(git) +required_packages+=(make) +required_packages+=(pkg-config) +required_packages+=(${workaround_missing_packages[@]}) +${APT_INSTALL} ${required_packages[@]} +mkdir -p build +cp -a /arrow/cpp/examples/minimal_build build +pushd build/minimal_build +cmake . +make -j$(nproc) +./arrow_example +c++ -std=c++11 -o arrow_example example.cc $(pkg-config --cflags --libs arrow) +./arrow_example +popd +echo "::endgroup::" + + +echo "::group::Test Apache Arrow GLib" +${APT_INSTALL} libarrow-glib-dev=${package_version} +${APT_INSTALL} libarrow-glib-doc=${package_version} +echo "::endgroup::" + + +if [ "${have_flight}" = "yes" ]; then + echo "::group::Test Apache Arrow Flight" + ${APT_INSTALL} libarrow-flight-glib-dev=${package_version} + ${APT_INSTALL} libarrow-flight-glib-doc=${package_version} + echo "::endgroup::" +fi + + +echo "::group::Test libarrow-python" +${APT_INSTALL} libarrow-python-dev=${package_version} +echo "::endgroup::" + + +if [ "${have_plasma}" = "yes" ]; then + echo "::group::Test Plasma" + ${APT_INSTALL} libplasma-glib-dev=${package_version} + ${APT_INSTALL} libplasma-glib-doc=${package_version} + ${APT_INSTALL} plasma-store-server=${package_version} + echo "::endgroup::" +fi + + +echo "::group::Test Gandiva" +${APT_INSTALL} libgandiva-glib-dev=${package_version} +${APT_INSTALL} libgandiva-glib-doc=${package_version} +echo "::endgroup::" + + +echo "::group::Test Parquet" +${APT_INSTALL} libparquet-glib-dev=${package_version} +${APT_INSTALL} libparquet-glib-doc=${package_version} +echo "::endgroup::" diff --git a/src/arrow/dev/release/verify-release-candidate-wheels.bat b/src/arrow/dev/release/verify-release-candidate-wheels.bat new file mode 100644 index 000000000..5bcefe80d --- /dev/null +++ b/src/arrow/dev/release/verify-release-candidate-wheels.bat @@ -0,0 +1,107 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. 
See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@rem This script downloads and installs all Windows wheels for a release +@rem candidate into temporary conda environments and makes sure that imports +@rem work + +@rem To run the script: +@rem verify-release-candidate-wheels.bat VERSION RC_NUM + +@echo on + +set _CURRENT_DIR=%CD% +set _VERIFICATION_DIR=C:\tmp\arrow-verify-release-wheels + +if not exist "C:\tmp\" mkdir C:\tmp +if exist %_VERIFICATION_DIR% rd %_VERIFICATION_DIR% /s /q +if not exist %_VERIFICATION_DIR% mkdir %_VERIFICATION_DIR% + +cd %_VERIFICATION_DIR% + +@rem clone Arrow repository to obtain test requirements +set GIT_ENV_PATH=%_VERIFICATION_DIR%\_git +call conda create -p %GIT_ENV_PATH% ^ + --no-shortcuts -f -q -y git ^ + || EXIT /B 1 +call activate %GIT_ENV_PATH% + +git clone https://github.com/apache/arrow.git || EXIT /B 1 +pushd arrow +git submodule update --init +popd + +set ARROW_VERSION=%1 +set RC_NUMBER=%2 + +python arrow\dev\release\download_rc_binaries.py %ARROW_VERSION% %RC_NUMBER% ^ + --package_type python ^ + --regex=".*win_amd64.*" || EXIT /B 1 + +call deactivate + +set ARROW_TEST_DATA=%cd%\arrow\testing\data + +CALL :verify_wheel 3.6 m +if errorlevel 1 GOTO error + +CALL :verify_wheel 3.7 m +if errorlevel 1 GOTO error + +CALL :verify_wheel 3.8 +if errorlevel 1 
GOTO error + +:done +cd %_CURRENT_DIR% + +EXIT /B %ERRORLEVEL% + +:error +call deactivate +cd %_CURRENT_DIR% + +EXIT /B 1 + +@rem a batch function to verify a single wheel +:verify_wheel + +set PY_VERSION=%1 +set ABI_TAG=%2 +set PY_VERSION_NO_PERIOD=%PY_VERSION:.=% + +set CONDA_ENV_PATH=%_VERIFICATION_DIR%\_verify-wheel-%PY_VERSION% +call conda create -p %CONDA_ENV_PATH% ^ + --no-shortcuts -f -q -y python=%PY_VERSION% ^ + || EXIT /B 1 +call activate %CONDA_ENV_PATH% + +set WHEEL_FILENAME=pyarrow-%ARROW_VERSION%-cp%PY_VERSION_NO_PERIOD%-cp%PY_VERSION_NO_PERIOD%%ABI_TAG%-win_amd64.whl + +pip install python-rc\%ARROW_VERSION%-rc%RC_NUMBER%\%WHEEL_FILENAME% || EXIT /B 1 +python -c "import pyarrow" || EXIT /B 1 +python -c "import pyarrow.parquet" || EXIT /B 1 +python -c "import pyarrow.flight" || EXIT /B 1 +python -c "import pyarrow.dataset" || EXIT /B 1 + +pip install -r arrow\python\requirements-test.txt || EXIT /B 1 +pytest %CONDA_ENV_PATH%\Lib\site-packages\pyarrow --pdb -v || EXIT /B 1 + +:done + +call deactivate + +EXIT /B 0 diff --git a/src/arrow/dev/release/verify-release-candidate.bat b/src/arrow/dev/release/verify-release-candidate.bat new file mode 100644 index 000000000..fee8c01bc --- /dev/null +++ b/src/arrow/dev/release/verify-release-candidate.bat @@ -0,0 +1,130 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. 
@rem Verify an Apache Arrow release candidate on Windows.
@rem
@rem To run the script:
@rem verify-release-candidate.bat VERSION RC_NUM
@rem
@rem The script downloads the source tarball of the given release candidate,
@rem creates a conda environment, builds and tests the C++ libraries with
@rem Visual Studio 2017 and finally builds and tests pyarrow. Every step that
@rem can fail aborts the script with a non-zero exit code.

@rem Both positional arguments are required: %1 is the version and %2 the RC
@rem number. Without them the download URL below would be malformed.
@if "%~2"=="" (
    echo Usage: verify-release-candidate.bat VERSION RC_NUM
    exit /B 1
)

@echo on

@rem Start from an empty verification directory
if not exist "C:\tmp\" mkdir C:\tmp
if exist "C:\tmp\arrow-verify-release" rd C:\tmp\arrow-verify-release /s /q
if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release

set _VERIFICATION_DIR=C:\tmp\arrow-verify-release
set _VERIFICATION_DIR_UNIX=C:/tmp/arrow-verify-release
set _VERIFICATION_CONDA_ENV=%_VERIFICATION_DIR%\conda-env
set _DIST_URL=https://dist.apache.org/repos/dist/dev/arrow
set _TARBALL=apache-arrow-%1.tar.gz
set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1
set INSTALL_DIR=%_VERIFICATION_DIR%\install

@rem Requires GNU Wget for Windows
@rem NOTE(review): --no-check-certificate disables TLS verification and this
@rem script does not check the tarball signature or checksums itself.
wget --no-check-certificate -O %_TARBALL% %_DIST_URL%/apache-arrow-%1-rc%2/%_TARBALL% || exit /B 1

@rem Stop here if extraction fails; otherwise the later pushd would run the
@rem build in whatever directory happens to be current.
tar xf %_TARBALL% -C %_VERIFICATION_DIR_UNIX% || exit /B 1

set PYTHON=3.6

@rem Using call with conda.bat seems necessary to avoid terminating the batch
@rem script execution
call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^
    --file=ci\conda_env_cpp.txt ^
    --file=ci\conda_env_python.txt ^
    git ^
    python=%PYTHON% ^
    || exit /B 1

call activate %_VERIFICATION_CONDA_ENV% || exit /B 1

set GENERATOR=Visual Studio 15 2017 Win64
set CONFIGURATION=release

pushd %ARROW_SOURCE%

set ARROW_HOME=%INSTALL_DIR%
set PARQUET_HOME=%INSTALL_DIR%
set PATH=%INSTALL_DIR%\bin;%PATH%

@rem Build and test Arrow C++ libraries
mkdir %ARROW_SOURCE%\cpp\build
pushd %ARROW_SOURCE%\cpp\build

@rem This is the path for Visual Studio Community 2017
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsDevCmd.bat" -arch=amd64

@rem NOTE(wesm): not using Ninja for now to be able to more easily control the
@rem generator used

cmake -G "%GENERATOR%" ^
      -DARROW_BOOST_USE_SHARED=ON ^
      -DARROW_BUILD_STATIC=OFF ^
      -DARROW_BUILD_TESTS=ON ^
      -DARROW_CXXFLAGS="/MP" ^
      -DARROW_DATASET=ON ^
      -DARROW_FLIGHT=ON ^
      -DARROW_MIMALLOC=ON ^
      -DARROW_PARQUET=ON ^
      -DARROW_PYTHON=ON ^
      -DARROW_WITH_BROTLI=ON ^
      -DARROW_WITH_BZ2=ON ^
      -DARROW_WITH_LZ4=ON ^
      -DARROW_WITH_SNAPPY=ON ^
      -DARROW_WITH_ZLIB=ON ^
      -DARROW_WITH_ZSTD=ON ^
      -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
      -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^
      -DCMAKE_UNITY_BUILD=ON ^
      -DGTest_SOURCE=BUNDLED ^
      ..  || exit /B 1

cmake --build . --target INSTALL --config Release || exit /B 1

@rem NOTE(wesm): Building googletest is flaky for me with ninja. Building it
@rem first fixes the problem

@rem ninja googletest_ep || exit /B 1
@rem ninja install || exit /B 1

@rem Get testing datasets for Parquet unit tests. A failed clone would leave
@rem the *_TEST_DATA variables pointing at missing directories, so abort.
git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing || exit /B 1
set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data

git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing || exit /B 1
set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data

@rem Needed so python-test.exe works
set PYTHONPATH_ORIGINAL=%PYTHONPATH%
set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH%
ctest -VV  || exit /B 1
set PYTHONPATH=%PYTHONPATH_ORIGINAL%
popd

@rem Build and import pyarrow
pushd %ARROW_SOURCE%\python

pip install -r requirements-test.txt || exit /B 1

set PYARROW_CMAKE_GENERATOR=%GENERATOR%
set PYARROW_WITH_FLIGHT=1
set PYARROW_WITH_PARQUET=1
set PYARROW_WITH_DATASET=1
python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B 1
pytest pyarrow -v -s --enable-parquet || exit /B 1

popd

call deactivate
# Parse the command line. Exactly three positional arguments are required:
#   $1 -> ARTIFACT   kind of artifact to verify: source, binaries or wheels
#   $2 -> VERSION    release version (X.Y.Z)
#   $3 -> RC_NUMBER  release candidate number
# Any other argument count, or an unknown artifact kind, prints a message and
# exits with status 1.
case $# in
  3) ARTIFACT="$1"
     VERSION="$2"
     RC_NUMBER="$3"
     case $ARTIFACT in
       source|binaries|wheels) ;;
       *) echo "Invalid argument: '${ARTIFACT}', valid options are \
'source', 'binaries', or 'wheels'"
          exit 1
          ;;
     esac
     ;;
  # The usage text must list every artifact kind accepted above.
  *) echo "Usage: $0 source|binaries|wheels X.Y.Z RC_NUMBER"
     exit 1
     ;;
esac
# Verify the APT repository of the release candidate by running
# dev/release/verify-apt.sh inside a Docker container for each supported
# Debian/Ubuntu image. Uses SOURCE_DIR and VERSION from the enclosing script
# and exits with status 1 on the first image that fails verification.
test_apt() {
  local -a apt_images=(
    "debian:buster"
    "arm64v8/debian:buster"
    "debian:bullseye"
    "arm64v8/debian:bullseye"
    "debian:bookworm"
    "arm64v8/debian:bookworm"
    "ubuntu:bionic"
    "arm64v8/ubuntu:bionic"
    "ubuntu:focal"
    "arm64v8/ubuntu:focal"
    "ubuntu:hirsute"
    "arm64v8/ubuntu:hirsute"
    "ubuntu:impish"
    "arm64v8/ubuntu:impish"
  )

  for target in "${apt_images[@]}"; do
    if [[ "${target}" == arm64v8/* ]]; then
      # arm64 images need either an aarch64 host or the qemu user-mode
      # emulator; skip them otherwise.
      if [ "$(arch)" != "aarch64" ] && [ ! -e /usr/bin/qemu-aarch64-static ]; then
        continue
      fi
      case "${target}" in
        arm64v8/debian:buster|arm64v8/ubuntu:bionic|arm64v8/ubuntu:focal)
          ;; # OK
        *)
          # qemu-user-static in Ubuntu 20.04 has a crash bug:
          #   https://bugs.launchpad.net/qemu/+bug/1749393
          continue
          ;;
      esac
    fi

    if docker run --rm -v "${SOURCE_DIR}/../..:/arrow:delegated" \
         "${target}" \
         /arrow/dev/release/verify-apt.sh \
         "${VERSION}" \
         "rc"; then
      continue
    fi

    echo "Failed to verify the APT repository for ${target}"
    exit 1
  done
}
# Prepare the sandbox directory held in ARROW_TMPDIR.
#
# $1 is the prefix handed to mktemp when a fresh directory has to be created.
# A caller-provided ARROW_TMPDIR is created if needed and never removed. A
# directory created here is removed by an EXIT trap, but only when
# TEST_SUCCESS is "yes"; otherwise its location is reported instead.
setup_tempdir() {
  cleanup() {
    if [ "${TEST_SUCCESS}" != "yes" ]; then
      echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details."
      return
    fi
    rm -fr "${ARROW_TMPDIR}"
  }

  if [ -n "${ARROW_TMPDIR}" ]; then
    # Caller chose the directory: don't clean up automatically
    mkdir -p "${ARROW_TMPDIR}"
    return
  fi

  # No directory given: create one and clean it up automatically
  ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX")
  trap cleanup EXIT
}
# Build and test C++
#
# Configures the C++ sources in cpp/build with CMake, builds and installs them
# with make, then runs the ctest suite restricted to the "unittest" label.
#
# Reads ARROW_HOME (install prefix), ARROW_FLIGHT, ARROW_GANDIVA, ARROW_CUDA
# and NPROC (parallelism for make and ctest). Any options already present in
# ARROW_CMAKE_OPTIONS are placed before the ones listed below.
test_and_install_cpp() {
  mkdir -p cpp/build
  pushd cpp/build

  # One CMake option per line. ARROW_CMAKE_OPTIONS is not declared local, so
  # the combined value stays set after this function returns.
  ARROW_CMAKE_OPTIONS="
${ARROW_CMAKE_OPTIONS:-}
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME
-DCMAKE_INSTALL_LIBDIR=lib
-DARROW_FLIGHT=${ARROW_FLIGHT}
-DARROW_PLASMA=ON
-DARROW_ORC=ON
-DARROW_PYTHON=ON
-DARROW_GANDIVA=${ARROW_GANDIVA}
-DARROW_PARQUET=ON
-DARROW_DATASET=ON
-DPARQUET_REQUIRE_ENCRYPTION=ON
-DARROW_VERBOSE_THIRDPARTY_BUILD=ON
-DARROW_WITH_BZ2=ON
-DARROW_WITH_ZLIB=ON
-DARROW_WITH_ZSTD=ON
-DARROW_WITH_LZ4=ON
-DARROW_WITH_SNAPPY=ON
-DARROW_WITH_BROTLI=ON
-DARROW_BOOST_USE_SHARED=ON
-DCMAKE_BUILD_TYPE=release
-DARROW_BUILD_TESTS=ON
-DARROW_BUILD_INTEGRATION=ON
-DARROW_CUDA=${ARROW_CUDA}
-DARROW_DEPENDENCY_SOURCE=AUTO
"
  # Left unquoted on purpose: the shell splits the string above on whitespace
  # so that every option becomes a separate cmake argument.
  cmake $ARROW_CMAKE_OPTIONS ..

  make -j$NPROC install

  # TODO: ARROW-5036: plasma-serialization_tests broken
  # TODO: ARROW-5054: libgtest.so link failure in flight-server-test
  # The build's release/ directory is prepended to LD_LIBRARY_PATH for the
  # ctest run only.
  LD_LIBRARY_PATH=$PWD/release:$LD_LIBRARY_PATH ctest \
    --exclude-regex "plasma-serialization_tests" \
    -j$NPROC \
    --output-on-failure \
    -L unittest
  popd
}
# Build and test Python
#
# Installs the build and test requirements, builds the pyarrow extension
# in place inside python/ and runs its pytest suite. The optional pyarrow
# components CUDA, Flight and Gandiva are enabled only when the matching
# ARROW_* variable is "ON".
test_python() {
  pushd python

  pip install -r requirements-build.txt -r requirements-test.txt

  # Components that are always built
  export PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1 PYARROW_WITH_PLASMA=1

  # Components that follow the C++ build configuration: PYARROW_WITH_<X> is
  # exported when ARROW_<X> is ON.
  local component switch
  for component in CUDA FLIGHT GANDIVA; do
    switch="ARROW_${component}"
    if [ "${!switch}" = "ON" ]; then
      export "PYARROW_WITH_${component}=1"
    fi
  done

  python setup.py build_ext --inplace
  pytest pyarrow -v --pdb

  popd
}
# Run the test suite of every Ruby binding under ruby/.
#
# red-arrow, red-arrow-dataset, red-plasma and red-parquet are always tested;
# red-arrow-cuda, red-arrow-flight and red-gandiva are added when ARROW_CUDA,
# ARROW_FLIGHT and ARROW_GANDIVA respectively are "ON". For each binding the
# gems are installed into vendor/bundle before test/run-test.rb is executed.
test_ruby() {
  pushd ruby

  local -a gems=(red-arrow red-arrow-dataset red-plasma red-parquet)
  case "${ARROW_CUDA}" in
    ON) gems+=(red-arrow-cuda) ;;
  esac
  case "${ARROW_FLIGHT}" in
    ON) gems+=(red-arrow-flight) ;;
  esac
  case "${ARROW_GANDIVA}" in
    ON) gems+=(red-gandiva) ;;
  esac

  for module in "${gems[@]}"; do
    pushd "${module}"
    bundle install --path vendor/bundle
    bundle exec ruby test/run-test.rb
    popd
  done

  popd
}
# Run integration tests
#
# Installs archery from dev/archery and runs "archery integration" for the
# implementations selected through TEST_INTEGRATION_CPP, TEST_INTEGRATION_JAVA,
# TEST_INTEGRATION_JS and TEST_INTEGRATION_GO. The Flight tests are included
# when ARROW_FLIGHT is "ON". Must be called from the root of the source tree.
test_integration() {
  JAVA_DIR="${PWD}/java"
  CPP_BUILD_DIR="${PWD}/cpp/build"

  # Locations of the Java tools jar and the C++ executables, exported for
  # the archery run below
  ARROW_JAVA_INTEGRATION_JAR="${JAVA_DIR}/tools/target/arrow-tools-${VERSION}-jar-with-dependencies.jar"
  ARROW_CPP_EXE_PATH="${CPP_BUILD_DIR}/release"
  export ARROW_JAVA_INTEGRATION_JAR ARROW_CPP_EXE_PATH

  pip install -e dev/archery

  INTEGRATION_TEST_ARGS=""
  case "${ARROW_FLIGHT}" in
    ON) INTEGRATION_TEST_ARGS+=" --run-flight" ;;
  esac

  # The Flight integration test executables have a runtime dependency on
  # release/libgtest.so, hence the extra library path for this command.
  LD_LIBRARY_PATH="${ARROW_CPP_EXE_PATH}:${LD_LIBRARY_PATH}" \
    archery integration \
      "--with-cpp=${TEST_INTEGRATION_CPP}" \
      "--with-java=${TEST_INTEGRATION_JAVA}" \
      "--with-js=${TEST_INTEGRATION_JS}" \
      "--with-go=${TEST_INTEGRATION_GO}" \
      $INTEGRATION_TEST_ARGS
}
# Verify the source distribution. Must be called from the root of the
# extracted source tree.
#
# Exports the install locations (ARROW_HOME, PARQUET_HOME) and the library and
# pkg-config search paths below ARROW_TMPDIR/install, sets NPROC, clones the
# testing repositories and then runs every language test whose TEST_<NAME>
# switch is greater than zero, in a fixed order.
test_source_distribution() {
  export ARROW_HOME="${ARROW_TMPDIR}/install"
  export PARQUET_HOME="${ARROW_HOME}"
  export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH:-}"
  export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH:-}"

  # Number of parallel jobs used by the build and test steps
  case "$(uname)" in
    Darwin) NPROC=$(sysctl -n hw.ncpu) ;;
    *)      NPROC=$(nproc) ;;
  esac

  clone_testing_repositories

  # "<switch suffix>:<function>" pairs, in the order they have to run
  local verify_stage verify_switch
  for verify_stage in \
      JAVA:test_package_java \
      CPP:test_and_install_cpp \
      CSHARP:test_csharp \
      PYTHON:test_python \
      GLIB:test_glib \
      RUBY:test_ruby \
      JS:test_js \
      GO:test_go \
      INTEGRATION:test_integration; do
    verify_switch="TEST_${verify_stage%%:*}"
    if [ "${!verify_switch}" -gt 0 ]; then
      "${verify_stage#*:}"
    fi
  done
}
# Verify the macOS wheels of the release candidate.
#
# Expects to run inside the directory holding the downloaded
# python-rc/${VERSION}-rc${RC_NUMBER} wheels, with conda available. For every
# Python version a fresh conda environment is created, pyarrow is installed
# from the downloaded wheels and ci/scripts/python_wheel_unix_test.sh is run.
# On Apple silicon the universal2 wheel is additionally tested as arm64 and as
# x86_64 with the python.org framework interpreter.
test_macos_wheels() {
  local py_arches="3.6m 3.7m 3.8 3.9"
  local macos_version=$(sw_vers -productVersion)
  local macos_short_version=${macos_version:0:5}

  local check_s3=ON
  local check_flight=ON

  # Skip the S3 check on old macOS versions.
  # printf is used rather than echo because bash's builtin echo does not
  # expand "\n" without -e: the two versions ended up on a single line and the
  # comparison below could never match.
  # NOTE(review): the original comment here reads "macOS version <= 10.13",
  # but comparing against 10.14 also matches 10.14 itself - confirm the
  # intended threshold.
  if [ "$(printf '%s\n' "${macos_short_version}" "10.14" | sort -V | head -n1)" == "${macos_short_version}" ]; then
    check_s3=OFF
  fi
  # apple silicon processor
  if [ "$(uname -m)" = "arm64" ]; then
    py_arches="3.8 3.9"
    check_flight=OFF
  fi

  # verify arch-native wheels inside an arch-native conda environment
  for py_arch in ${py_arches}; do
    local env=_verify_wheel-${py_arch}
    # the "m" ABI suffix is not part of the Python version
    conda create -yq -n ${env} python=${py_arch//m/}
    conda activate ${env}
    pip install -U pip

    # check the mandatory and optional imports
    pip install --find-links python-rc/${VERSION}-rc${RC_NUMBER} pyarrow==${VERSION}
    INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
      ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}

    conda deactivate
  done

  # verify arm64 and universal2 wheels using an universal2 python binary
  # the interpreter should be installed from python.org:
  #   https://www.python.org/ftp/python/3.9.6/python-3.9.6-macosx10.9.pkg
  if [ "$(uname -m)" = "arm64" ]; then
    for py_arch in "3.9"; do
      local pyver=${py_arch//m/}
      local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}"

      # create and activate a virtualenv for testing under each architecture
      for arch in "arm64" "x86_64"; do
        local venv="${ARROW_TMPDIR}/test-${arch}-virtualenv"
        $python -m virtualenv $venv
        source $venv/bin/activate
        pip install -U pip

        # install pyarrow's universal2 wheel
        pip install \
            --find-links python-rc/${VERSION}-rc${RC_NUMBER} \
            --target $(python -c 'import site; print(site.getsitepackages()[0])') \
            --platform macosx_11_0_universal2 \
            --only-binary=:all: \
            pyarrow==${VERSION}
        # check the imports and execute the unittests
        INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
          arch -${arch} ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}

        deactivate
      done
    done
  fi
}
# Test selection. Every switch below is an integer; a value greater than zero
# enables the corresponding test. ": ${NAME:=value}" assigns the default only
# when NAME is unset or empty, so values from the environment take precedence.
: ${INSTALL_NODE:=1}

# Pick the top-level mode from the ARTIFACT argument. Only TEST_SOURCE can be
# overridden from the environment here; the other two are forced to 1.
if [ "${ARTIFACT}" == "source" ]; then
  : ${TEST_SOURCE:=1}
elif [ "${ARTIFACT}" == "wheels" ]; then
  TEST_WHEELS=1
else
  TEST_BINARY_DISTRIBUTIONS=1
fi
: ${TEST_SOURCE:=0}
: ${TEST_WHEELS:=0}
: ${TEST_BINARY_DISTRIBUTIONS:=0}

# Per-language switches default to TEST_DEFAULT
: ${TEST_DEFAULT:=1}
: ${TEST_JAVA:=${TEST_DEFAULT}}
: ${TEST_CPP:=${TEST_DEFAULT}}
: ${TEST_CSHARP:=${TEST_DEFAULT}}
: ${TEST_GLIB:=${TEST_DEFAULT}}
: ${TEST_RUBY:=${TEST_DEFAULT}}
: ${TEST_PYTHON:=${TEST_DEFAULT}}
: ${TEST_JS:=${TEST_DEFAULT}}
: ${TEST_GO:=${TEST_DEFAULT}}
: ${TEST_INTEGRATION:=${TEST_DEFAULT}}
# The binary, APT and Yum checks default to TEST_DEFAULT only when binary
# distributions are being verified, and to 0 otherwise.
if [ ${TEST_BINARY_DISTRIBUTIONS} -gt 0 ]; then
  TEST_BINARY_DISTRIBUTIONS_DEFAULT=${TEST_DEFAULT}
else
  TEST_BINARY_DISTRIBUTIONS_DEFAULT=0
fi
: ${TEST_BINARY:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}
: ${TEST_APT:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}
: ${TEST_YUM:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}}

# For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1
: ${TEST_INTEGRATION_CPP:=${TEST_INTEGRATION}}
: ${TEST_INTEGRATION_JAVA:=${TEST_INTEGRATION}}
: ${TEST_INTEGRATION_JS:=${TEST_INTEGRATION}}
: ${TEST_INTEGRATION_GO:=${TEST_INTEGRATION}}

# Automatically enable a test if it is activated by a dependent: adding the
# dependents' switches makes the sum positive as soon as one of them is on.
# The order matters: TEST_GLIB has to absorb TEST_RUBY before TEST_CPP
# absorbs TEST_GLIB.
TEST_GLIB=$((${TEST_GLIB} + ${TEST_RUBY}))
TEST_CPP=$((${TEST_CPP} + ${TEST_GLIB} + ${TEST_PYTHON} + ${TEST_INTEGRATION_CPP}))
TEST_JAVA=$((${TEST_JAVA} + ${TEST_INTEGRATION_JAVA}))
TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS}))
TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO}))
TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO}))

# Decide whether the short-lived miniconda installation is needed. For binary
# verification it is skipped when the caller provides an interpreter through
# PYTHON.
if [ "${ARTIFACT}" == "source" ]; then
  NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION}))
elif [ "${ARTIFACT}" == "wheels" ]; then
  NEED_MINICONDA=$((${TEST_WHEELS}))
else
  if [ -z "${PYTHON:-}" ]; then
    NEED_MINICONDA=$((${TEST_BINARY}))
  else
    NEED_MINICONDA=0
  fi
fi
# Main flow of the verification script.
#
# TEST_ARCHIVE names a local source archive that is used instead of the
# published release candidate when TEST_SOURCE is 0. A relative name is
# resolved against the directory the script was started from, because the
# script changes into the sandbox directory below.
: ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz}
case "${TEST_ARCHIVE}" in
  /*)
   ;;
  *)
   TEST_ARCHIVE=${PWD}/${TEST_ARCHIVE}
   ;;
esac

# Read by the cleanup handler installed by setup_tempdir; only flipped to
# "yes" once everything below has passed.
TEST_SUCCESS=no

setup_tempdir "arrow-${VERSION}"
echo "Working in sandbox ${ARROW_TMPDIR}"
# Paths are quoted throughout: the sandbox or archive location may contain
# whitespace.
cd "${ARROW_TMPDIR}"

if [ "${NEED_MINICONDA}" -gt 0 ]; then
  setup_miniconda
fi

if [ "${ARTIFACT}" == "source" ]; then
  dist_name="apache-arrow-${VERSION}"
  if [ "${TEST_SOURCE}" -gt 0 ]; then
    import_gpg_keys
    # An existing directory (reused ARROW_TMPDIR) is taken as already
    # downloaded and extracted.
    if [ ! -d "${dist_name}" ]; then
      fetch_archive "${dist_name}"
      tar xf "${dist_name}.tar.gz"
    fi
  else
    # Check for the archive before creating the target directory. An empty
    # directory left behind by a failed run would make a later TEST_SOURCE=1
    # run in the same ARROW_TMPDIR skip the download.
    if [ ! -f "${TEST_ARCHIVE}" ]; then
      echo "${TEST_ARCHIVE} not found"
      exit 1
    fi
    mkdir -p "${dist_name}"
    tar xf "${TEST_ARCHIVE}" -C "${dist_name}" --strip-components=1
  fi
  pushd "${dist_name}"
    test_source_distribution
  popd
elif [ "${ARTIFACT}" == "wheels" ]; then
  import_gpg_keys
  test_wheels
else
  import_gpg_keys
  test_binary_distribution
fi

TEST_SUCCESS=yes
echo 'Release candidate looks good!'
exit 0
# Verify the Apache Arrow Yum repository from inside a container.
#
# Usage: verify-yum.sh VERSION TYPE
#   VERSION  version to verify
#   TYPE     rc, staging-rc, release, staging-release or local
#
# The script installs the apache-arrow-release package (from Artifactory, or
# from /arrow/dev/tasks/linux-packages when TYPE is "local"), points the
# repository definition at the matching location, installs the development
# packages and builds and runs the minimal C++ example against them.

# -e: stop at the first failing command, -x: trace commands,
# -u: treat unset variables as errors
set -exu

if [ $# -lt 2 ]; then
  echo "Usage: $0 VERSION rc"
  echo " $0 VERSION staging-rc"
  echo " $0 VERSION release"
  echo " $0 VERSION staging-release"
  echo " $0 VERSION local"
  echo " e.g.: $0 0.13.0 rc # Verify 0.13.0 RC"
  echo " e.g.: $0 0.13.0 staging-rc # Verify 0.13.0 RC on staging"
  echo " e.g.: $0 0.13.0 release # Verify 0.13.0"
  echo " e.g.: $0 0.13.0 staging-release # Verify 0.13.0 on staging"
  echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
  exit 1
fi

VERSION="$1"
TYPE="$2"

# Location of locally built packages, used when TYPE is "local"
local_prefix="/arrow/dev/tasks/linux-packages"

artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"

# ID and the leading digits of VERSION_ID from /etc/os-release
distribution=$(. /etc/os-release && echo "${ID}")
distribution_version=$(. /etc/os-release && echo "${VERSION_ID}" | grep -o "^[0-9]*")
distribution_prefix="centos"

# Defaults; the case statement below overrides them per distribution
cmake_package=cmake
cmake_command=cmake
have_flight=yes
have_gandiva=yes
have_glib=yes
have_parquet=yes
have_python=yes
install_command="dnf install -y --enablerepo=powertools"

case "${distribution}-${distribution_version}" in
  almalinux-*)
    distribution_prefix="almalinux"
    ;;
  amzn-2)
    cmake_package=cmake3
    cmake_command=cmake3
    have_flight=no
    have_gandiva=no
    have_python=no
    install_command="yum install -y"
    distribution_prefix="amazon-linux"
    # Enable EPEL, which later install commands reference via
    # --enablerepo=epel
    amazon-linux-extras install epel -y
    ;;
  centos-7)
    cmake_package=cmake3
    cmake_command=cmake3
    have_flight=no
    have_gandiva=no
    install_command="yum install -y"
    ;;
esac
# The Gandiva packages are not verified on aarch64
if [ "$(arch)" = "aarch64" ]; then
  have_gandiva=no
fi

# Install the apache-arrow-release package, which provides the repository
# definition edited further below.
if [ "${TYPE}" = "local" ]; then
  # Derive the RPM version-release string from VERSION
  case "${VERSION}" in
    *-dev*)
      # X.Y.Z-devN -> X.Y.Z-0.devN
      package_version="$(echo "${VERSION}" | sed -e 's/-dev\(.*\)$/-0.dev\1/g')"
      ;;
    *-rc*)
      # X.Y.Z-rcN -> X.Y.Z-1
      package_version="$(echo "${VERSION}" | sed -e 's/-rc.*$//g')"
      package_version+="-1"
      ;;
    *)
      package_version="${VERSION}-1"
      ;;
  esac
  release_path="${local_prefix}/yum/repositories"
  # Append the distribution tag to the version and pick the repository
  # sub-directory
  case "${distribution}" in
    almalinux)
      package_version+=".el${distribution_version}"
      release_path+="/almalinux"
      ;;
    amzn)
      package_version+=".${distribution}${distribution_version}"
      release_path+="/amazon-linux"
      amazon-linux-extras install -y epel
      ;;
    *)
      package_version+=".el${distribution_version}"
      release_path+="/centos"
      ;;
  esac
  release_path+="/${distribution_version}/$(arch)/Packages"
  release_path+="/apache-arrow-release-${package_version}.noarch.rpm"
  ${install_command} "${release_path}"
else
  package_version="${VERSION}"
  case "${TYPE}" in
    rc|staging-rc|staging-release)
      # Strip a trailing "-release": rc -> rc, staging-rc -> staging-rc,
      # staging-release -> staging
      suffix=${TYPE%-release}
      distribution_prefix+="-${suffix}"
      ;;
  esac
  ${install_command} \
    ${artifactory_base_url}/${distribution_prefix}/${distribution_version}/apache-arrow-release-latest.rpm
fi

# Rewrite the installed repository definition so that it points at the
# repository being verified.
if [ "${TYPE}" = "local" ]; then
  sed \
    -i"" \
    -e "s,baseurl=https://apache\.jfrog\.io/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \
    /etc/yum.repos.d/Apache-Arrow.repo
  # Use the local KEYS file as the RPM signing key when it exists
  keys="${local_prefix}/KEYS"
  if [ -f "${keys}" ]; then
    cp "${keys}" /etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
  fi
else
  case "${TYPE}" in
    rc|staging-rc|staging-release)
      suffix=${TYPE%-release}
      sed \
        -i"" \
        -e "s,/almalinux/,/almalinux-${suffix}/,g" \
        -e "s,/centos/,/centos-${suffix}/,g" \
        -e "s,/amazon-linux/,/amazon-linux-${suffix}/,g" \
        /etc/yum.repos.d/Apache-Arrow.repo
      ;;
  esac
fi

# Install the C++ development package and a toolchain, then build and run the
# minimal example twice: once through CMake and once through pkg-config.
${install_command} --enablerepo=epel arrow-devel-${package_version}
${install_command} \
  ${cmake_package} \
  gcc-c++ \
  git \
  libarchive \
  make \
  pkg-config
mkdir -p build
cp -a /arrow/cpp/examples/minimal_build build
pushd build/minimal_build
${cmake_command} .
make -j$(nproc)
./arrow_example
c++ -std=c++11 -o arrow_example example.cc $(pkg-config --cflags --libs arrow)
./arrow_example
popd

# Install the remaining packages; the GLib variants are used where
# have_glib is "yes".
if [ "${have_glib}" = "yes" ]; then
  ${install_command} --enablerepo=epel arrow-glib-devel-${package_version}
  ${install_command} --enablerepo=epel arrow-glib-doc-${package_version}
fi

if [ "${have_python}" = "yes" ]; then
  ${install_command} --enablerepo=epel arrow-python-devel-${package_version}
fi

if [ "${have_glib}" = "yes" ]; then
  ${install_command} --enablerepo=epel plasma-glib-devel-${package_version}
  ${install_command} --enablerepo=epel plasma-glib-doc-${package_version}
else
  ${install_command} --enablerepo=epel plasma-devel-${package_version}
fi

if [ "${have_flight}" = "yes" ]; then
  ${install_command} --enablerepo=epel arrow-flight-glib-devel-${package_version}
  ${install_command} --enablerepo=epel arrow-flight-glib-doc-${package_version}
fi

if [ "${have_gandiva}" = "yes" ]; then
  if [ "${have_glib}" = "yes" ]; then
    ${install_command} --enablerepo=epel gandiva-glib-devel-${package_version}
    ${install_command} --enablerepo=epel gandiva-glib-doc-${package_version}
  else
    ${install_command} --enablerepo=epel gandiva-devel-${package_version}
  fi
fi

if [ "${have_parquet}" = "yes" ]; then
  if [ "${have_glib}" = "yes" ]; then
    ${install_command} --enablerepo=epel parquet-glib-devel-${package_version}
    ${install_command} --enablerepo=epel parquet-glib-doc-${package_version}
  else
    ${install_command} --enablerepo=epel parquet-devel-${package_version}
  fi
fi
@@ -0,0 +1,19 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +See the usage guide under the [documentation page](../../docs/source/developers/crossbow.rst) diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml new file mode 100644 index 000000000..dfc87c80b --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '10.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:10.2 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.6.* *_cpython +re2: +- 2021.06.01 
+snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml new file mode 100644 index 000000000..3416b952c --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '10.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:10.2 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.7.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml new file mode 100644 index 000000000..f819ba722 --- /dev/null +++ 
b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '10.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:10.2 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml new file mode 100644 index 000000000..3e2e0ef51 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '10.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:10.2 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: 
+ max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml new file mode 100644 index 000000000..3aba0f129 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.6.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml new file mode 
100644 index 000000000..ff26bc521 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.7.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml new file mode 100644 index 000000000..5703aba68 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' 
+orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml new file mode 100644 index 000000000..8ff58d717 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml @@ -0,0 +1,70 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml new file mode 
100644 index 000000000..5bb4381fe --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml @@ -0,0 +1,69 @@ +BUILD: +- aarch64-conda_cos7-linux-gnu +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_arch: +- aarch64 +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.6.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-aarch64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml new file mode 100644 index 000000000..2b1715d58 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml @@ -0,0 +1,69 @@ +BUILD: +- aarch64-conda_cos7-linux-gnu +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_arch: +- aarch64 +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: 
x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.7.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-aarch64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml new file mode 100644 index 000000000..5a0e7313e --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml @@ -0,0 +1,69 @@ +BUILD: +- aarch64-conda_cos7-linux-gnu +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_arch: +- aarch64 +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-aarch64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml new file mode 100644 index 000000000..16ace00bd --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml @@ -0,0 +1,69 @@ +BUILD: +- aarch64-conda_cos7-linux-gnu 
+aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_arch: +- aarch64 +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- linux-aarch64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml new file mode 100644 index 000000000..0be59fe1a --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml @@ -0,0 +1,65 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +macos_machine: +- x86_64-apple-darwin13.4.0 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.6.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy 
+zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml new file mode 100644 index 000000000..d2c046ab2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml @@ -0,0 +1,65 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +macos_machine: +- x86_64-apple-darwin13.4.0 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.7.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml new file mode 100644 index 000000000..43f634454 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml @@ -0,0 +1,65 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 
+macos_machine: +- x86_64-apple-darwin13.4.0 +numpy: +- '1.17' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml new file mode 100644 index 000000000..7cc730f9b --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml @@ -0,0 +1,65 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +macos_machine: +- x86_64-apple-darwin13.4.0 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml new file mode 100644 index 000000000..e5f8e2ba2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml @@ -0,0 +1,65 @@ 
+MACOSX_DEPLOYMENT_TARGET: +- '11.0' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge/label/rust_dev,conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +macos_machine: +- arm64-apple-darwin20.0.0 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-arm64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml new file mode 100644 index 000000000..cd3eca6d2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml @@ -0,0 +1,65 @@ +MACOSX_DEPLOYMENT_TARGET: +- '11.0' +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge/label/rust_dev,conda-forge +channel_targets: +- conda-forge main +cuda_compiler_version: +- None +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +macos_machine: +- arm64-apple-darwin20.0.0 +numpy: +- '1.19' +orc: +- 1.6.8 +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- osx-arm64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - 
python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml new file mode 100644 index 000000000..dfdfae966 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml @@ -0,0 +1,29 @@ +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +pin_run_as_build: + r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.0' +target_platform: +- linux-64 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cdt_name + - docker_image diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml new file mode 100644 index 000000000..c5f455c19 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml @@ -0,0 +1,29 @@ +c_compiler: +- gcc +c_compiler_version: +- '9' +cdt_name: +- cos6 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- gxx +cxx_compiler_version: +- '9' +docker_image: +- quay.io/condaforge/linux-anvil-comp7 +pin_run_as_build: + r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.1' +target_platform: +- linux-64 +zip_keys: +- - c_compiler_version + - cxx_compiler_version +- - cdt_name + - docker_image diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml new file mode 100644 index 000000000..08bb81d08 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml @@ -0,0 +1,27 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +c_compiler: +- clang +c_compiler_version: +- '11' 
+channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +macos_machine: +- x86_64-apple-darwin13.4.0 +pin_run_as_build: + r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.0' +target_platform: +- osx-64 +zip_keys: +- - c_compiler_version + - cxx_compiler_version diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml new file mode 100644 index 000000000..9974c6638 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml @@ -0,0 +1,27 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +c_compiler: +- clang +c_compiler_version: +- '11' +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- clangxx +cxx_compiler_version: +- '11' +macos_machine: +- x86_64-apple-darwin13.4.0 +pin_run_as_build: + r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.1' +target_platform: +- osx-64 +zip_keys: +- - c_compiler_version + - cxx_compiler_version diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml new file mode 100644 index 000000000..02c2a7075 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.0.yaml @@ -0,0 +1,12 @@ +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +pin_run_as_build: + r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.0' +target_platform: +- win-64 diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml new file mode 100644 index 000000000..2fe9ad314 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml @@ -0,0 +1,12 @@ +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +pin_run_as_build: + 
r-base: + min_pin: x.x + max_pin: x.x +r_base: +- '4.1' +target_platform: +- win-64 diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml new file mode 100644 index 000000000..8d4e25167 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml @@ -0,0 +1,55 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- vs2017 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- vs2017 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.6.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- win-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - numpy + - python +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml new file mode 100644 index 000000000..8da4a8380 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml @@ -0,0 +1,55 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- vs2017 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- vs2017 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +pin_run_as_build: + bzip2: + max_pin: x + 
lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.7.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- win-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - numpy + - python +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml new file mode 100644 index 000000000..1980e1be3 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml @@ -0,0 +1,55 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- vs2017 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None +cxx_compiler: +- vs2017 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.17' +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.8.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- win-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - numpy + - python +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml new file mode 100644 index 000000000..1106037d3 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml @@ -0,0 +1,55 @@ +aws_sdk_cpp: +- 1.8.186 +bzip2: +- '1' +c_compiler: +- vs2017 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- None 
+cxx_compiler: +- vs2017 +gflags: +- '2.2' +glog: +- '0.5' +grpc_cpp: +- '1.38' +libprotobuf: +- '3.16' +lz4_c: +- 1.9.3 +numpy: +- '1.19' +pin_run_as_build: + bzip2: + max_pin: x + lz4-c: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x + zlib: + max_pin: x.x +python: +- 3.9.* *_cpython +re2: +- 2021.06.01 +snappy: +- '1' +target_platform: +- win-64 +thrift_cpp: +- 0.14.2 +zip_keys: +- - numpy + - python +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh b/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh new file mode 100644 index 000000000..a53ef3f2c --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/.scripts/logging_utils.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Provide a unified interface for the different logging +# utilities CI providers offer. If unavailable, provide +# a compatible fallback (e.g. bare `echo xxxxxx`). + +function startgroup { + # Start a foldable group of log lines; ${CI} selects the azure or travis marker, anything else just echoes the title + # Pass a single argument, quoted: the group title (spaces are removed from it to build the travis fold id) + case ${CI:-} in + azure ) + echo "##[group]$1";; + travis ) + echo "$1" + echo -en 'travis_fold:start:'"${1// /}"'\\r';; + * ) + echo "$1";; + esac +} + +function endgroup { + # End a foldable group of log lines; emits nothing when ${CI} is neither azure nor travis + # Pass a single argument, quoted: presumably the same title given to startgroup (only the travis branch reads it) + case ${CI:-} in + azure ) + echo "##[endgroup]";; + travis ) + echo -en 'travis_fold:end:'"${1// /}"'\\r';; + esac +} diff --git a/src/arrow/dev/tasks/conda-recipes/README.md b/src/arrow/dev/tasks/conda-recipes/README.md new file mode 100644 index 000000000..39f82f1b0 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/README.md @@ -0,0 +1,67 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Conda Forge recipes + +This directory must be migrated periodically with the upstream updates of the +[arrow-cpp-feedstock][arrow-cpp-feedstock] and +[parquet-cpp-feedstock][parquet-cpp-feedstock] +conda-forge repositories because of multiple vendored files. + +## Keeping the recipes synchronized + +The recipes here are tested on a nightly basis, so they follow the development +versions of arrow instead of the upstream recipes, which are suitable for the +latest releases. + +### Backporting from the upstream feedstocks + +In most cases these recipes are more accurate than the upstream +feedstocks. However, the upstream feedstocks regularly receive automatic updates +from the conda-forge team, so we need to backport those changes to the crossbow +recipes. Most of these updates touch the version pinning files +(under `.ci_support`) and other CI-related configuration files. + +Because all three recipes must be built in the same continuous integration +job, prefer porting from the [arrow-cpp feedstock][arrow-cpp-feedstock]. + +#### Updating the variants: + +Copy the configuration files from `arrow-cpp-feedstock/.ci_support` to the +`.ci_support` folder. + +#### Updating the CI configurations: + +The `.azure-pipelines/azure-pipelines-[linux|osx|win].yml` files should be ported +to the local counterparts under `.azure-pipelines` while keeping the crossbow +related parts (the cloning of arrow and the jinja templated variables) and +moving the matrix definitions like [this][matrix-definition] to the crossbow +[tasks.yml](../tasks.yml) config file. 
+ + +### Porting recipes from crossbow to the upstream feedstocks + +Theoretically these recipes should be up to date with the actual version of +Arrow, so during the release procedure the content of these recipes should be +copied to the upstream feedstocks. + + +[arrow-cpp-feedstock]: https://github.com/conda-forge/arrow-cpp-feedstock +[parquet-cpp-feedstock]: https://github.com/conda-forge/parquet-cpp-feedstock +[matrix-definition]: https://github.com/conda-forge/arrow-cpp-feedstock/blob/master/.azure-pipelines/azure-pipelines-linux.yml#L12 diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt new file mode 100644 index 000000000..461398bab --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/LLVM_LICENSE.txt @@ -0,0 +1,68 @@ +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. 
+ + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. 
+ +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} +ARM contributions llvm/lib/Target/ARM/LICENSE.TXT +md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat new file mode 100644 index 000000000..0527356f7 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat @@ -0,0 +1,55 @@ +@echo on + +mkdir "%SRC_DIR%"\cpp\build +pushd "%SRC_DIR%"\cpp\build + +:: Enable CUDA support +if "%cuda_compiler_version%"=="None" ( + set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=OFF" +) else ( + REM this should move to nvcc-feedstock + set "CUDA_PATH=%CUDA_PATH:\=/%" + set "CUDA_HOME=%CUDA_HOME:\=/%" + + set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=ON" +) + +cmake -G "Ninja" ^ + -DBUILD_SHARED_LIBS=ON ^ + -DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^ + -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ + -DARROW_PACKAGE_PREFIX="%LIBRARY_PREFIX%" ^ + -DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^ + -DPython3_EXECUTABLE="%PYTHON%" ^ + -DARROW_WITH_BZ2:BOOL=ON ^ + -DARROW_WITH_ZLIB:BOOL=ON ^ + -DARROW_WITH_ZSTD:BOOL=ON ^ + -DARROW_WITH_LZ4:BOOL=ON ^ + -DARROW_WITH_SNAPPY:BOOL=ON ^ + -DARROW_WITH_BROTLI:BOOL=ON ^ + -DARROW_BOOST_USE_SHARED:BOOL=ON ^ + -DARROW_BUILD_TESTS:BOOL=OFF ^ + -DARROW_BUILD_UTILITIES:BOOL=OFF ^ + -DARROW_BUILD_STATIC:BOOL=OFF ^ + -DCMAKE_BUILD_TYPE=release ^ + -DARROW_SSE42:BOOL=OFF ^ + -DARROW_PYTHON:BOOL=ON ^ + -DARROW_MIMALLOC:BOOL=ON ^ + -DARROW_DATASET:BOOL=ON ^ + -DARROW_FLIGHT:BOOL=ON ^ + -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS:BOOL=ON ^ + -DARROW_HDFS:BOOL=ON ^ + -DARROW_PARQUET:BOOL=ON ^ + -DARROW_GANDIVA:BOOL=ON ^ + -DARROW_ORC:BOOL=ON ^ + -DARROW_S3:BOOL=ON ^ + 
-DBoost_NO_BOOST_CMAKE=ON ^ + -DCMAKE_UNITY_BUILD=ON ^ + %EXTRA_CMAKE_ARGS% ^ + .. +if errorlevel 1 exit 1 + +cmake --build . --target install --config Release +if errorlevel 1 exit 1 + +popd diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat new file mode 100644 index 000000000..89cec3710 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat @@ -0,0 +1,44 @@ +@echo on +pushd "%SRC_DIR%"\python + +@rem the symlinks for cmake modules don't work here +@rem NOTE: In contrast to conda-forge, they work here as we clone from git. +@rem del cmake_modules\BuildUtils.cmake +@rem del cmake_modules\SetupCxxFlags.cmake +@rem del cmake_modules\CompilerInfo.cmake +@rem del cmake_modules\FindNumPy.cmake +@rem del cmake_modules\FindPythonLibsNew.cmake +@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\BuildUtils.cmake" cmake_modules\ +@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\SetupCxxFlags.cmake" cmake_modules\ +@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\CompilerInfo.cmake" cmake_modules\ +@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindNumPy.cmake" cmake_modules\ +@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindPythonLibsNew.cmake" cmake_modules\ + +SET ARROW_HOME=%LIBRARY_PREFIX% +SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION% +SET PYARROW_BUILD_TYPE=release +SET PYARROW_WITH_S3=1 +SET PYARROW_WITH_HDFS=1 +SET PYARROW_WITH_DATASET=1 +SET PYARROW_WITH_FLIGHT=1 +SET PYARROW_WITH_GANDIVA=1 +SET PYARROW_WITH_PARQUET=1 +SET PYARROW_CMAKE_GENERATOR=Ninja + +:: Enable CUDA support +if "%cuda_compiler_version%"=="None" ( + set "PYARROW_WITH_CUDA=0" +) else ( + set "PYARROW_WITH_CUDA=1" +) + +%PYTHON% setup.py ^ + build_ext ^ + install --single-version-externally-managed ^ + --record=record.txt +if errorlevel 1 exit 1 +popd + +if [%PKG_NAME%] == [pyarrow] ( + rd /s /q %SP_DIR%\pyarrow\tests +) diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh 
b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh new file mode 100644 index 000000000..9e4c02c5c --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +set -e +set -x + +mkdir cpp/build +pushd cpp/build + +EXTRA_CMAKE_ARGS="" + +# Include g++'s system headers +if [ "$(uname)" == "Linux" ]; then + SYSTEM_INCLUDES=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';') + EXTRA_CMAKE_ARGS=" -DARROW_GANDIVA_PC_CXX_FLAGS=${SYSTEM_INCLUDES}" +fi + +# Enable CUDA support +if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]] +then + if [[ -z "${CUDA_HOME+x}" ]] + then + echo "cuda_compiler_version=${cuda_compiler_version} CUDA_HOME=$CUDA_HOME" + CUDA_GDB_EXECUTABLE=$(which cuda-gdb || exit 0) + if [[ -n "$CUDA_GDB_EXECUTABLE" ]] + then + CUDA_HOME=$(dirname $(dirname $CUDA_GDB_EXECUTABLE)) + else + echo "Cannot determine CUDA_HOME: cuda-gdb not in PATH" + return 1 + fi + fi + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CUDA_HOME}/lib64/stubs" +else + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=OFF" +fi + +if [[ "${target_platform}" == "osx-arm64" ]]; then + # We need llvm 11+ support in Arrow for this + # Tell jemalloc to support 16K page size on apple arm64 silicon + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=OFF -DARROW_JEMALLOC_LG_PAGE=14" + sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt +elif [[ "${target_platform}" == "linux-aarch64" ]]; then + # Tell jemalloc to support both 4k and 64k page arm64 systems + # See https://github.com/apache/arrow/pull/10940 + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON -DARROW_JEMALLOC_LG_PAGE=16" +else + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON" +fi + +cmake \ + 
-DARROW_BOOST_USE_SHARED=ON \ + -DARROW_BUILD_BENCHMARKS=OFF \ + -DARROW_BUILD_STATIC=OFF \ + -DARROW_BUILD_TESTS=OFF \ + -DARROW_BUILD_UTILITIES=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DARROW_DATASET=ON \ + -DARROW_DEPENDENCY_SOURCE=SYSTEM \ + -DARROW_FLIGHT=ON \ + -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=ON \ + -DARROW_HDFS=ON \ + -DARROW_JEMALLOC=ON \ + -DARROW_MIMALLOC=ON \ + -DARROW_ORC=ON \ + -DARROW_PACKAGE_PREFIX=$PREFIX \ + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=ON \ + -DARROW_PYTHON=ON \ + -DARROW_S3=ON \ + -DARROW_SIMD_LEVEL=NONE \ + -DARROW_USE_LD_GOLD=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=release \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=$PREFIX \ + -DLLVM_TOOLS_BINARY_DIR=$PREFIX/bin \ + -DPython3_EXECUTABLE=${PYTHON} \ + -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc \ + -GNinja \ + ${EXTRA_CMAKE_ARGS} \ + .. + +# Commented out until jemalloc and mimalloc are fixed upstream +if [[ "${target_platform}" == "osx-arm64" ]]; then + ninja jemalloc_ep-prefix/src/jemalloc_ep-stamp/jemalloc_ep-patch mimalloc_ep-prefix/src/mimalloc_ep-stamp/mimalloc_ep-patch + cp $BUILD_PREFIX/share/gnuconfig/config.* jemalloc_ep-prefix/src/jemalloc_ep/build-aux/ + sed -ie 's/list(APPEND mi_cflags -march=native)//g' mimalloc_ep-prefix/src/mimalloc_ep/CMakeLists.txt + # Use the correct register for thread-local storage + sed -ie 's/tpidr_el0/tpidrro_el0/g' mimalloc_ep-prefix/src/mimalloc_ep/include/mimalloc-internal.h +fi + +ninja install + +popd diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh new file mode 100644 index 000000000..f0cf9ceb4 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env sh + +set -e +set -x + +# Build dependencies +export ARROW_HOME=$PREFIX +export 
PARQUET_HOME=$PREFIX +export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION +export PYARROW_BUILD_TYPE=release +export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0 +export PYARROW_WITH_DATASET=1 +export PYARROW_WITH_FLIGHT=1 +if [[ "${target_platform}" == "osx-arm64" ]]; then + # We need llvm 11+ support in Arrow for this + export PYARROW_WITH_GANDIVA=0 +else + export PYARROW_WITH_GANDIVA=1 +fi +export PYARROW_WITH_HDFS=1 +export PYARROW_WITH_ORC=1 +export PYARROW_WITH_PARQUET=1 +export PYARROW_WITH_PLASMA=1 +export PYARROW_WITH_S3=1 +export PYARROW_CMAKE_GENERATOR=Ninja +BUILD_EXT_FLAGS="" + +# Enable CUDA support +if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]; then + export PYARROW_WITH_CUDA=1 +else + export PYARROW_WITH_CUDA=0 +fi + +# Resolve: Make Error at cmake_modules/SetupCxxFlags.cmake:338 (message): Unsupported arch flag: -march=. +if [[ "${target_platform}" == "linux-aarch64" ]]; then + export PYARROW_CMAKE_OPTIONS="-DARROW_ARMV8_ARCH=armv8-a" +fi + +cd python + +$PYTHON setup.py \ + build_ext \ + install --single-version-externally-managed \ + --record=record.txt + +if [[ "$PKG_NAME" == "pyarrow" ]]; then + rm -r ${SP_DIR}/pyarrow/tests +fi diff --git a/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml new file mode 100644 index 000000000..48a862986 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -0,0 +1,302 @@ +# NOTE: In contrast to the conda-forge recipe, ARROW_VERSION is a templated variable here. 
+{% set version = ARROW_VERSION %} +{% set cuda_enabled = cuda_compiler_version != "None" %} +{% set build_ext_version = ARROW_VERSION %} +{% set build_ext = "cuda" if cuda_enabled else "cpu" %} +{% set proc_build_number = "0" %} + +package: + name: arrow-cpp-ext + version: {{ version }} + +source: + path: ../../../../ + +build: + number: 0 + # for cuda on win/linux, building with 9.2 is enough to be compatible with all later versions, + # since arrow is only using libcuda, and not libcudart. + skip: true # [(win or linux) and cuda_compiler_version not in ("None", "10.2")] + skip: true # [osx and cuda_compiler_version != "None"] + run_exports: + - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }} + +outputs: + - name: arrow-cpp-proc + version: {{ build_ext_version }} + build: + number: {{ proc_build_number }} + string: "{{ build_ext }}" + test: + commands: + - exit 0 + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: 'A meta-package to select Arrow build variant' + + - name: arrow-cpp + script: build-arrow.sh # [not win] + script: bld-arrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + run_exports: + - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }} + ignore_run_exports: + - cudatoolkit + track_features: + {{ "- arrow-cuda" if cuda_enabled else "" }} + requirements: + build: + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - gnuconfig # [osx and arm64] + - libprotobuf + - grpc-cpp + - cmake + - autoconf # [unix] + - ninja + - make # [unix] + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + host: + - aws-sdk-cpp + - boost-cpp >=1.70 + - brotli + - bzip2 + - c-ares + - gflags + - glog + - 
grpc-cpp + - libprotobuf + - clangdev 10 # [not (osx and arm64)] + - llvmdev 10 # [not (osx and arm64)] + - libutf8proc + - lz4-c + - numpy + - orc # [unix] + - python + - rapidjson + - re2 + - snappy + - thrift-cpp + - zlib + - zstd + run: + - {{ pin_compatible('numpy', lower_bound='1.16') }} + - python + run_constrained: + - arrow-cpp-proc * {{ build_ext }} + - cudatoolkit >=9.2 # [cuda_compiler_version != "None"] + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ libraries for Apache Arrow + + test: + commands: + # headers + - test -f $PREFIX/include/arrow/api.h # [unix] + - test -f $PREFIX/include/arrow/flight/types.h # [unix] + - test -f $PREFIX/include/plasma/client.h # [unix] + - test -f $PREFIX/include/gandiva/engine.h # [unix and not (osx and arm64)] + - test -f $PREFIX/include/parquet/api/reader.h # [unix] + - if not exist %LIBRARY_INC%\\arrow\\api.h exit 1 # [win] + - if not exist %LIBRARY_INC%\\gandiva\\engine.h exit 1 # [win] + - if not exist %LIBRARY_INC%\\parquet\\api\\reader.h exit 1 # [win] + + # shared + - test -f $PREFIX/lib/libarrow.so # [linux] + - test -f $PREFIX/lib/libarrow_dataset.so # [linux] + - test -f $PREFIX/lib/libarrow_flight.so # [linux] + - test -f $PREFIX/lib/libarrow_python.so # [linux] + - test -f $PREFIX/lib/libparquet.so # [linux] + - test -f $PREFIX/lib/libgandiva.so # [linux] + - test -f $PREFIX/lib/libplasma.so # [linux] + - test -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version != "None") and unix] + - test ! 
-f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version == "None") and unix] + - if not exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version != "None") and win] + - if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win] + - test -f $PREFIX/lib/libarrow.dylib # [osx] + - test -f $PREFIX/lib/libarrow_dataset.dylib # [osx] + - test -f $PREFIX/lib/libarrow_python.dylib # [osx] + - test -f $PREFIX/lib/libgandiva.dylib # [osx and not arm64] + - test -f $PREFIX/lib/libparquet.dylib # [osx] + - test -f $PREFIX/lib/libplasma.dylib # [osx] + - if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1 # [win] + - if not exist %PREFIX%\\Library\\bin\\arrow_dataset.dll exit 1 # [win] + - if not exist %PREFIX%\\Library\\bin\\arrow_flight.dll exit 1 # [win] + - if not exist %PREFIX%\\Library\\bin\\arrow_python.dll exit 1 # [win] + - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win] + - if not exist %PREFIX%\\Library\\bin\\gandiva.dll exit 1 # [win] + + # absence of static libraries + - test ! -f $PREFIX/lib/libarrow.a # [unix] + - test ! -f $PREFIX/lib/libarrow_dataset.a # [unix] + - test ! -f $PREFIX/lib/libarrow_flight.a # [unix] + - test ! -f $PREFIX/lib/libarrow_python.a # [unix] + - test ! -f $PREFIX/lib/libplasma.a # [unix] + - test ! -f $PREFIX/lib/libparquet.a # [unix] + - test ! 
-f $PREFIX/lib/libgandiva.a # [unix] + - if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win] + - if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win] + - if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win] + - if exist %PREFIX%\\Library\\lib\\arrow_python_static.lib exit 1 # [win] + - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win] + - if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win] + + - name: pyarrow + script: build-pyarrow.sh # [not win] + script: bld-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports: + - cudatoolkit + track_features: + {{ "- arrow-cuda" if cuda_enabled else "" }} + requirements: + build: + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + - make # [unix] + - {{ compiler('c') }} + - {{ compiler('cxx') }} + # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + host: + - {{ pin_subpackage('arrow-cpp', exact=True) }} + - cython + - numpy + - python + - setuptools + - setuptools_scm + - six + run: + - {{ pin_subpackage('arrow-cpp', exact=True) }} + - {{ pin_compatible('numpy', lower_bound='1.16') }} + # empty parquet-cpp metapackage, force old versions to be uninstalled + - parquet-cpp 1.5.1.* + - python + run_constrained: + - arrow-cpp-proc * {{ build_ext }} + - cudatoolkit >=9.2 # [cuda_compiler_version != "None"] + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python libraries for Apache Arrow + + test: + imports: + - pyarrow + - pyarrow.dataset + - pyarrow.flight + - pyarrow.gandiva # [not (osx and 
arm64)] + - pyarrow.orc # [unix] + - pyarrow.parquet + - pyarrow.plasma # [unix] + - pyarrow.fs + - pyarrow._s3fs + - pyarrow._hdfs + # We can only test importing cuda package but cannot run when a + # CUDA device is not available, for instance, when building from CI. + # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see + # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows + # However, we check below for (at least) the presence of a correctly-compiled module + - pyarrow.cuda # [cuda_compiler_version != "None" and not win] + commands: + - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix] + - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win] + # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y" + - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"] + + - name: pyarrow-tests + script: build-pyarrow.sh # [not win] + script: bld-pyarrow.bat # [win] + version: {{ version }} + build: + string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }} + ignore_run_exports: + - cudatoolkit + track_features: + {{ "- arrow-cuda" if cuda_enabled else "" }} + requirements: + build: + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - cmake + - ninja + - make # [unix] + - {{ compiler('c') }} + - {{ compiler('cxx') }} + # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda + - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] + host: + - {{ pin_subpackage('arrow-cpp', exact=True) }} + - {{ pin_subpackage('pyarrow', exact=True) }} + - cython + - numpy + - python + - setuptools + - setuptools_scm + - six + run: + - {{ pin_subpackage('pyarrow', exact=True) }} + - python + 
run_constrained: + - arrow-cpp-proc * {{ build_ext }} + - cudatoolkit >=9.2 # [cuda_compiler_version != "None"] + + about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: Python test files for Apache Arrow + + test: + commands: + - test -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix] + - if not exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win] + +about: + home: http://github.com/apache/arrow + license: Apache-2.0 + license_file: + - LICENSE.txt + summary: C++ and Python libraries for Apache Arrow + +extra: + recipe-maintainers: + - wesm + - xhochy + - leifwalsh + - jreback + - cpcloud + - pcmoritz + - robertnishihara + - siddharthteotia + - kou + - kszucs + - pitrou + - pearu + - nealrichardson + - jakirkham diff --git a/src/arrow/dev/tasks/conda-recipes/azure.clean.yml b/src/arrow/dev/tasks/conda-recipes/azure.clean.yml new file mode 100644 index 000000000..84f167812 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/azure.clean.yml @@ -0,0 +1,28 @@ +jobs: +- job: linux + pool: + vmImage: ubuntu-latest + timeoutInMinutes: 360 + + steps: + - script: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + displayName: Clone arrow + + - script: | + conda install -y -c conda-forge pandas anaconda-client packaging + displayName: Install requirements + + - script: | + {% if arrow.branch == 'master' %} + mkdir -p $HOME/.continuum/anaconda-client/tokens/ + echo $(CROSSBOW_ANACONDA_TOKEN) > $HOME/.continuum/anaconda-client/tokens/https%3A%2F%2Fapi.anaconda.org.token + {% endif %} + eval "$(conda shell.bash hook)" + conda activate base + python3 arrow/dev/tasks/conda-recipes/clean.py {% if arrow.branch == 'master' %}FORCE{% endif %} + displayName: Delete outdated packages + diff --git a/src/arrow/dev/tasks/conda-recipes/azure.linux.yml 
b/src/arrow/dev/tasks/conda-recipes/azure.linux.yml new file mode 100755 index 000000000..c05d284d2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/azure.linux.yml @@ -0,0 +1,38 @@ +{% import 'macros.jinja' as macros with context %} + +jobs: +- job: linux + pool: + vmImage: ubuntu-latest + timeoutInMinutes: 360 + + variables: + CONFIG: {{ config }} + R_CONFIG: {{ r_config|default("") }} + ARROW_VERSION: {{ arrow.no_rc_version }} + UPLOAD_PACKAGES: False + + steps: + # configure qemu binfmt-misc running. This allows us to run docker containers + # embedded qemu-static + - script: | + docker run --rm --privileged multiarch/qemu-user-static:register --reset --credential yes + ls /proc/sys/fs/binfmt_misc/ + displayName: Configure binfmt_misc + condition: not(startsWith(variables['CONFIG'], 'linux_64')) + + {{ macros.azure_checkout_arrow() }} + + - task: CondaEnvironment@1 + inputs: + packageSpecs: 'anaconda-client shyaml' + installOptions: '-c conda-forge' + updateConda: false + + - script: | + mkdir build_artifacts + CI=azure arrow/dev/tasks/conda-recipes/run_docker_build.sh $(pwd)/build_artifacts + displayName: Run docker build + + {{ macros.azure_upload_releases("build_artifacts/*/*.tar.bz2") }} + {{ macros.azure_upload_anaconda("build_artifacts/*/*.tar.bz2") }} diff --git a/src/arrow/dev/tasks/conda-recipes/azure.osx.yml b/src/arrow/dev/tasks/conda-recipes/azure.osx.yml new file mode 100755 index 000000000..99bb76ba5 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/azure.osx.yml @@ -0,0 +1,83 @@ +{% import 'macros.jinja' as macros with context %} + +jobs: +- job: osx + pool: + vmImage: macOS-10.14 + timeoutInMinutes: 360 + variables: + CONFIG: {{ config }} + R_CONFIG: {{ r_config|default("") }} + ARROW_VERSION: {{ arrow.no_rc_version }} + UPLOAD_PACKAGES: False + steps: + - bash: | + echo "##vso[task.prependpath]$CONDA/bin" + sudo chown -R $USER $CONDA + displayName: Add conda to PATH + + - script: | + source activate base + conda config --set 
channel_priority strict + conda install -n base -c conda-forge --quiet --yes conda-forge-ci-setup=3 conda-build + displayName: 'Add conda-forge-ci-setup=3' + + - script: | + echo "Removing homebrew from Azure to avoid conflicts." + /usr/bin/sudo mangle_homebrew + /usr/bin/sudo -k + displayName: Mangle homebrew + + {{ macros.azure_checkout_arrow() }} + + - script: | + source activate base + echo "Configuring conda." + + setup_conda_rc ./ ./ ./.ci_support/${CONFIG}.yaml + export CI=azure + source run_conda_forge_build_setup + conda update --yes --quiet --override-channels -c conda-forge --all + displayName: Configure conda and conda-build + workingDirectory: arrow/dev/tasks/conda-recipes + env: + OSX_FORCE_SDK_DOWNLOAD: "1" + + - script: | + source activate base + mangle_compiler ./ ./ ./.ci_support/${CONFIG}.yaml + workingDirectory: arrow/dev/tasks/conda-recipes + displayName: Mangle compiler + + - script: | + source activate base + make_build_number ./ ./ ./.ci_support/${CONFIG}.yaml + workingDirectory: arrow/dev/tasks/conda-recipes + displayName: Generate build number clobber file + + - script: | + source activate base + set +x + if [[ "${CONFIG}" == osx_arm* ]]; then + EXTRA_CB_OPTIONS="${EXTRA_CB_OPTIONS:-} --no-test" + fi + conda build arrow-cpp \ + -m ./.ci_support/${CONFIG}.yaml \ + --clobber-file ./.ci_support/clobber_${CONFIG}.yaml \ + ${EXTRA_CB_OPTIONS:-} \ + --output-folder ./build_artifacts + + if [ ! 
-z "${R_CONFIG}" ]; then + conda build r-arrow \ + -m ./.ci_support/r/${R_CONFIG}.yaml \ + --output-folder ./build_artifacts + fi + workingDirectory: arrow/dev/tasks/conda-recipes + displayName: Build recipes + + - script: | + sudo mv /usr/local/conda_mangled/* /usr/local/ + displayName: Unmangle homebrew + + {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }} + {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }} diff --git a/src/arrow/dev/tasks/conda-recipes/azure.win.yml b/src/arrow/dev/tasks/conda-recipes/azure.win.yml new file mode 100755 index 000000000..422e2f0e9 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/azure.win.yml @@ -0,0 +1,77 @@ +{% import 'macros.jinja' as macros with context %} + +jobs: +- job: win + pool: + vmImage: vs2017-win2016 + timeoutInMinutes: 360 + variables: + CONFIG: {{ config }} + R_CONFIG: {{ r_config|default("") }} + ARROW_VERSION: {{ arrow.no_rc_version }} + CONDA_BLD_PATH: D:\\bld\\ + UPLOAD_PACKAGES: False + + steps: + - script: | + choco install vcpython27 -fdv -y --debug + condition: contains(variables['CONFIG'], 'vs2008') + displayName: Install vcpython27.msi (if needed) + + - powershell: | + Set-PSDebug -Trace 1 + $batchcontent = @" + ECHO ON + SET vcpython=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0 + DIR "%vcpython%" + CALL "%vcpython%\vcvarsall.bat" %* + "@ + $batchDir = "C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC" + $batchPath = "$batchDir" + "\vcvarsall.bat" + New-Item -Path $batchPath -ItemType "file" -Force + Set-Content -Value $batchcontent -Path $batchPath + Get-ChildItem -Path $batchDir + Get-ChildItem -Path ($batchDir + '\..') + condition: contains(variables['CONFIG'], 'vs2008') + displayName: Patch vs2008 (if needed) + + - task: CondaEnvironment@1 + inputs: + packageSpecs: 'python=3.6 conda-build conda conda-forge::conda-forge-ci-setup=3 pip' # Optional 
+ installOptions: "-c conda-forge" + updateConda: true + displayName: Install conda-build and activate environment + - script: set PYTHONUNBUFFERED=1 + + {{ macros.azure_checkout_arrow()|indent(2) }} + + # Configure the VM + - script: setup_conda_rc .\ .\ .\.ci_support\%CONFIG%.yaml + workingDirectory: arrow\dev\tasks\conda-recipes + + # Configure the VM. + - script: | + set "CI=azure" + call activate base + run_conda_forge_build_setup + displayName: conda-forge build setup + workingDirectory: arrow\dev\tasks\conda-recipes + + - script: | + conda.exe build arrow-cpp parquet-cpp -m .ci_support\%CONFIG%.yaml + displayName: Build recipe + workingDirectory: arrow\dev\tasks\conda-recipes + env: + PYTHONUNBUFFERED: 1 + condition: not(contains(variables['CONFIG'], 'vs2008')) + + - script: | + conda.exe build r-arrow -m .ci_support\r\%R_CONFIG%.yaml + displayName: Build recipe + workingDirectory: arrow\dev\tasks\conda-recipes + env: + PYTHONUNBUFFERED: 1 + condition: contains(variables['R_CONFIG'], 'win') + + {{ macros.azure_upload_releases("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }} + {{ macros.azure_upload_anaconda("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }} diff --git a/src/arrow/dev/tasks/conda-recipes/azure.yml b/src/arrow/dev/tasks/conda-recipes/azure.yml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/azure.yml diff --git a/src/arrow/dev/tasks/conda-recipes/build_steps.sh b/src/arrow/dev/tasks/conda-recipes/build_steps.sh new file mode 100755 index 000000000..25864c08a --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/build_steps.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# NOTE: This script has been slightly adapted to suit the Apache Arrow / crossbow CI +# setup. The next time this is updated to the current version on conda-forge, +# you will also need to make these additions afterwards. + +# PLEASE NOTE: This script has been automatically generated by conda-smithy. 
Any changes here +# will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent +# changes to this script, consider a proposal to conda-smithy so that other feedstocks can also +# benefit from the improvement. + +set -xeuo pipefail + +output_dir=${1} + +export PYTHONUNBUFFERED=1 +export FEEDSTOCK_ROOT="${FEEDSTOCK_ROOT:-/home/conda/feedstock_root}" +export CI_SUPPORT="${FEEDSTOCK_ROOT}/.ci_support" +export CONFIG_FILE="${CI_SUPPORT}/${CONFIG}.yaml" + +cat >~/.condarc <<CONDARC + +conda-build: + root-dir: ${output_dir} + +CONDARC + +conda install --yes --quiet conda-forge-ci-setup=3 conda-build pip -c conda-forge + +# set up the condarc +setup_conda_rc "${FEEDSTOCK_ROOT}" "${FEEDSTOCK_ROOT}" "${CONFIG_FILE}" + +source run_conda_forge_build_setup + +# make the build number clobber +make_build_number "${FEEDSTOCK_ROOT}" "${FEEDSTOCK_ROOT}" "${CONFIG_FILE}" + +export CONDA_BLD_PATH="${output_dir}" + +conda build \ + "${FEEDSTOCK_ROOT}/arrow-cpp" \ + "${FEEDSTOCK_ROOT}/parquet-cpp" \ + -m "${CI_SUPPORT}/${CONFIG}.yaml" \ + --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml" \ + --output-folder "${output_dir}" + +if [ ! 
-z "${R_CONFIG:-}" ]; then + conda build \ + "${FEEDSTOCK_ROOT}/r-arrow" \ + -m "${CI_SUPPORT}/r/${R_CONFIG}.yaml" \ + --output-folder "${output_dir}" +fi + + +touch "${output_dir}/conda-forge-build-done-${CONFIG}" diff --git a/src/arrow/dev/tasks/conda-recipes/clean.py b/src/arrow/dev/tasks/conda-recipes/clean.py new file mode 100644 index 000000000..bd31c875d --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/clean.py @@ -0,0 +1,80 @@ +from subprocess import check_output, check_call +from typing import List + +import json +import os +import pandas as pd +import sys + +from packaging.version import Version + + +VERSIONS_TO_KEEP = 5 +PACKAGES = [ + "arrow-cpp", + "arrow-cpp-proc", + "parquet-cpp", + "pyarrow", + "pyarrow-tests", + "r-arrow", +] +PLATFORMS = [ + "linux-64", + "linux-aarch64", + "osx-64", + "win-64", +] +EXCLUDED_PATTERNS = [ + ["r-arrow", "linux-aarch64"], +] + + +def packages_to_delete(package_name: str, platform: str) -> List[str]: + env = os.environ.copy() + env["CONDA_SUBDIR"] = platform + pkgs_json = check_output( + [ + "conda", + "search", + "--json", + "-c", + "arrow-nightlies", + "--override-channels", + package_name, + ], + env=env, + ) + pkgs = pd.DataFrame(json.loads(pkgs_json)[package_name]) + pkgs["version"] = pkgs["version"].map(Version) + pkgs["py_version"] = pkgs["build"].str.slice(0, 4) + + to_delete = [] + + for (subdir, python), group in pkgs.groupby(["subdir", "py_version"]): + group = group.sort_values(by="version", ascending=False) + + if len(group) > VERSIONS_TO_KEEP: + del_candidates = group[VERSIONS_TO_KEEP:] + to_delete += ( + f"arrow-nightlies/{package_name}/" + + del_candidates["version"].astype(str) + + del_candidates["url"].str.replace( + "https://conda.anaconda.org/arrow-nightlies", "" + ) + ).to_list() + + return to_delete + + +if __name__ == "__main__": + to_delete = [] + for package in PACKAGES: + for platform in PLATFORMS: + if [package, platform] in EXCLUDED_PATTERNS: + continue + to_delete += 
packages_to_delete(package, platform) + + for name in to_delete: + print(f"Deleting {name} …") + if "FORCE" in sys.argv: + check_call(["anaconda", "remove", "-f", name]) diff --git a/src/arrow/dev/tasks/conda-recipes/conda-forge.yml b/src/arrow/dev/tasks/conda-recipes/conda-forge.yml new file mode 100644 index 000000000..4c07b5dd3 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/conda-forge.yml @@ -0,0 +1 @@ +channel_priority: strict diff --git a/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml b/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml new file mode 100644 index 000000000..5de06c32b --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/parquet-cpp/meta.yaml @@ -0,0 +1,51 @@ +# ARROW-3229: this is a meta-package to prevent conflicts in the future + +{% set parquet_version = "1.5.1" %} + +package: + name: parquet-cpp + version: {{ parquet_version }} + +build: + number: 0 + skip: true # [win32] + skip: true # [win and py<35] + +requirements: + host: + # NOTE: in the upstream feedstock use >= instead of = + - arrow-cpp ={{ ARROW_VERSION }} + run: + - arrow-cpp ={{ ARROW_VERSION }} + +test: + commands: + # headers + - test -f $PREFIX/include/parquet/api/reader.h # [unix] + - if not exist %LIBRARY_INC%\\parquet\\api\\reader.h exit 1 # [win] + + # shared + - test -f $PREFIX/lib/libparquet.so # [linux] + - test -f $PREFIX/lib/libparquet.dylib # [osx] + - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win] + + # absence of static libraries + - test ! 
-f $PREFIX/lib/libparquet.a # [unix] + - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win] + +about: + home: http://github.com/apache/arrow + license: Apache 2.0 + summary: 'C++ libraries for the Apache Parquet file format' + +extra: + recipe-maintainers: + - wesm + - xhochy + - leifwalsh + - jreback + - cpcloud + - siddharthteotia + - kou + - kszucs + - pitrou diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat b/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat new file mode 100644 index 000000000..a193ddc0a --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/bld.bat @@ -0,0 +1,9 @@ +bash %RECIPE_DIR%/build_win.sh
+REM Abort the build if the preceding command (the build_win.sh invocation) failed.
+IF %ERRORLEVEL% NEQ 0 exit 1
+REM Copy the recipe's Windows configure script into the R package directory "r".
+cp %RECIPE_DIR%/configure.win r
+IF %ERRORLEVEL% NEQ 0 exit 1
+REM Copy the recipe's install.libs.R into the R package's src directory.
+cp %RECIPE_DIR%/install.libs.R r/src
+IF %ERRORLEVEL% NEQ 0 exit 1
+REM Hand -j%CPU_COUNT% to make through MAKEFLAGS for the install step below.
+set "MAKEFLAGS=-j%CPU_COUNT%"
+REM Run R CMD INSTALL --build on the package in "r"; fail the recipe on error.
+"%R%" CMD INSTALL --build r
+IF %ERRORLEVEL% NEQ 0 exit 1
diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh b/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh new file mode 100644 index 000000000..e868189a2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +export DISABLE_AUTOBREW=1 +$R CMD INSTALL --build r/. diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh b/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh new file mode 100755 index 000000000..22c07d6e0 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/build_win.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -exuo pipefail + + +# Rename arrow.dll to lib_arrow.dll to avoid conflicts with the arrow-cpp arrow.dll +sed -i -e 's/void R_init_arrow/__declspec(dllexport) void R_init_lib_arrow/g' r/src/arrowExports.cpp +sed -i -e 's/useDynLib(arrow/useDynLib(lib_arrow/g' r/NAMESPACE diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win b/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win new file mode 100755 index 000000000..0b11d1335 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/configure.win @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +# Remove the -I../inst/include/ when unvendoring cpp11 in ARROW-13610 +echo "PKG_CPPFLAGS=-DNDEBUG -I\"${LIBRARY_PREFIX}/include\" -I\"${PREFIX}/include\" -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_S3 -DARROW_R_WITH_JSON -I../inst/include/" > src/Makevars.win +echo "PKG_CXXFLAGS=\$(CXX_VISIBILITY)" >> src/Makevars.win +echo 'CXX_STD=CXX11' >> src/Makevars.win +echo "PKG_LIBS=-L\"${LIBRARY_PREFIX}/lib\" -larrow_dataset -lparquet -larrow" >> src/Makevars.win diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R b/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R new file mode 100644 index 000000000..005bbe16b --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/install.libs.R @@ -0,0 +1,5 @@ +src_dir <- 
file.path(R_PACKAGE_SOURCE, "src", fsep = "/") +dest_dir <- file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/") + +dir.create(file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/"), recursive = TRUE, showWarnings = FALSE) +file.copy(file.path(src_dir, "arrow.dll", fsep = "/"), file.path(dest_dir, "lib_arrow.dll", fsep = "/")) diff --git a/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml b/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml new file mode 100644 index 000000000..5f0643bef --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/r-arrow/meta.yaml @@ -0,0 +1,66 @@ +{% set version = ARROW_VERSION %} +{% set posix = 'm2-' if win else '' %} +{% set native = 'm2w64-' if win else '' %} + +package: + name: r-arrow + version: {{ version|replace("-", "_") }} + +source: + path: ../../../../ + +build: + merge_build_host: true # [win] + number: 0 + rpaths: + - lib/R/lib/ + - lib/ + +requirements: + build: + - {{ compiler('c') }} # [not win] + - {{ compiler('cxx') }} # [not win] + - {{ compiler('r_clang') }} # [win] + - pkg-config # [not win] + - {{ posix }}make + - {{ posix }}sed # [win] + - {{ posix }}coreutils # [win] + - {{ posix }}filesystem # [win] + - {{ posix }}zip # [win] + host: + # Needs to be here, otherwise merge_build_host runs into issues + - pkg-config # [win] + - r-base + - arrow-cpp {{ version }} + - r-cpp11 + - r-r6 + - r-assertthat + - r-bit64 + - r-purrr + - r-rlang + - r-tidyselect + run: + - r-base + - r-r6 + - r-assertthat + - r-bit64 + - r-purrr + - r-rlang + - r-tidyselect + +test: + commands: + - $R -e "library('arrow')" # [not win] + - "\"%R%\" -e \"library('arrow'); data(mtcars); write_parquet(mtcars, 'test.parquet')\"" # [win] + +about: + home: https://github.com/apache/arrow + license: Apache-2.0 + license_file: LICENSE.txt + summary: R Integration to 'Apache' 'Arrow'. 
+ license_family: APACHE + +extra: + recipe-maintainers: + - conda-forge/r + - conda-forge/arrow-cpp diff --git a/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh b/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh new file mode 100755 index 000000000..7645c43e2 --- /dev/null +++ b/src/arrow/dev/tasks/conda-recipes/run_docker_build.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# NOTE: This script has been slightly adopted to suite the Apache Arrow / crossbow CI +# setup. The next time this is updated to the current version on conda-forge, +# you will also make this additions afterwards. + +# PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here +# will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent +# changes to this script, consider a proposal to conda-smithy so that other feedstocks can also +# benefit from the improvement. + +set -xeo pipefail + +build_dir=${1} + +THISDIR="$( cd "$( dirname "$0" )" >/dev/null && pwd )" +ARROW_ROOT=$(cd "$THISDIR/../../.."; pwd;) +FEEDSTOCK_ROOT=$THISDIR + +docker info + +# In order for the conda-build process in the container to write to the mounted +# volumes, we need to run with the same id as the host machine, which is +# normally the owner of the mounted volumes, or at least has write permission +export HOST_USER_ID=$(id -u) +# Check if docker-machine is being used (normally on OSX) and get the uid from +# the VM +if hash docker-machine 2> /dev/null && docker-machine active > /dev/null; then + export HOST_USER_ID=$(docker-machine ssh $(docker-machine active) id -u) +fi + +if [ -z "$CONFIG" ]; then + set +x + FILES=`ls .ci_support/linux_*` + CONFIGS="" + for file in $FILES; do + CONFIGS="${CONFIGS}'${file:12:-5}' or "; + done + echo "Need to set CONFIG env variable. 
Value can be one of ${CONFIGS:0:-4}" + exit 1 +fi + +if [ -z "${DOCKER_IMAGE}" ]; then + SHYAML_INSTALLED="$(shyaml -h || echo NO)" + if [ "${SHYAML_INSTALLED}" == "NO" ]; then + echo "WARNING: DOCKER_IMAGE variable not set and shyaml not installed. Falling back to condaforge/linux-anvil-comp7" + DOCKER_IMAGE="condaforge/linux-anvil-comp7" + else + DOCKER_IMAGE="$(cat "${FEEDSTOCK_ROOT}/.ci_support/${CONFIG}.yaml" | shyaml get-value docker_image.0 condaforge/linux-anvil-comp7 )" + fi +fi + +mkdir -p "${build_dir}" +DONE_CANARY="${build_dir}/conda-forge-build-done-${CONFIG}" +rm -f "$DONE_CANARY" + +if [ -z "${CI}" ]; then + DOCKER_RUN_ARGS="-it " +fi + +export UPLOAD_PACKAGES="${UPLOAD_PACKAGES:-True}" +docker run ${DOCKER_RUN_ARGS} \ + --shm-size=2G \ + -v "${ARROW_ROOT}":/arrow:rw,z \ + -v "${build_dir}":/build:rw \ + -e FEEDSTOCK_ROOT="/arrow/dev/tasks/conda-recipes" \ + -e CONFIG \ + -e R_CONFIG \ + -e HOST_USER_ID \ + -e UPLOAD_PACKAGES \ + -e ARROW_VERSION \ + -e CI \ + $DOCKER_IMAGE \ + bash /arrow/dev/tasks/conda-recipes/build_steps.sh /build + +# verify that the end of the script was reached +test -f "$DONE_CANARY" diff --git a/src/arrow/dev/tasks/cpp-examples/github.linux.yml b/src/arrow/dev/tasks/cpp-examples/github.linux.yml new file mode 100644 index 000000000..717d3c443 --- /dev/null +++ b/src/arrow/dev/tasks/cpp-examples/github.linux.yml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + test: + name: C++ Example + runs-on: ubuntu-latest + steps: + - name: Checkout Arrow + shell: bash + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Run + shell: bash + run: | + cd arrow/cpp/examples/{{ type }} + docker-compose run --rm {{ run }} diff --git a/src/arrow/dev/tasks/docker-tests/azure.linux.yml b/src/arrow/dev/tasks/docker-tests/azure.linux.yml new file mode 100644 index 000000000..b8f1151f7 --- /dev/null +++ b/src/arrow/dev/tasks/docker-tests/azure.linux.yml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +jobs: +- job: linux + pool: + vmImage: ubuntu-latest + timeoutInMinutes: 360 + {% if env is defined %} + variables: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + + steps: + - task: DockerInstaller@0 + displayName: Docker Installer + inputs: + dockerVersion: 17.09.0-ce + releaseType: stable + + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.6' + + - script: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + displayName: Clone arrow + + - script: pip install -e arrow/dev/archery[docker] + displayName: Setup Archery + + - script: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }} + displayName: Execute Docker Build diff --git a/src/arrow/dev/tasks/docker-tests/circle.linux.yml b/src/arrow/dev/tasks/docker-tests/circle.linux.yml new file mode 100644 index 000000000..3ddb93dc9 --- /dev/null +++ b/src/arrow/dev/tasks/docker-tests/circle.linux.yml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +version: 2 +jobs: + build: + machine: + image: ubuntu-1604:202004-01 + {%- if env is defined %} + environment: + {%- for key, value in env.items() %} + {{ key }}: {{ value }} + {%- endfor %} + {%- endif %} + steps: + - run: | + docker -v + docker-compose -v + - run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - run: + name: Execute Docker Build + command: | + pyenv versions + pyenv global 3.6.10 + pip install -e arrow/dev/archery[docker] + archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }} + no_output_timeout: "1h" + +workflows: + version: 2 + build: + jobs: + - build diff --git a/src/arrow/dev/tasks/docker-tests/github.linux.yml b/src/arrow/dev/tasks/docker-tests/github.linux.yml new file mode 100644 index 000000000..1faf482cb --- /dev/null +++ b/src/arrow/dev/tasks/docker-tests/github.linux.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + test: + name: Docker Test + runs-on: ubuntu-latest + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: "{{ value }}" + {% endfor %} + {% endif %} + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + + - name: Execute Docker Build + shell: bash + run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ flags|default("") }} {{ image }} {{ command|default("") }} + + {% if '-r' in image %} + - name: Dump R install logs + run: cat arrow/r/check/arrow.Rcheck/00install.out + continue-on-error: true + if: always() + {% endif %} + + {% if arrow.branch == 'master' %} + {{ macros.github_login_dockerhub()|indent }} + - name: Push Docker Image + shell: bash + run: archery docker push {{ image }} + {% endif %} diff --git a/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb b/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb new file mode 100644 index 000000000..5f39666bd --- /dev/null +++ b/src/arrow/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -0,0 +1,69 @@ +class ApacheArrow < Formula + desc "Columnar in-memory analytics layer designed to accelerate big data" + homepage "https://arrow.apache.org/" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.1/apache-arrow-6.0.1.tar.gz" + sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" + 
license "Apache-2.0" + head "https://github.com/apache/arrow.git" + + depends_on "boost" => :build + depends_on "cmake" => :build + depends_on "llvm" => :build + depends_on "brotli" + depends_on "glog" + depends_on "grpc" + depends_on "lz4" + depends_on "numpy" + depends_on "openssl@1.1" + depends_on "protobuf" + depends_on "python@3.9" + depends_on "rapidjson" + depends_on "snappy" + depends_on "thrift" + depends_on "zstd" + + def install + ENV.cxx11 + # link against system libc++ instead of llvm provided libc++ + ENV.remove "HOMEBREW_LIBRARY_PATHS", Formula["llvm"].opt_lib + args = %W[ + -DARROW_FLIGHT=ON + -DARROW_GANDIVA=ON + -DARROW_JEMALLOC=ON + -DARROW_MIMALLOC=ON + -DARROW_ORC=ON + -DARROW_PARQUET=ON + -DARROW_PLASMA=ON + -DARROW_PROTOBUF_USE_SHARED=ON + -DARROW_PYTHON=ON + -DARROW_WITH_BZ2=ON + -DARROW_WITH_ZLIB=ON + -DARROW_WITH_ZSTD=ON + -DARROW_WITH_LZ4=ON + -DARROW_WITH_SNAPPY=ON + -DARROW_WITH_BROTLI=ON + -DARROW_INSTALL_NAME_RPATH=OFF + -DPython3_EXECUTABLE=#{Formula["python@3.9"].bin/"python3"} + ] + # Re-enable -DARROW_S3=ON and add back aws-sdk-cpp to depends_on in ARROW-6437 + + mkdir "build" + cd "build" do + system "cmake", "../cpp", *std_cmake_args, *args + system "make" + system "make", "install" + end + end + + test do + (testpath/"test.cpp").write <<~EOS + #include "arrow/api.h" + int main(void) { + arrow::int64(); + return 0; + } + EOS + system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test" + system "./test" + end +end diff --git a/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb new file mode 100644 index 000000000..2a77b01fc --- /dev/null +++ b/src/arrow/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# https://github.com/autobrew/homebrew-core/blob/master/Formula/apache-arrow.rb +class ApacheArrow < Formula + desc "Columnar in-memory analytics layer designed to accelerate big data" + homepage "https://arrow.apache.org/" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.1/apache-arrow-6.0.1.tar.gz" + sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" + head "https://github.com/apache/arrow.git" + + bottle do + cellar :any + sha256 "a55211ba6f464681b7ca1b48defdad9cfbe1cf6fad8ff9ec875dc5a3c8f3c5ed" => :el_capitan_or_later + root_url "https://autobrew.github.io/bottles" + end + + # NOTE: if you add something here, be sure to add to PKG_LIBS in r/tools/autobrew + depends_on "boost" => :build + depends_on "cmake" => :build + depends_on "aws-sdk-cpp" + depends_on "lz4" + depends_on "snappy" + depends_on "thrift" + depends_on "zstd" + + def install + ENV.cxx11 + args = %W[ + -DARROW_BUILD_SHARED=OFF + -DARROW_BUILD_UTILITIES=ON + -DARROW_COMPUTE=ON + -DARROW_CSV=ON + -DARROW_DATASET=ON + -DARROW_FILESYSTEM=ON + -DARROW_HDFS=OFF + -DARROW_JEMALLOC=ON + -DARROW_JSON=ON + -DARROW_MIMALLOC=ON + -DARROW_PARQUET=ON + -DARROW_PYTHON=OFF + -DARROW_S3=ON + -DARROW_USE_GLOG=OFF + -DARROW_VERBOSE_THIRDPARTY_BUILD=ON + -DARROW_WITH_LZ4=ON + -DARROW_WITH_SNAPPY=ON 
+ -DARROW_WITH_ZLIB=ON + -DARROW_WITH_ZSTD=ON + -DCMAKE_UNITY_BUILD=OFF + -DPARQUET_BUILD_EXECUTABLES=ON + -DLZ4_HOME=#{Formula["lz4"].prefix} + -DTHRIFT_HOME=#{Formula["thrift"].prefix} + ] + + mkdir "build" + cd "build" do + system "cmake", "../cpp", *std_cmake_args, *args + system "make" + system "make", "install" + end + end + + test do + (testpath/"test.cpp").write <<~EOS + #include "arrow/api.h" + int main(void) { + arrow::int64(); + return 0; + } + EOS + system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test" + system "./test" + end +end diff --git a/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml b/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml new file mode 100644 index 000000000..232cc38a9 --- /dev/null +++ b/src/arrow/dev/tasks/homebrew-formulae/github.macos.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + autobrew: + name: "Autobrew" + runs-on: macOS-latest + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Configure homebrew formula for testing + env: + ARROW_FORMULA: ./arrow/dev/tasks/homebrew-formulae/{{ formula }} + run: | + # Pin the current commit in the formula to test so that we're not always pulling from master + sed -i.bak -E -e 's@https://github.com/apache/arrow.git"$@{{ arrow.remote }}.git", revision: "{{ arrow.head }}"@' $ARROW_FORMULA && rm -f $ARROW_FORMULA.bak + # Sometimes crossbow gives a remote URL with .git and sometimes not. Make sure there's only one + sed -i.bak -E -e 's@.git.git@.git@' $ARROW_FORMULA && rm -f $ARROW_FORMULA.bak + brew update + brew --version + brew unlink python@2 || true + brew config + brew doctor || true + cp $ARROW_FORMULA $(brew --repository homebrew/core)/Formula/apache-arrow.rb + - name: Test formula + run: | + brew install -v --HEAD apache-arrow + brew test apache-arrow + brew audit --strict apache-arrow diff --git a/src/arrow/dev/tasks/java-jars/README.md b/src/arrow/dev/tasks/java-jars/README.md new file mode 100644 index 000000000..1d61662d4 --- /dev/null +++ b/src/arrow/dev/tasks/java-jars/README.md @@ -0,0 +1,29 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +# Jars. + +This directory is responsible for generating the jar files for the Arrow components that depend on C++ shared libraries to execute. + +The Arrow C++ libraries are compiled both on MacOS and Linux distributions, with their dependencies linked statically, and they are added +in the jars at the end, so the file can be used on both systems. + +## Linux Docker Image +To compile the C++ libraries in Linux, a docker image is used. +It is created using the **ci/docker/java-bundled-jars.dockerfile** file. +If it is necessary to add any new dependency, you need to change that file.
\ No newline at end of file diff --git a/src/arrow/dev/tasks/java-jars/github.yml b/src/arrow/dev/tasks/java-jars/github.yml new file mode 100644 index 000000000..81d31dd4c --- /dev/null +++ b/src/arrow/dev/tasks/java-jars/github.yml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + + build-cpp-ubuntu: + name: Build C++ Libs Ubuntu + runs-on: ubuntu-18.04 + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + - name: Build C++ Libs + run: archery docker run java-jni-manylinux-2014 + - name: Compress into single artifact + run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/ + - name: Upload Artifacts + uses: actions/upload-artifact@v2 + with: + name: ubuntu-shared-lib + path: arrow-shared-libs-linux.tar.gz + {% if arrow.branch == 'master' %} + {{ macros.github_login_dockerhub()|indent }} + - name: Push Docker Image + shell: bash + run: archery docker push java-jni-manylinux-2014 + {% endif %} + + build-cpp-macos: + name: Build C++ Libs MacOS + runs-on: macos-latest + env: + MACOSX_DEPLOYMENT_TARGET: "10.11" + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + - name: Install Dependencies + run: brew bundle --file=arrow/cpp/Brewfile + - name: Build C Data Interface lib + run: | + set -e + arrow/ci/scripts/java_cdata_build.sh \ + $GITHUB_WORKSPACE/arrow \ + $GITHUB_WORKSPACE/arrow/java-native-build \ + $GITHUB_WORKSPACE/arrow/java-dist + - name: Build C++ Libs + run: | + set -e + arrow/ci/scripts/java_jni_macos_build.sh \ + $GITHUB_WORKSPACE/arrow \ + $GITHUB_WORKSPACE/arrow/cpp-build \ + $GITHUB_WORKSPACE/arrow/java-dist + - name: Compress into single artifact + run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/java-dist/ + - name: Upload Artifacts + uses: actions/upload-artifact@v2 + with: + name: macos-shared-lib + path: arrow-shared-libs-macos.tar.gz + + package-jars: + name: Build Jar Files + runs-on: macos-latest + needs: [build-cpp-macos, build-cpp-ubuntu] + steps: + {{ macros.github_checkout_arrow()|indent }} + - name: Download Linux C++ Libraries + uses: actions/download-artifact@v2 + with: + name: ubuntu-shared-lib + - name: 
Download MacOS C++ Library + uses: actions/download-artifact@v2 + with: + name: macos-shared-lib + - name: Descompress artifacts + run: | + tar -xvzf arrow-shared-libs-macos.tar.gz + tar -xvzf arrow-shared-libs-linux.tar.gz + - name: Test that Shared Libraries Exist + run: | + test -f arrow/java-dist/libarrow_cdata_jni.dylib + test -f arrow/java-dist/libarrow_dataset_jni.dylib + test -f arrow/java-dist/libgandiva_jni.dylib + test -f arrow/java-dist/libarrow_orc_jni.dylib + test -f arrow/java-dist/libarrow_cdata_jni.so + test -f arrow/java-dist/libarrow_dataset_jni.so + test -f arrow/java-dist/libarrow_orc_jni.so + test -f arrow/java-dist/libgandiva_jni.so + - name: Build Bundled Jar + run: | + set -e + pushd arrow/java + mvn versions:set -DnewVersion={{ arrow.no_rc_version }} + popd + arrow/ci/scripts/java_full_build.sh \ + $GITHUB_WORKSPACE/arrow \ + $GITHUB_WORKSPACE/arrow/java-dist + {{ macros.github_upload_releases(["arrow/java-dist/*.jar", "arrow/java-dist/*.pom"])|indent }} diff --git a/src/arrow/dev/tasks/linux-packages/.gitignore b/src/arrow/dev/tasks/linux-packages/.gitignore new file mode 100644 index 000000000..0e49a90c1 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/.gitignore @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +/*/*.tar.gz +/*/apt/repositories/ +/*/apt/tmp/ +/*/apt/build.sh +/*/apt/env.sh +/*/yum/repositories/ +/*/yum/tmp/ +/*/yum/build.sh +/*/yum/env.sh +/apt/repositories/ +/yum/repositories/ diff --git a/src/arrow/dev/tasks/linux-packages/README.md b/src/arrow/dev/tasks/linux-packages/README.md new file mode 100644 index 000000000..cafcc04ed --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/README.md @@ -0,0 +1,40 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Linux packages for Apache Arrow C++ and GLib + +## Requirements + + * Ruby + * Docker + * Tools to build tar.gz for Apache Arrow C++ and GLib + +## How to build .deb packages + +```console +% rake version:update +% rake apt +``` + +## How to build .rpm packages + +```console +% rake version:update +% rake yum +``` diff --git a/src/arrow/dev/tasks/linux-packages/Rakefile b/src/arrow/dev/tasks/linux-packages/Rakefile new file mode 100644 index 000000000..58cd981fe --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/Rakefile @@ -0,0 +1,249 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "English" + +require_relative "../../release/binary-task" +require_relative "helper" + +packages = [ + "apache-arrow", + "apache-arrow-apt-source", + "apache-arrow-release", +] + + +namespace :apt do + desc "Build deb packages" + task :build do + packages.each do |package| + cd(package) do + ruby("-S", "rake", "apt:build") + end + end + end +end + +namespace :yum do + desc "Build RPM packages" + task :build do + packages.each do |package| + cd(package) do + ruby("-S", "rake", "yum:build") + end + end + end +end + +namespace :version do + desc "Update versions" + task :update do + packages.each do |package| + cd(package) do + ruby("-S", "rake", "version:update") + end + end + end +end + +namespace :docker do + desc "Pull built images" + task :pull do + packages.each do |package| + cd(package) do + ruby("-S", "rake", "docker:pull") + end + end + end + + desc "Push built images" + task :push do + packages.each do |package| + cd(package) do + ruby("-S", "rake", "docker:push") + end + end + end +end + + +class LocalBinaryTask < BinaryTask + include Helper::ApacheArrow + + def initialize(packages) + @packages = packages + super() + end + + def define + define_apt_test_task + define_yum_test_task + end + + private + def latest_commit_time(git_directory) + cd(git_directory) do + return 
Time.iso8601(`git log -n 1 --format=%aI`.chomp).utc + end + end + + def version + @version ||= detect_version(detect_release_time) + end + + def resolve_docker_image(target) + image = "" + target = target.gsub(/\Aamazon-linux/, "amazonlinux") + case target + when /-(?:arm64|aarch64)\z/ + target = $PREMATCH + image << "arm64v8/" + end + image << target.gsub(/-/, ":") + end + + def verify(target) + verify_command_line = [ + "docker", + "run", + "--rm", + "--log-driver", "none", + "--volume", "#{File.expand_path(arrow_source_dir)}:/arrow:delegated", + ] + if $stdin.tty? + verify_command_line << "--interactive" + verify_command_line << "--tty" + else + verify_command_line.concat(["--attach", "STDOUT"]) + verify_command_line.concat(["--attach", "STDERR"]) + end + verify_command_line << resolve_docker_image(target) + case target + when /\Adebian-/, /\Aubuntu-/ + verify_command_line << "/arrow/dev/release/verify-apt.sh" + else + verify_command_line << "/arrow/dev/release/verify-yum.sh" + end + verify_command_line << version + verify_command_line << "local" + sh(*verify_command_line) + end + + def apt_test_targets + targets = (ENV["APT_TARGETS"] || "").split(",") + targets = apt_test_targets_default if targets.empty? + targets + end + + def apt_test_targets_default + # Disable arm64 targets by default for now + # because they require some setups on host. 
+ [ + "debian-buster", + # "debian-buster-arm64", + "debian-bullseye", + # "debian-bullseye-arm64", + "ubuntu-xenial", + # "ubuntu-xenial-arm64", + "ubuntu-bionic", + # "ubuntu-bionic-arm64", + "ubuntu-focal", + # "ubuntu-focal-arm64", + "ubuntu-hirsute", + # "ubuntu-hirsute-arm64", + "ubuntu-impish", + # "ubuntu-impish-arm64", + ] + end + + def define_apt_test_task + namespace :apt do + desc "Test deb packages" + task :test do + repositories_dir = "apt/repositories" + rm_rf(repositories_dir) + @packages.each do |package| + package_repositories = "#{package}/apt/repositories" + next unless File.exist?(package_repositories) + sh("rsync", "-a", "#{package_repositories}/", repositories_dir) + end + Dir.glob("#{repositories_dir}/ubuntu/pool/*") do |code_name_dir| + universe_dir = "#{code_name_dir}/universe" + next unless File.exist?(universe_dir) + mv(universe_dir, "#{code_name_dir}/main") + end + base_dir = "nonexistent" + merged_dir = "apt/merged" + apt_update(base_dir, repositories_dir, merged_dir) + Dir.glob("#{merged_dir}/*/dists/*") do |dists_code_name_dir| + prefix = dists_code_name_dir.split("/")[-3..-1].join("/") + mv(Dir.glob("#{dists_code_name_dir}/*Release*"), + "#{repositories_dir}/#{prefix}") + end + apt_test_targets.each do |target| + verify(target) + end + end + end + end + + def yum_test_targets + targets = (ENV["YUM_TARGETS"] || "").split(",") + targets = yum_test_targets_default if targets.empty? + targets + end + + def yum_test_targets_default + # Disable aarch64 targets by default for now + # because they require some setups on host. 
+ [ + "almalinux-8", + # "almalinux-8-aarch64", + "amazon-linux-2", + # "amazon-linux-2-aarch64", + "centos-7", + "centos-8", + # "centos-8-aarch64", + ] + end + + def define_yum_test_task + namespace :yum do + desc "Test RPM packages" + task :test do + repositories_dir = "yum/repositories" + rm_rf(repositories_dir) + @packages.each do |package| + package_repositories = "#{package}/yum/repositories" + next unless File.exist?(package_repositories) + sh("rsync", "-a", "#{package_repositories}/", repositories_dir) + end + rpm_sign(repositories_dir) + base_dir = "nonexistent" + yum_update(base_dir, repositories_dir) + yum_test_targets.each do |target| + verify(target) + end + end + end + end +end + +local_binary_task = LocalBinaryTask.new(packages) +local_binary_task.define diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile new file mode 100644 index 000000000..210fa951e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile @@ -0,0 +1,64 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "../helper" +require_relative "../package-task" + +class ApacheArrowAptSourcePackageTask < PackageTask + include Helper::ApacheArrow + + def initialize + release_time = detect_release_time + super("apache-arrow-apt-source", + detect_version(release_time), + release_time, + :rc_build_type => :release) + end + + private + def define_archive_task + file @archive_name do + rm_rf(@archive_base_name) + mkdir(@archive_base_name) + download("https://downloads.apache.org/arrow/KEYS", + "#{@archive_base_name}/KEYS") + sh("tar", "czf", @archive_name, @archive_base_name) + rm_rf(@archive_base_name) + end + + if deb_archive_name != @archive_name + file deb_archive_name => @archive_name do + if @archive_base_name == deb_archive_base_name + cp(@archive_name, deb_archive_name) + else + sh("tar", "xf", @archive_name) + mv(@archive_base_name, deb_archive_base_name) + sh("tar", "czf", deb_archive_name, deb_archive_base_name) + end + end + end + end + + def enable_yum? + false + end +end + +task = ApacheArrowAptSourcePackageTask.new +task.define diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile new file mode 100644 index 000000000..f9541efde --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bookworm/Dockerfile @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM debian:bookworm + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile new file mode 100644 index 000000000..b0842a0c0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM debian:bullseye + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile new file mode 100644 index 000000000..0d37f5dee --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM debian:buster + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile new file mode 100644 index 000000000..53e11fb7e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:bionic + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile new file mode 100644 index 000000000..dc902d14d --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:focal + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile new file mode 100644 index 000000000..8b6fd7f0e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:hirsute + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile new file mode 100644 index 000000000..640cd715d --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-impish/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:impish + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install new file mode 100644 index 000000000..7bcb2ecc9 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install @@ -0,0 +1,2 @@ +etc/apt/sources.list.d/* +usr/share/keyrings/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog new file mode 100644 index 000000000..27e14c802 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -0,0 +1,11 @@ +apache-arrow-apt-source (6.0.1-1) unstable; urgency=low + + * New upstream release. + + -- Sutou Kouhei <kou@clear-code.com> Wed, 10 Nov 2021 02:04:45 -0000 + +apache-arrow-apt-source (6.0.0-1) unstable; urgency=low + + * New upstream release. 
+ + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 21 Oct 2021 19:01:54 -0000 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat new file mode 100644 index 000000000..ec635144f --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat @@ -0,0 +1 @@ +9 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control new file mode 100644 index 000000000..f54d52f98 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control @@ -0,0 +1,23 @@ +Source: apache-arrow-apt-source +Section: misc +Priority: important +Maintainer: Apache Arrow Developers <dev@arrow.apache.org> +Build-Depends: + debhelper (>= 9), + gnupg, + lsb-release +Standards-Version: 3.9.7 +Homepage: https://arrow.apache.org/ + +Package: apache-arrow-apt-source +Section: misc +Architecture: all +Replaces: apache-arrow-archive-keyring +Breaks: apache-arrow-archive-keyring +Depends: + ${misc:Depends}, + apt-transport-https, + gnupg +Description: GnuPG archive key of the Apache Arrow archive + The Apache Arrow project digitally signs its Release files. This + package contains the archive key used for that. 
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright new file mode 100644 index 000000000..274d64ca0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright @@ -0,0 +1,26 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: Apache Arrow +Upstream-Contact: Apache Arrow Developers <dev@arrow.apache.org> + +Files: * +Copyright: 2016 The Apache Software Foundation +License: Apache-2.0 + +License: Apache-2.0 + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the full text of the Apache Software License version 2 can + be found in the file `/usr/share/common-licenses/Apache-2.0'. diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules new file mode 100755 index 000000000..bf7a85c8c --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules @@ -0,0 +1,37 @@ +#!/usr/bin/make -f +# -*- makefile-gmake -*- +# +# Uncomment this to turn on verbose mode. 
+#export DH_VERBOSE=1 +# This has to be exported to make some magic below work. +export DH_OPTIONS + +%: + dh $@ + +override_dh_auto_build: + gpg \ + --no-default-keyring \ + --keyring ./apache-arrow-apt-source.gpg \ + --import KEYS + + ( \ + distribution=$$(lsb_release --id --short | tr 'A-Z' 'a-z'); \ + code_name=$$(lsb_release --codename --short); \ + echo "Types: deb deb-src"; \ + echo "URIs: https://apache.jfrog.io/artifactory/arrow/$${distribution}/"; \ + echo "Suites: $${code_name}"; \ + echo "Components: main"; \ + echo "Signed-By: /usr/share/keyrings/apache-arrow-apt-source.gpg"; \ + ) > apache-arrow.sources + +override_dh_install: + install -d debian/tmp/usr/share/keyrings/ + install -m 0644 apache-arrow-apt-source.gpg \ + debian/tmp/usr/share/keyrings/ + + install -d debian/tmp/etc/apt/sources.list.d/ + install -m 0644 apache-arrow.sources \ + debian/tmp/etc/apt/sources.list.d/ + + dh_install diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format new file mode 100644 index 000000000..163aaf8d8 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile new file mode 100644 index 000000000..4a341c6f1 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/Rakefile @@ -0,0 +1,66 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require_relative "../helper" +require_relative "../package-task" + +class ApacheArrowReleasePackageTask < PackageTask + include Helper::ApacheArrow + + def initialize + release_time = detect_release_time + super("apache-arrow-release", + detect_version(release_time), + release_time, + :rc_build_type => :release) + end + + private + def repo_path + "#{yum_dir}/Apache-Arrow.repo" + end + + def define_archive_task + file @archive_name => [repo_path] do + rm_rf(@archive_base_name) + mkdir(@archive_base_name) + download("https://downloads.apache.org/arrow/KEYS", + "#{@archive_base_name}/KEYS") + cp(repo_path, @archive_base_name) + sh("tar", "czf", @archive_name, @archive_base_name) + rm_rf(@archive_base_name) + end + + if rpm_archive_name != @archive_name + file rpm_archive_name => @archive_name do + sh("tar", "xf", @archive_name) + rpm_archive_base_name = File.basename(rpm_archive_name, ".tar.gz") + mv(@archive_base_name, rpm_archive_base_name) + sh("tar", "czf", rpm_archive_name, rpm_archive_base_name) + end + end + end + + def enable_apt? + false + end +end + +task = ApacheArrowReleasePackageTask.new +task.define diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo new file mode 100644 index 000000000..0d1666520 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[apache-arrow-almalinux] +name=Apache Arrow for AlmaLinux $releasever - $basearch +baseurl=https://apache.jfrog.io/artifactory/arrow/almalinux/$releasever/$basearch/ +gpgcheck=1 +enabled=0 +gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow + +[apache-arrow-amazon-linux] +name=Apache Arrow for Amazon Linux $releasever - $basearch +baseurl=https://apache.jfrog.io/artifactory/arrow/amazon-linux/$releasever/$basearch/ +gpgcheck=1 +enabled=0 +gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow + +[apache-arrow-centos] +name=Apache Arrow for CentOS $releasever - $basearch +baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/ +gpgcheck=1 +enabled=0 +gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow + +[apache-arrow-rhel] +name=Apache Arrow for RHEL $releasever - $basearch +baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/ +gpgcheck=1 +enabled=0 +gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile new file mode 100644 index 000000000..5be1edc3d --- /dev/null +++ 
b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/almalinux-8/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM almalinux:8 + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ + dnf install --enablerepo=powertools -y ${quiet} \ + rpmdevtools && \ + dnf clean ${quiet} all diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile new file mode 100644 index 000000000..800df6c68 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Amazon Linux 2 image that only adds rpmdevtools on top of the base image.
+FROM amazonlinux:2
+
+# Build argument: any value other than "yes" makes yum run with --quiet.
+ARG DEBUG
+
+# Install rpmdevtools, then drop the yum caches.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+  yum install -y ${quiet} \
+    rpmdevtools && \
+  yum clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
new file mode 100644
index 000000000..594cda373
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
@@ -0,0 +1,113 @@
+# -*- sh-shell: rpm -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Choose the repository management command by platform generation:
+# dnf config-manager when the rhel macro is 8 or later, yum-config-manager
+# otherwise. Both helper macros take the repository id as their argument.
+# yum_repository_disable is defined alongside the enable helper but is not
+# invoked anywhere in this spec.
+%define use_dnf (%{rhel} >= 8)
+%if %{use_dnf}
+%define yum_repository_enable() (dnf config-manager --set-enabled %1)
+%define yum_repository_disable() (dnf config-manager --set-disabled %1)
+%else
+%define yum_repository_enable() (yum-config-manager --enable %1)
+%define yum_repository_disable() (yum-config-manager --disable %1)
+%endif
+
+# The @PACKAGE@, @VERSION@ and @RELEASE@ placeholders are substituted before
+# this template is handed to rpmbuild (presumably by the packaging Rake
+# tasks - confirm against the generator).
+Name: @PACKAGE@
+Version: @VERSION@
+Release: @RELEASE@%{?dist}
+Summary: Apache Arrow release files
+
+License: Apache-2.0
+URL: https://arrow.apache.org/
+Source0: @PACKAGE@-%{version}.tar.gz
+
+BuildArch: noarch
+
+# The package providing the config-manager command chosen above must be
+# present because the post-install scriptlet calls it.
+Requires: epel-release
+%if %{use_dnf}
+Requires: dnf-command(config-manager)
+%else
+Requires: yum-utils
+%endif
+
+%description
+Apache Arrow release files.
+
+%prep
+%setup -q
+
+%build
+# ID and VERSION_ID are read from /etc/os-release of the build host.
+distribution=$(. /etc/os-release && echo "${ID}")
+if [ "${distribution}" = "rhel" ]; then
+  # We use distribution version explicitly for RHEL because we can't
+  # use symbolic link on Artifactory. CentOS uses 7 and 8 but RHEL uses
+  # 7Server and 8Server for $releasever. If we can use symbolic link
+  # on Artifactory we can use $releasever directly.
+  distribution_version=$(. /etc/os-release && echo "${VERSION_ID}")
+  # Replace every literal $releasever in the repository file with the
+  # concrete version read above.
+  sed -i'' -e "s/\\\$releasever/${distribution_version}/g" Apache-Arrow.repo
+fi
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+# Install the signing key under the path that the gpgkey= entries of
+# Apache-Arrow.repo refer to.
+%{__install} -Dp -m0644 KEYS \
+   $RPM_BUILD_ROOT%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+
+%{__install} -d $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d/
+%{__install} -Dp -m0644 Apache-Arrow.repo \
+   $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d/Apache-Arrow.repo
+
+%files
+%defattr(-, root, root, 0755)
+%doc
+%dir %{_sysconfdir}/yum.repos.d/
+%dir %{_sysconfdir}/pki/rpm-gpg/
+%{_sysconfdir}/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
+%config(noreplace) %{_sysconfdir}/yum.repos.d/Apache-Arrow.repo
+
+%post
+# Enable exactly one repository, chosen from the distribution named in
+# /etc/system-release. Hosts that match none of the patterns (including
+# hosts where the file is missing) fall back to the CentOS repository.
+if grep -q 'Amazon Linux release 2' /etc/system-release 2>/dev/null; then
+  %{yum_repository_enable apache-arrow-amazon-linux}
+elif grep -q 'Red Hat Enterprise Linux' /etc/system-release 2>/dev/null; then
+  %{yum_repository_enable apache-arrow-rhel}
+elif grep -q 'AlmaLinux' /etc/system-release 2>/dev/null; then
+  %{yum_repository_enable apache-arrow-almalinux}
+else
+  %{yum_repository_enable apache-arrow-centos}
+fi
+
+%changelog
+* Wed Nov 10 2021 Sutou Kouhei <kou@clear-code.com> - 6.0.1-1
+- New upstream release.
+
+* Thu Oct 21 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 6.0.0-1
+- New upstream release.
+
+* Mon Jan 18 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 3.0.0-1
+- New upstream release.
+
+* Mon Oct 12 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 2.0.0-1
+- New upstream release.
+
+* Mon Jul 20 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 1.0.0-1
+- New upstream release.
+
+* Thu Apr 16 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.17.0-1
+- New upstream release.
+
+* Thu Jan 30 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.16.0-1
+- New upstream release.
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
new file mode 100644
index 000000000..236b0e297
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# CentOS 7 image that only adds rpmdevtools on top of the base image.
+FROM centos:7
+
+# Build argument: any value other than "yes" makes yum run with --quiet.
+ARG DEBUG
+
+# Install rpmdevtools, then drop the yum caches.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+  yum install -y ${quiet} \
+    rpmdevtools && \
+  yum clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
new file mode 100644
index 000000000..e36850630
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# CentOS 8 image that only adds rpmdevtools on top of the base image.
+FROM centos:8
+
+# Build argument: any value other than "yes" makes dnf run with --quiet.
+ARG DEBUG
+
+# Install rpmdevtools with the powertools repository enabled for this
+# transaction, then drop the dnf caches.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+  dnf install --enablerepo=powertools -y ${quiet} \
+    rpmdevtools && \
+  dnf clean ${quiet} all
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile b/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile
new file mode 100644
index 000000000..9dfae955e
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -0,0 +1,162 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +require_relative "../helper" +require_relative "../package-task" + +class ApacheArrowPackageTask < PackageTask + include Helper::ApacheArrow + + def initialize + release_time = detect_release_time + super("apache-arrow", + detect_version(release_time), + release_time, + :rc_build_type => :release) + @rpm_package = "arrow" + end + + private + def define_archive_task + file @archive_name do + case @version + when /\A\d+\.\d+\.\d+-rc\d+\z/ + download_rc_archive + when /\A\d+\.\d+\.\d+\z/ + download_released_archive + else + build_archive + end + end + + if deb_archive_name != @archive_name + file deb_archive_name => @archive_name do + cp(@archive_name, deb_archive_name) + end + end + + if rpm_archive_name != @archive_name + file rpm_archive_name => @archive_name do + cp(@archive_name, rpm_archive_name) + end + end + end + + def download_rc_archive + base_url = "https://dist.apache.org/repos/dist/dev/arrow" + archive_name_no_rc = @archive_name.gsub(/-rc\d+(\.tar\.gz)\z/, "\\1") + url = "#{base_url}/#{@package}-#{@version}/#{archive_name_no_rc}" + download(url, @archive_name) + end + + def download_released_archive + mirror_base_url = "https://www.apache.org/dyn/closer.lua/arrow" + mirror_list_url = "#{mirror_base_url}/arrow-#{@version}/#{@archive_name}" + open(mirror_list_url) do |response| + if /href="(.+?\/#{Regexp.escape(@archive_name)})"/ =~ response.read + download($1, ".") + end + end + end + + def build_archive + cd(arrow_source_dir) do + sh("git", "archive", "HEAD", + "--prefix", "#{@archive_base_name}/", + "--output", @full_archive_name) + end + end + + def apt_arm64_cuda_available_target?(target) + false + end + + def apt_prepare_debian_control_cuda_architecture(control, target) + if apt_arm64_cuda_available_target?(target) + cuda_architecture = "any" + else + cuda_architecture = "i386 amd64" + end + control.gsub(/@CUDA_ARCHITECTURE@/, cuda_architecture) + end + + def apt_prepare_debian_control_c_ares(control, target) + case target + when /\Aubuntu-bionic/ 
+ use_system_c_ares = "#" + else + use_system_c_ares = "" + end + control.gsub(/@USE_SYSTEM_C_ARES@/, use_system_c_ares) + end + + def apt_prepare_debian_control_grpc(control, target) + case target + when /\Adebian-buster/, /\Aubuntu-(?:bionic|focal)/ + use_system_grpc = "#" + else + use_system_grpc = "" + end + control.gsub(/@USE_SYSTEM_GRPC@/, use_system_grpc) + end + + def apt_prepare_debian_control_thrift(control, target) + case target + when /\Aubuntu-bionic/ + use_system_thrift = "#" + else + use_system_thrift = "" + end + control.gsub(/@USE_SYSTEM_THRIFT@/, use_system_thrift) + end + + def apt_prepare_debian_control_utf8proc(control, target) + case target + when /\Aubuntu-bionic/ + use_system_utf8proc = "#" + else + use_system_utf8proc = "" + end + control.gsub(/@USE_SYSTEM_UTF8PROC@/, use_system_utf8proc) + end + + def apt_prepare_debian_control_zstd(control, target) + case target + when /\Adebian-buster/, /\Aubuntu-bionic/ + use_system_zstd = "#" + else + use_system_zstd = "" + end + control.gsub(/@USE_SYSTEM_ZSTD@/, use_system_zstd) + end + + def apt_prepare_debian_control(control_in, target) + control = control_in.dup + control = apt_prepare_debian_control_cuda_architecture(control, target) + control = apt_prepare_debian_control_c_ares(control, target) + control = apt_prepare_debian_control_grpc(control, target) + control = apt_prepare_debian_control_thrift(control, target) + control = apt_prepare_debian_control_utf8proc(control, target) + control = apt_prepare_debian_control_zstd(control, target) + control + end +end + +task = ApacheArrowPackageTask.new +task.define diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from new file mode 100644 index 000000000..022eaf290 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm-arm64/from @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/debian:bookworm diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile new file mode 100644 index 000000000..e88ed53ff --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Base image; overridable through the FROM build argument.
+ARG FROM=debian:bookworm
+FROM ${FROM}
+
+# Make debconf use its Noninteractive frontend.
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+# Do not pull in recommended packages.
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+# Add the contrib and non-free components to every sources.list entry that
+# ends in "main".
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+# Build argument: any value other than "yes" makes apt run with -qq.
+ARG DEBUG
+# Install the build dependencies, add nvidia-cuda-toolkit when apt lists
+# such a package, install meson through pip and link it into /usr/bin,
+# then drop the apt caches and package lists so they are not kept in the
+# image layer.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    clang \
+    cmake \
+    debhelper \
+    devscripts \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libc-ares-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgmock-dev \
+    libgoogle-glog-dev \
+    libgrpc++-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libprotoc-dev \
+    libprotobuf-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    libthrift-dev \
+    libutf8proc-dev \
+    libzstd-dev \
+    llvm-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    protobuf-compiler-grpc \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  if apt list | grep '^nvidia-cuda-toolkit/'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  fi && \
+  pip3 install --upgrade meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  apt clean && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from
new file mode 100644
index 000000000..34187b2af
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/debian:bullseye diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile new file mode 100644 index 000000000..2a5b18f5b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Base image; overridable through the FROM build argument.
+ARG FROM=debian:bullseye
+FROM ${FROM}
+
+# Make debconf use its Noninteractive frontend.
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+# Do not pull in recommended packages.
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+# Add the contrib and non-free components to every sources.list entry that
+# ends in "main".
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+# Build argument: any value other than "yes" makes apt run with -qq.
+ARG DEBUG
+# Install the build dependencies, add nvidia-cuda-toolkit when apt lists
+# such a package, install meson through pip and link it into /usr/bin,
+# then drop the apt caches and package lists.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    clang \
+    cmake \
+    debhelper \
+    devscripts \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libc-ares-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgmock-dev \
+    libgoogle-glog-dev \
+    libgrpc++-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libprotoc-dev \
+    libprotobuf-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    libthrift-dev \
+    libutf8proc-dev \
+    libzstd-dev \
+    llvm-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    protobuf-compiler-grpc \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  if apt list | grep '^nvidia-cuda-toolkit/'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  fi && \
+  pip3 install --upgrade meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  apt clean && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from
new file mode 100644
index 000000000..8da222b86
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/debian:buster diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile new file mode 100644 index 000000000..11a33a130 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Base image; overridable through the FROM build argument.
+ARG FROM=debian:buster
+FROM ${FROM}
+
+# Make debconf use its Noninteractive frontend.
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+# Do not pull in recommended packages.
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+# Add the contrib and non-free components to every sources.list entry that
+# ends in "main".
+RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list
+
+# Register buster-backports; clang-11 and llvm-11-dev are installed from it
+# below.
+RUN \
+  echo "deb http://deb.debian.org/debian buster-backports main" > \
+    /etc/apt/sources.list.d/backports.list
+
+# Build argument: any value other than "yes" makes apt run with -qq.
+ARG DEBUG
+# Install the build dependencies, then clang-11/llvm-11-dev from
+# buster-backports, add nvidia-cuda-toolkit when apt lists such a package,
+# install meson through pip and link it into /usr/bin, then drop the apt
+# caches and package lists.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    cmake \
+    debhelper \
+    devscripts \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libc-ares-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgmock-dev \
+    libgoogle-glog-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    libthrift-dev \
+    libutf8proc-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  apt install -y -V -t buster-backports ${quiet} \
+    clang-11 \
+    llvm-11-dev && \
+  if apt list | grep '^nvidia-cuda-toolkit/'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  fi && \
+  pip3 install --upgrade meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  apt clean && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from
new file mode 100644
index 000000000..c3ba00cf0
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/ubuntu:bionic diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile new file mode 100644 index 000000000..ac0c6a58d --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Base image; overridable through the FROM build argument.
+ARG FROM=ubuntu:bionic
+FROM ${FROM}
+
+# Make debconf use its Noninteractive frontend.
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+# Do not pull in recommended packages.
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+# Build argument: any value other than "yes" makes apt run with -qq.
+ARG DEBUG
+# Steps, in order:
+# 1. install the build dependencies;
+# 2. write a pkg-config file for re2 into the multiarch pkgconfig directory
+#    (presumably because the packaged libre2-dev ships none on bionic -
+#    confirm);
+# 3. add nvidia-cuda-toolkit when apt lists such a package;
+# 4. install debhelper from bionic-backports;
+# 5. install meson through pip and link it into /usr/bin;
+# 6. drop the apt caches and package lists.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    clang-10 \
+    cmake \
+    devscripts \
+    fakeroot \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgoogle-glog-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    llvm-10-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    python3-setuptools \
+    python3-wheel \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  (echo "includedir=/usr/include" && \
+   echo "libdir=/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)" && \
+   echo "" && \
+   echo "Name: re2" && \
+   echo "Description: RE2 is a fast, safe, thread-friendly regular expression engine." && \
+   echo "Version: 0.0.0" && \
+   echo "Cflags: -std=c++11 -pthread -I\${includedir}" && \
+   echo "Libs: -pthread -L\${libdir} -lre2") | \
+    tee "/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/pkgconfig/re2.pc" && \
+  if apt list | grep '^nvidia-cuda-toolkit/'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  fi && \
+  apt install -y -V -t bionic-backports ${quiet} \
+    debhelper && \
+  pip3 install --upgrade meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  apt clean && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from
new file mode 100644
index 000000000..52ab48b66
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +arm64v8/ubuntu:focal diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile new file mode 100644 index 000000000..112cc1846 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Base image; overridable through the FROM build argument.
+ARG FROM=ubuntu:focal
+FROM ${FROM}
+
+# Make debconf use its Noninteractive frontend.
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+# Do not pull in recommended packages.
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+# Build argument: any value other than "yes" makes apt run with -qq.
+ARG DEBUG
+# Install the build dependencies, add nvidia-cuda-toolkit when apt lists
+# such a package, clean the apt cache, install meson through pip (with
+# --no-use-pep517) and link it into /usr/bin, then drop the package lists.
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    clang \
+    cmake \
+    debhelper \
+    devscripts \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libc-ares-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgmock-dev \
+    libgoogle-glog-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    libthrift-dev \
+    libutf8proc-dev \
+    libzstd-dev \
+    llvm-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    python3-setuptools \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  if apt list | grep '^nvidia-cuda-toolkit/'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  fi && \
+  apt clean && \
+  python3 -m pip install --no-use-pep517 meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
new file mode 100644
index 000000000..f19ea9022
--- /dev/null
+++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/ubuntu:hirsute diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile new file mode 100644 index 000000000..7e26d3eb2 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=ubuntu:hirsute +FROM ${FROM} + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + ccache \ + clang \ + cmake \ + debhelper \ + devscripts \ + git \ + gtk-doc-tools \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + libgtest-dev \ + liblz4-dev \ + libprotoc-dev \ + libprotobuf-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + llvm-dev \ + lsb-release \ + ninja-build \ + pkg-config \ + protobuf-compiler-grpc \ + python3-dev \ + python3-numpy \ + python3-pip \ + python3-setuptools \ + rapidjson-dev \ + tzdata \ + zlib1g-dev && \ + if apt list | grep -q '^libcuda1'; then \ + apt install -y -V ${quiet} nvidia-cuda-toolkit; \ + else \ + :; \ + fi && \ + apt clean && \ + python3 -m pip install --no-use-pep517 meson && \ + ln -s /usr/local/bin/meson /usr/bin/ && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from new file mode 100644 index 000000000..0c8a1c7a1 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish-arm64/from @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/ubuntu:impish diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile new file mode 100644 index 000000000..2e94dd9b6 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-impish/Dockerfile @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=ubuntu:impish +FROM ${FROM} + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + ccache \ + clang \ + cmake \ + debhelper \ + devscripts \ + git \ + gtk-doc-tools \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + libgtest-dev \ + liblz4-dev \ + libprotoc-dev \ + libprotobuf-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + llvm-dev \ + lsb-release \ + ninja-build \ + pkg-config \ + protobuf-compiler-grpc \ + python3-dev \ + python3-numpy \ + python3-pip \ + python3-setuptools \ + rapidjson-dev \ + tzdata \ + zlib1g-dev && \ + if apt list | grep -q '^libcuda1'; then \ + apt install -y -V ${quiet} nvidia-cuda-toolkit; \ + else \ + :; \ + fi && \ + apt clean && \ + python3 -m pip install --no-use-pep517 meson && \ + ln -s /usr/local/bin/meson /usr/bin/ && \ + rm -rf /var/lib/apt/lists/* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog new file mode 100644 index 000000000..65948629e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -0,0 +1,123 @@ +apache-arrow (6.0.1-1) unstable; urgency=low + + * New upstream release. + + -- Sutou Kouhei <kou@clear-code.com> Wed, 10 Nov 2021 02:04:45 -0000 + +apache-arrow (6.0.0-1) unstable; urgency=low + + * New upstream release. 
+ + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 21 Oct 2021 19:01:54 -0000 + +apache-arrow (3.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 18 Jan 2021 21:33:18 -0000 + +apache-arrow (2.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 12 Oct 2020 23:38:01 -0000 + +apache-arrow (1.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 20 Jul 2020 20:41:07 -0000 + +apache-arrow (0.17.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 16 Apr 2020 12:05:43 -0000 + +apache-arrow (0.16.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Thu, 30 Jan 2020 20:21:44 -0000 + +apache-arrow (0.15.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Mon, 30 Sep 2019 17:19:02 -0000 + +apache-arrow (0.14.0-1) unstable; urgency=low + + * New upstream release. + + -- Sutou Kouhei <kou@clear-code.com> Fri, 28 Jun 2019 22:22:35 -0000 + +apache-arrow (0.13.0-1) unstable; urgency=low + + * New upstream release. + + -- Kouhei Sutou <kou@clear-code.com> Thu, 28 Mar 2019 02:24:58 -0000 + +apache-arrow (0.12.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Wed, 16 Jan 2019 03:29:25 -0000 + +apache-arrow (0.11.0-1) unstable; urgency=low + + * New upstream release. + + -- Kouhei Sutou <kou@clear-code.com> Thu, 04 Oct 2018 00:33:42 -0000 + +apache-arrow (0.10.0-1) unstable; urgency=low + + * New upstream release. + + -- Phillip Cloud <cpcloud@gmail.com> Thu, 02 Aug 2018 23:58:23 -0000 + +apache-arrow (0.9.0-1) unstable; urgency=low + + * New upstream release. 
+ + -- Kouhei Sutou <kou@clear-code.com> Fri, 16 Mar 2018 16:56:31 -0000 + +apache-arrow (0.8.0-1) unstable; urgency=low + + * New upstream release. + + * Add libarrow-gpu-glib0, libarrow-gpu-glib-dev and gir1.2-arrow-gpu-1.0. + + -- Uwe L. Korn <uwelk@xhochy.com> Sun, 17 Dec 2017 20:24:44 -0000 + +apache-arrow (0.7.1-2) unstable; urgency=low + + * Add libarrow-gpu0 and libarrow-gpu-dev. + + * Add libarrow-python-dev. + + -- Kouhei Sutou <kou@clear-code.com> Sun, 29 Oct 2017 21:59:13 +0900 + +apache-arrow (0.7.1-1) unstable; urgency=low + + * New upstream release. + + -- Kouhei Sutou <kou@clear-code.com> Wed, 27 Sep 2017 13:19:05 -0000 + +apache-arrow (0.7.0-1) unstable; urgency=low + + * New upstream release. + + -- Wes McKinney <wes.mckinney@twosigma.com> Tue, 12 Sep 2017 22:01:14 -0000 + +apache-arrow (0.6.0-1) unstable; urgency=low + + * New upstream release. + + -- Kouhei Sutou <kou@clear-code.com> Fri, 11 Aug 2017 21:27:51 -0000 + +apache-arrow (0.6.0.20170802-1) unstable; urgency=low + + * New upstream release. 
+ + -- Kouhei Sutou <kou@clear-code.com> Wed, 02 Aug 2017 22:28:18 -0000 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat new file mode 100644 index 000000000..48082f72f --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/compat @@ -0,0 +1 @@ +12 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in new file mode 100644 index 000000000..6b0332ed2 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -0,0 +1,640 @@ +Source: apache-arrow +Section: devel +Priority: optional +Maintainer: Apache Arrow Developers <dev@arrow.apache.org> +Build-Depends: + cmake, + debhelper (>= 12), + git, + gobject-introspection, + gtk-doc-tools, + libboost-filesystem-dev, + libboost-system-dev, + libbrotli-dev, + libbz2-dev, +@USE_SYSTEM_C_ARES@ libc-ares-dev, + libcurl4-openssl-dev, + libgirepository1.0-dev, + libgoogle-glog-dev, +@USE_SYSTEM_GRPC@ libgrpc++-dev, + libgtest-dev, + liblz4-dev, +@USE_SYSTEM_GRPC@ libprotoc-dev, +@USE_SYSTEM_GRPC@ libprotobuf-dev, + libre2-dev, + libsnappy-dev, + libssl-dev, +@USE_SYSTEM_THRIFT@ libthrift-dev, +@USE_SYSTEM_UTF8PROC@ libutf8proc-dev, +@USE_SYSTEM_ZSTD@ libzstd-dev, + ninja-build, + nvidia-cuda-toolkit [!arm64], + pkg-config, +@USE_SYSTEM_GRPC@ protobuf-compiler-grpc, + python3-dev, + python3-numpy, + tzdata, + zlib1g-dev +Build-Depends-Indep: libglib2.0-doc +Standards-Version: 3.9.8 +Homepage: https://arrow.apache.org/ + +Package: libarrow600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends} +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files. 
+ +Package: libarrow-cuda600 +Section: libs +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for CUDA support. + +Package: libarrow-dataset600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}), + libparquet600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for Dataset module. + +Package: libarrow-flight600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for Flight RPC system. + +Package: libarrow-python600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}), + python3, + python3-numpy +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for Python support. + +Package: libarrow-python-flight600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-flight600 (= ${binary:Version}), + libarrow-python600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for Flight and Python support. 
+ +Package: libarrow-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow600 (= ${binary:Version}), + libbrotli-dev, + libbz2-dev, +@USE_SYSTEM_C_ARES@ libc-ares-dev, +@USE_SYSTEM_GRPC@ libgrpc++-dev, + liblz4-dev, + libre2-dev, + libsnappy-dev, + libssl-dev, +@USE_SYSTEM_THRIFT@ libthrift-dev, +@USE_SYSTEM_UTF8PROC@ libutf8proc-dev, +@USE_SYSTEM_ZSTD@ libzstd-dev, +@USE_SYSTEM_GRPC@ protobuf-compiler-grpc, + zlib1g-dev +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files. + +Package: libarrow-cuda-dev +Section: libdevel +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libarrow-cuda600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for CUDA support. + +Package: libarrow-dataset-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libarrow-dataset600 (= ${binary:Version}), + libparquet-dev (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for dataset module. + +Package: libarrow-flight-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libarrow-flight600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for Flight RPC system. + +Package: libarrow-python-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libarrow-python600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for Python support. 
+ +Package: libarrow-python-flight-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-flight-dev (= ${binary:Version}), + libarrow-python-dev (= ${binary:Version}), + libarrow-python-flight600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for Flight and Python support. + +Package: libgandiva600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides C++ library files. + +Package: libgandiva-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libgandiva600 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides C++ header files. + +Package: libplasma600 +Section: libs +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-cuda600 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ library files to connect plasma-store-server. + +Package: plasma-store-server +Section: utils +Architecture: @CUDA_ARCHITECTURE@ +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libplasma600 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides plasma-store-server. 
+ +Package: libplasma-dev +Section: libdevel +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-cuda-dev (= ${binary:Version}), + libplasma600 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides C++ header files. + +Package: libparquet600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends} +Description: Apache Parquet is a columnar storage format + . + This package provides C++ library files to process Apache Parquet format. + +Package: libparquet-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dev (= ${binary:Version}), + libparquet600 (= ${binary:Version}) +Description: Apache Parquet is a columnar storage format + . + This package provides C++ header files. + +Package: libarrow-glib600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based library files. + +Package: gir1.2-arrow-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Apache Arrow is a data processing library for analysis + . + This package provides GObject Introspection typelib files. + +Package: libarrow-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libglib2.0-dev, + libarrow-dev (= ${binary:Version}), + libarrow-glib600 (= ${binary:Version}), + gir1.2-arrow-1.0 (= ${binary:Version}) +Suggests: libarrow-glib-doc +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based header files. 
+ +Package: libarrow-glib-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Apache Arrow is a data processing library for analysis + . + This package provides documentations. + +Package: libarrow-cuda-glib600 +Section: libs +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib600 (= ${binary:Version}), + libarrow-cuda600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based library files for CUDA support. + +Package: gir1.2-arrow-cuda-1.0 +Section: introspection +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Apache Arrow is a data processing library for analysis + . + This package provides GObject Introspection typelib files for CUDA support. + +Package: libarrow-cuda-glib-dev +Section: libdevel +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-cuda-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libarrow-cuda-glib600 (= ${binary:Version}), + gir1.2-arrow-cuda-1.0 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based header files for CUDA support. + +Package: libarrow-dataset-glib600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib600 (= ${binary:Version}), + libarrow-dataset600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based library files for dataset module. 
+ +Package: gir1.2-arrow-dataset-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Apache Arrow is a data processing library for analysis + . + This package provides GObject Introspection typelib files for dataset module. + +Package: libarrow-dataset-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-dataset-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libarrow-dataset-glib600 (= ${binary:Version}), + gir1.2-arrow-dataset-1.0 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based header files for dataset module. + +Package: libarrow-dataset-glib-doc +Section: doc +Architecture: any +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libarrow-glib-doc +Description: Apache Arrow is a data processing library for analysis + . + This package provides documentations for dataset module. + +Package: libarrow-flight-glib600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib600 (= ${binary:Version}), + libarrow-flight600 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based library files for Apache Arrow Flight. + +Package: gir1.2-arrow-flight-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Apache Arrow is a data processing library for analysis + . + This package provides GObject Introspection typelib files for Apache Arrow + Flight. 
+ +Package: libarrow-flight-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-flight-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libarrow-flight-glib600 (= ${binary:Version}), + gir1.2-arrow-flight-1.0 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides GLib based header files for Apache Arrow Flight. + +Package: libarrow-flight-glib-doc +Section: doc +Architecture: any +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libarrow-glib-doc +Description: Apache Arrow is a data processing library for analysis + . + This package provides documentations for Apache Arrow Flight. + +Package: libgandiva-glib600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib600 (= ${binary:Version}), + libgandiva600 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GLib based library files. + +Package: gir1.2-gandiva-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GObject Introspection typelib files. + +Package: libgandiva-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libgandiva-dev (= ${binary:Version}), + libarrow-glib-dev (= ${binary:Version}), + libgandiva-glib600 (= ${binary:Version}), + gir1.2-gandiva-1.0 (= ${binary:Version}) +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides GLib based header files. 
+ +Package: libgandiva-glib-doc +Section: doc +Architecture: any +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Gandiva is a toolset for compiling and evaluating expressions + on Arrow Data. + . + This package provides documentations. + +Package: libplasma-glib600 +Section: libs +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-cuda-glib600 (= ${binary:Version}), + libplasma600 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based library files to connect plasma-store-server. + +Package: gir1.2-plasma-1.0 +Section: introspection +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GObject Introspection typelib files. + +Package: libplasma-glib-dev +Section: libdevel +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: same +Depends: + ${misc:Depends}, + libplasma-dev (= ${binary:Version}), + libarrow-cuda-glib-dev (= ${binary:Version}), + libplasma-glib600 (= ${binary:Version}), + gir1.2-plasma-1.0 (= ${binary:Version}) +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides GLib based header files. + +Package: libplasma-glib-doc +Section: doc +Architecture: @CUDA_ARCHITECTURE@ +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Plasma is an in-memory object store and cache for big data. + . + This package provides documentations. 
+ +Package: libparquet-glib600 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow-glib600 (= ${binary:Version}), + libparquet600 (= ${binary:Version}) +Description: Apache Parquet is a columnar storage format + . + This package provides GLib based library files. + +Package: gir1.2-parquet-1.0 +Section: introspection +Architecture: any +Multi-Arch: same +Depends: + ${gir:Depends}, + ${misc:Depends} +Description: Apache Parquet is a columnar storage format + . + This package provides GObject Introspection typelib files. + +Package: libparquet-glib-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-glib-dev (= ${binary:Version}), + libparquet-dev (= ${binary:Version}), + libparquet-glib600 (= ${binary:Version}), + gir1.2-parquet-1.0 (= ${binary:Version}) +Suggests: libparquet-glib-doc +Description: Apache Parquet is a columnar storage format + . + This package provides GLib based header files. + +Package: libparquet-glib-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: + ${misc:Depends} +Recommends: libglib2.0-doc +Description: Apache Parquet is a columnar storage format + . + This package provides documentations. diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright new file mode 100644 index 000000000..9db0ea76f --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/copyright @@ -0,0 +1,193 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: Apache Arrow +Upstream-Contact: <dev@arrow.apache.org> +Source: https://dist.apache.org/repos/dist/release/arrow/ + +Files: * +Copyright: 2016 The Apache Software Foundation +License: Apache-2.0 + +Files: TODO for "This product includes software from the SFrame project" +Copyright: 2015 Dato, Inc. 
+ 2009 Carnegie Mellon University. +License: BSD-3-clause + +Files: TODO for "This product includes software from the Numpy project" +Copyright: 1995, 1996, 1997 Jim Hugunin, hugunin@mit.edu + 2005 Travis E. Oliphant oliphant@ee.byu.edu Brigham Young University +License: BSD-3-clause + +Files: TODO for "This product includes software from the Feather project" +Copyright: TODO +License: Apache-2.0 + +Files: TODO for "This product includes software from the DyND project" +Copyright: TODO +License: BSD-2-clause + +Files: TODO for "This product includes software from the LLVM project" +Copyright: 2003-2007 University of Illinois at Urbana-Champaign. +License: U-OF-I-BSD-LIKE + +Files: TODO for "This product includes software from the google-lint project" +Copyright: 2009 Google Inc. All rights reserved. +License: BSD-3-clause + +Files: TODO for "This product includes software from the mman-win32 project" +Copyright: 2010 kutuzov.viktor.84 +License: MIT + +Files: TODO for "This product includes software from the LevelDB project" +Copyright: 2011 The LevelDB Authors. All rights reserved. +License: BSD-3-clause + +Files: TODO for "This product includes software from the CMake project" +Copyright: 2001-2009 Kitware, Inc. + 2012-2014 Continuum Analytics, Inc. +License: BSD-3-clause + +Files: TODO for "This product includes software from https://github.com/matthew-brett/multibuild" +Copyright: 2013-2016, Matt Terry and Matthew Brett; all rights reserved. +License: BSD-2-clause + +Files: TODO for "This product includes software from the Ibis project" +Copyright: 2015 Cloudera, Inc. +License: Apache-2.0 + +Files: TODO for "This product includes code from Apache Kudu" +Copyright: 2016 The Apache Software Foundation +License: Apache-2.0 + +License: Apache-2.0 + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the full text of the Apache Software License version 2 can + be found in the file `/usr/share/common-licenses/Apache-2.0'. + +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + . + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +License: BSD-2-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + . + 1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . + 2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY + WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +License: U-OF-I-BSD-LIKE + ============================================================================== + LLVM Release License + ============================================================================== + University of Illinois/NCSA + Open Source License + . + Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign. + All rights reserved. + . + Developed by: + . + LLVM Team + . + University of Illinois at Urbana-Champaign + . + http://llvm.org + . + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal with + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished to do + so, subject to the following conditions: + . + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + . + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + . + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. 
+ +License: MIT + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install new file mode 100644 index 000000000..e0197fcd3 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Arrow-1.0.typelib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install new file mode 100644 index 000000000..ef0d9f56f --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/ArrowCUDA-1.0.typelib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install new file mode 100644 index 000000000..27091dab3 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/ArrowDataset-1.0.typelib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install new file mode 100644 index 000000000..0433b367a --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Gandiva-1.0.typelib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install new file mode 100644 index 000000000..13fde6681 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Parquet-1.0.typelib diff --git 
a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install new file mode 100644 index 000000000..7b7ce2158 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install @@ -0,0 +1 @@ +usr/lib/*/girepository-1.0/Plasma-1.0.typelib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install new file mode 100644 index 000000000..77e0b70f6 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install @@ -0,0 +1,6 @@ +usr/lib/*/cmake/arrow/ArrowCUDAConfig*.cmake +usr/lib/*/cmake/arrow/ArrowCUDATargets*.cmake +usr/lib/*/cmake/arrow/FindArrowCUDA.cmake +usr/lib/*/libarrow_cuda.a +usr/lib/*/libarrow_cuda.so +usr/lib/*/pkgconfig/arrow-cuda.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install new file mode 100644 index 000000000..778ae5fd7 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/arrow-cuda-glib/ +usr/lib/*/libarrow-cuda-glib.so +usr/lib/*/pkgconfig/arrow-cuda-glib.pc +usr/share/gir-1.0/ArrowCUDA-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install new file mode 100644 index 000000000..a6d637526 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow-cuda-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install 
b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install new file mode 100644 index 000000000..5ae464687 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_cuda.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install new file mode 100644 index 000000000..53e727ae0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install @@ -0,0 +1,6 @@ +usr/lib/*/cmake/arrow/ArrowDatasetConfig*.cmake +usr/lib/*/cmake/arrow/ArrowDatasetTargets*.cmake +usr/lib/*/cmake/arrow/FindArrowDataset.cmake +usr/lib/*/libarrow_dataset.a +usr/lib/*/libarrow_dataset.so +usr/lib/*/pkgconfig/arrow-dataset.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install new file mode 100644 index 000000000..4c50bde97 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/arrow-dataset-glib/ +usr/lib/*/libarrow-dataset-glib.so +usr/lib/*/pkgconfig/arrow-dataset-glib.pc +usr/share/gir-1.0/ArrowDataset-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base new file mode 100644 index 000000000..5ec8156b0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: arrow-dataset-glib +Title: Apache Arrow Dataset GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Apache Arrow Dataset GLib provides an API to read and write semantic datasets stored in different locations and 
formats that uses GLib. +Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/arrow-dataset-glib/index.html +Files: /usr/share/gtk-doc/html/arrow-dataset-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install new file mode 100644 index 000000000..523bc206e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install @@ -0,0 +1 @@ +usr/share/gtk-doc/html/arrow-dataset-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links new file mode 100644 index 000000000..3d880362b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/arrow-dataset-glib usr/share/doc/libarrow-dataset-glib-doc/arrow-dataset-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-dataset-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-dataset-glib-doc/gobject diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install new file mode 100644 index 000000000..10085f3a0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow-dataset-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install new file mode 100644 index 000000000..014634165 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_dataset.so.* diff 
--git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install new file mode 100644 index 000000000..ccd0c4e5b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install @@ -0,0 +1,19 @@ +usr/include/arrow/ +usr/lib/*/cmake/arrow/ArrowConfig*.cmake +usr/lib/*/cmake/arrow/ArrowOptions.cmake +usr/lib/*/cmake/arrow/ArrowTargets*.cmake +usr/lib/*/cmake/arrow/Find*Alt.cmake +usr/lib/*/cmake/arrow/FindArrow.cmake +usr/lib/*/cmake/arrow/FindBrotli.cmake +usr/lib/*/cmake/arrow/FindLz4.cmake +usr/lib/*/cmake/arrow/Find[Suz]*.cmake +usr/lib/*/cmake/arrow/arrow-config.cmake +usr/lib/*/libarrow.a +usr/lib/*/libarrow.so +usr/lib/*/libarrow_bundled_dependencies.a +usr/lib/*/pkgconfig/arrow-compute.pc +usr/lib/*/pkgconfig/arrow-csv.pc +usr/lib/*/pkgconfig/arrow-filesystem.pc +usr/lib/*/pkgconfig/arrow-json.pc +usr/lib/*/pkgconfig/arrow-orc.pc +usr/lib/*/pkgconfig/arrow.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install new file mode 100644 index 000000000..20ca33d84 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install @@ -0,0 +1,6 @@ +usr/lib/*/cmake/arrow/ArrowFlightConfig*.cmake +usr/lib/*/cmake/arrow/ArrowFlightTargets*.cmake +usr/lib/*/cmake/arrow/FindArrowFlight.cmake +usr/lib/*/libarrow_flight.a +usr/lib/*/libarrow_flight.so +usr/lib/*/pkgconfig/arrow-flight.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install new file mode 100644 index 000000000..8a8dee3ac --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/arrow-flight-glib/ 
+usr/lib/*/libarrow-flight-glib.so +usr/lib/*/pkgconfig/arrow-flight-glib.pc +usr/share/gir-1.0/ArrowFlight-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base new file mode 100644 index 000000000..94b17c11b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: arrow-flight-glib +Title: Apache Arrow Flight GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Apache Arrow Flight GLib provides a general-purpose client-server framework to simplify high performance transport of large datasets over network interfaces. +Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/arrow-flight-glib/index.html +Files: /usr/share/gtk-doc/html/arrow-flight-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install new file mode 100644 index 000000000..3c95f17ed --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install @@ -0,0 +1 @@ +usr/share/gtk-doc/html/arrow-flight-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links new file mode 100644 index 000000000..d55c89a1b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/arrow-flight-glib usr/share/doc/libarrow-flight-glib-doc/arrow-flight-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-flight-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-flight-glib-doc/gobject diff --git 
a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install new file mode 100644 index 000000000..a6156ed94 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow-flight-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install new file mode 100644 index 000000000..abdb96d4c --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_flight.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install new file mode 100644 index 000000000..f6de7eedb --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install @@ -0,0 +1,6 @@ +usr/include/arrow-glib/ +usr/lib/*/libarrow-glib.so +usr/lib/*/pkgconfig/arrow-glib.pc +usr/lib/*/pkgconfig/arrow-orc-glib.pc +usr/share/arrow-glib/example/ +usr/share/gir-1.0/Arrow-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base new file mode 100644 index 000000000..8ae4ffb6d --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: arrow-glib +Title: Apache Arrow GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Apache Arrow GLib is a data processing library for analysis that uses GLib. 
+Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/arrow-glib/index.html +Files: /usr/share/gtk-doc/html/arrow-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install new file mode 100644 index 000000000..912a29c58 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install @@ -0,0 +1,2 @@ +usr/share/doc/arrow-glib/ +usr/share/gtk-doc/html/arrow-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links new file mode 100644 index 000000000..556987d0a --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/arrow-glib usr/share/doc/libarrow-glib-doc/arrow-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-glib-doc/gobject diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install new file mode 100644 index 000000000..ec369d153 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install new file mode 100644 index 000000000..807583f98 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install @@ -0,0 +1,6 @@ +usr/lib/*/cmake/arrow/ArrowPythonConfig*.cmake +usr/lib/*/cmake/arrow/ArrowPythonTargets*.cmake +usr/lib/*/cmake/arrow/FindArrowPython.cmake +usr/lib/*/libarrow_python.a 
+usr/lib/*/libarrow_python.so +usr/lib/*/pkgconfig/arrow-python.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install new file mode 100644 index 000000000..6cf96e227 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install @@ -0,0 +1,6 @@ +usr/lib/*/cmake/arrow/ArrowPythonFlightConfig*.cmake +usr/lib/*/cmake/arrow/ArrowPythonFlightTargets*.cmake +usr/lib/*/cmake/arrow/FindArrowPythonFlight.cmake +usr/lib/*/libarrow_python_flight.a +usr/lib/*/libarrow_python_flight.so +usr/lib/*/pkgconfig/arrow-python-flight.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install new file mode 100644 index 000000000..b7cbfec1f --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_python_flight.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install new file mode 100644 index 000000000..eef3e6648 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_python.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install new file mode 100644 index 000000000..98ef2139c --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install @@ -0,0 +1 @@ +usr/lib/*/libarrow.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install 
b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install new file mode 100644 index 000000000..26e7e76fb --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install @@ -0,0 +1,7 @@ +usr/include/gandiva/ +usr/lib/*/cmake/arrow/GandivaConfig*.cmake +usr/lib/*/cmake/arrow/GandivaTargets*.cmake +usr/lib/*/cmake/arrow/FindGandiva.cmake +usr/lib/*/libgandiva.a +usr/lib/*/libgandiva.so +usr/lib/*/pkgconfig/gandiva.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install new file mode 100644 index 000000000..fe7d8bb79 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/gandiva-glib/ +usr/lib/*/libgandiva-glib.so +usr/lib/*/pkgconfig/gandiva-glib.pc +usr/share/gir-1.0/Gandiva-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base new file mode 100644 index 000000000..2bf913062 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: gandiva-glib +Title: Gandiva GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Gandiva GLib is a toolset for compiling and evaluating expressions on Arrow Data that uses GLib. 
+Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/gandiva-glib/index.html +Files: /usr/share/gtk-doc/html/gandiva-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install new file mode 100644 index 000000000..358e4e5c7 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install @@ -0,0 +1 @@ +usr/share/gtk-doc/html/gandiva-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links new file mode 100644 index 000000000..234794e23 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/gandiva-glib usr/share/doc/libgandiva-glib-doc/gandiva-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libgandiva-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libgandiva-glib-doc/gobject diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install new file mode 100644 index 000000000..6257fd438 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libgandiva-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install new file mode 100644 index 000000000..1475f49cf --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install @@ -0,0 +1 @@ +usr/lib/*/libgandiva.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install 
b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install new file mode 100644 index 000000000..e163115f0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install @@ -0,0 +1,7 @@ +usr/include/parquet/ +usr/lib/*/cmake/arrow/ParquetConfig*.cmake +usr/lib/*/cmake/arrow/ParquetTargets*.cmake +usr/lib/*/cmake/arrow/FindParquet.cmake +usr/lib/*/libparquet.a +usr/lib/*/libparquet.so +usr/lib/*/pkgconfig/parquet.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install new file mode 100644 index 000000000..9cce737a7 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/parquet-glib/ +usr/lib/*/libparquet-glib.so +usr/lib/*/pkgconfig/parquet-glib.pc +usr/share/gir-1.0/Parquet-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base new file mode 100644 index 000000000..cc68e2df6 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: parquet-glib +Title: Apache Parquet GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Apache Parquet GLib is a columnar storage format processing library that uses GLib. 
+Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/parquet-glib/index.html +Files: /usr/share/gtk-doc/html/parquet-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install new file mode 100644 index 000000000..5843ea3da --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install @@ -0,0 +1 @@ +usr/share/gtk-doc/html/parquet-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links new file mode 100644 index 000000000..c31f346b1 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/parquet-glib usr/share/doc/libparquet-glib-doc/parquet-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libparquet-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libparquet-glib-doc/gobject diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install new file mode 100644 index 000000000..1c0e44199 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libparquet-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install new file mode 100644 index 000000000..540a91d5e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install @@ -0,0 +1 @@ +usr/lib/*/libparquet.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install 
b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install new file mode 100644 index 000000000..c315d4dfc --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install @@ -0,0 +1,7 @@ +usr/include/plasma/ +usr/lib/*/cmake/arrow/PlasmaConfig*.cmake +usr/lib/*/cmake/arrow/PlasmaTargets*.cmake +usr/lib/*/cmake/arrow/FindPlasma.cmake +usr/lib/*/libplasma.a +usr/lib/*/libplasma.so +usr/lib/*/pkgconfig/plasma.pc diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install new file mode 100644 index 000000000..7800681d2 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install @@ -0,0 +1,4 @@ +usr/include/plasma-glib/ +usr/lib/*/libplasma-glib.so +usr/lib/*/pkgconfig/plasma-glib.pc +usr/share/gir-1.0/Plasma-1.0.gir diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base new file mode 100644 index 000000000..a9d306d8b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base @@ -0,0 +1,9 @@ +Document: plasma-glib +Title: Plasma GLib Reference Manual +Author: The Apache Software Foundation +Abstract: Plasma GLib is an in-memory object store and cache for big data that uses GLib. 
+Section: Programming + +Format: HTML +Index: /usr/share/gtk-doc/html/plasma-glib/index.html +Files: /usr/share/gtk-doc/html/plasma-glib/*.html diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install new file mode 100644 index 000000000..ad13b94cd --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install @@ -0,0 +1 @@ +usr/share/gtk-doc/html/plasma-glib diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links new file mode 100644 index 000000000..193262f9b --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links @@ -0,0 +1,3 @@ +usr/share/gtk-doc/html/plasma-glib usr/share/doc/libplasma-glib-doc/plasma-glib +usr/share/doc/libglib2.0-doc/glib usr/share/doc/libplasma-glib-doc/glib +usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libplasma-glib-doc/gobject diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install new file mode 100644 index 000000000..339bcca3e --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install @@ -0,0 +1 @@ +usr/lib/*/libplasma-glib.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install new file mode 100644 index 000000000..f8a744b65 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install @@ -0,0 +1 @@ +usr/lib/*/libplasma.so.* diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series new file mode 100644 index 
000000000..e69de29bb --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/patches/series diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install new file mode 100644 index 000000000..bd13b0e81 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install @@ -0,0 +1 @@ +usr/bin/plasma-store-server diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules new file mode 100755 index 000000000..2de533615 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -0,0 +1,104 @@ +#!/usr/bin/make -f +# -*- makefile-gmake -*- +# +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 +# This has to be exported to make some magic below work. +export DH_OPTIONS + +export DEB_BUILD_MAINT_OPTIONS=reproducible=-timeless + +BUILD_TYPE=release + +%: + dh $@ --with gir + +override_dh_auto_configure: + if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \ + ARROW_CUDA=ON; \ + ARROW_PLASMA=ON; \ + else \ + ARROW_CUDA=OFF; \ + ARROW_PLASMA=OFF; \ + fi; \ + dh_auto_configure \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build \ + --buildsystem=cmake+ninja \ + -- \ + -DARROW_CUDA=$${ARROW_CUDA} \ + -DARROW_FLIGHT=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_GANDIVA_JAVA=OFF \ + -DARROW_MIMALLOC=ON \ + -DARROW_ORC=ON \ + -DARROW_PACKAGE_KIND=deb \ + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=$${ARROW_PLASMA} \ + -DARROW_PYTHON=ON \ + -DARROW_S3=ON \ + -DARROW_USE_CCACHE=OFF \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ + -DCMAKE_UNITY_BUILD=ON \ + -DPARQUET_REQUIRE_ENCRYPTION=ON \ + -DPythonInterp_FIND_VERSION=ON \ + -DPythonInterp_FIND_VERSION_MAJOR=3 + 
+override_dh_auto_build: + dh_auto_build \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build + dh_auto_configure \ + --sourcedirectory=c_glib \ + --builddirectory=c_glib_build \ + --buildsystem=meson+ninja \ + -- \ + -Darrow_cpp_build_type=$(BUILD_TYPE) \ + -Darrow_cpp_build_dir=../cpp_build \ + -Dgtk_doc=true + env \ + LD_LIBRARY_PATH=$(CURDIR)/cpp_build/$(BUILD_TYPE) \ + dh_auto_build \ + --sourcedirectory=c_glib \ + --builddirectory=c_glib_build \ + --buildsystem=meson+ninja + +override_dh_auto_install: + dh_auto_install \ + --sourcedirectory=c_glib \ + --builddirectory=c_glib_build \ + --buildsystem=meson+ninja + # Remove built files to reduce disk usage + dh_auto_clean \ + --sourcedirectory=c_glib \ + --builddirectory=c_glib_build \ + --buildsystem=meson+ninja + + dh_auto_install \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build + # Remove built files to reduce disk usage + dh_auto_clean \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build + +override_dh_auto_test: + # TODO: We need Boost 1.64 or later to build tests for + # Apache Arrow Flight. 
+ # git clone --depth 1 https://github.com/apache/arrow-testing.git + # git clone --depth 1 https://github.com/apache/parquet-testing.git + # cd cpp_build && \ + # env \ + # ARROW_TEST_DATA=$(CURDIR)/arrow-testing/data \ + # PARQUET_TEST_DATA=$(CURDIR)/parquet-testing/data \ + # ctest --exclude-regex 'arrow-cuda-test|plasma-client_tests' + +# skip file failing with "Unknown DWARF DW_OP_255" (see bug#949296) +override_dh_dwz: + dh_dwz --exclude=libgandiva.so diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format new file mode 100644 index 000000000..163aaf8d8 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch new file mode 100644 index 000000000..5cb3f0091 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/debian/watch @@ -0,0 +1,2 @@ +version=3 +https://dist.apache.org/repos/dist/release/arrow/arrow-(.+)/apache-arrow-(.+).tar.gz diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from new file mode 100644 index 000000000..b570587e0 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8-aarch64/from @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/almalinux:8 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile new file mode 100644 index 000000000..003b26747 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=almalinux:8 +FROM ${FROM} + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ + dnf install -y ${quiet} epel-release && \ + dnf install --enablerepo=powertools -y ${quiet} \ + bison \ + boost-devel \ + brotli-devel \ + bzip2-devel \ + c-ares-devel \ + ccache \ + clang \ + cmake \ + curl-devel \ + flex \ + gcc-c++ \ + gflags-devel \ + git \ + glog-devel \ + gobject-introspection-devel \ + gtk-doc \ + libarchive \ + libzstd-devel \ + llvm-devel \ + llvm-static \ + lz4-devel \ + make \ + ncurses-devel \ + ninja-build \ + openssl-devel \ + pkg-config \ + python3 \ + python3-devel \ + python3-numpy \ + python3-pip \ + re2-devel \ + # rapidjson-devel \ + rpmdevtools \ + snappy-devel \ + tar \ + # utf8proc-devel \ + zlib-devel && \ + dnf clean ${quiet} all diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile new file mode 100644 index 000000000..6ada89729 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=amazonlinux:2 +FROM ${FROM} + +COPY qemu-* /usr/bin/ + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ + yum update -y ${quiet} && \ + amazon-linux-extras install -y epel && \ + yum install -y ${quiet} \ + bison \ + brotli-devel \ + bzip2-devel \ + ccache \ + cmake3 \ + flex \ + gcc-c++ \ + git \ + glog-devel \ + gobject-introspection-devel \ + gtk-doc \ + lz4-devel \ + make \ + openssl-devel \ + pkg-config \ + rapidjson-devel \ + rpmdevtools \ + snappy-devel \ + tar \ + utf8proc-devel \ + zlib-devel && \ + # Install ninja-build dependencies in amzn2-core + yum install -y ${quiet} ninja-build && \ + # Install ninja-build from EPEL because ninja-build in amzn2-core is old. + yum install -y ${quiet} --disablerepo=amzn2-core ninja-build && \ + yum clean ${quiet} all diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static new file mode 100755 index 000000000..279d89545 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static @@ -0,0 +1,33 @@ +#!/usr/bin/env sh +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Do nothing. This exists only for not requiring qemu-aarch64-static copy. +# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or +# arm64 hosts don't require qemu-aarch64-static in Docker image. But old Debian +# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image. +# +# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemu*", +# the "COPY" fails. It means that we always require "qemu*" even if we +# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file, +# the "COPY" doesn't fail. It means that we can copy "qemu*" only when we +# need. +# +# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml. +# Azure Pipelines uses old Ubuntu (18.04). +# So we need to put "qemu-aarch64-static" into this directory. diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in new file mode 100644 index 000000000..3a8f71ecc --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -0,0 +1,892 @@ +# -*- sh-shell: rpm -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +%define _amzn %{?amzn:%{amzn}}%{!?amzn:0} +%define is_amazon_linux (%{_amzn} != 0) + +%define boost_version %( \ + if [ %{rhel} -eq 7 ]; then \ + echo 169; \ + fi) +%define cmake_version %( \ + if [ %{rhel} -lt 8 ]; then \ + echo 3; \ + fi) +%define python_version %( \ + if [ %{rhel} -eq 7 ]; then \ + echo 36; \ + else \ + echo 3; \ + fi) + +%define lz4_requirement %( \ + if [ %{_amzn} -eq 0 ]; then \ + echo ">= 1.8.0"; \ + fi) + +%define use_boost (!%{is_amazon_linux}) +%define use_flight (%{rhel} >= 8) +%define use_gandiva (%{rhel} >= 8 && %{_arch} != "aarch64") +%define use_gflags (!%{is_amazon_linux}) +%define use_mimalloc (%{rhel} >= 8) +%define use_python (!%{is_amazon_linux}) +# TODO: Enable this. This works on local but is fragile on GitHub Actions and +# Travis CI. +# %%define use_s3 (%%{rhel} >= 8) +%define use_s3 0 + +%define have_rapidjson (%{rhel} == 7) +%define have_re2 (%{rhel} >= 8) +# EPEL ships utf8proc but it's old. +# %%define have_utf8proc (%%{rhel} == 7) +%define have_utf8proc 0 +%define have_zstd (!%{is_amazon_linux}) + +Name: @PACKAGE@ +Version: @VERSION@ +Release: @RELEASE@%{?dist} +Summary: Apache Arrow is a data processing library for analysis + +License: Apache-2.0 +URL: https://arrow.apache.org/ +Source0: https://dist.apache.org/repos/dist/release/@PACKAGE@/@PACKAGE@-%{version}/apache-@PACKAGE@-%{version}.tar.gz + +BuildRequires: bison +%if %{use_boost} +BuildRequires: boost%{boost_version}-devel +%endif +BuildRequires: brotli-devel +BuildRequires: bzip2-devel +%if %{use_flight} +BuildRequires: c-ares-devel +%endif +BuildRequires: cmake%{cmake_version} +%if %{use_s3} +BuildRequires: curl-devel +%endif +BuildRequires: flex +BuildRequires: gcc-c++ +%if %{use_gflags} +BuildRequires: gflags-devel +%endif +BuildRequires: git +BuildRequires: glog-devel +%if %{have_zstd} +BuildRequires: libzstd-devel +%endif +BuildRequires: lz4-devel %{lz4_requirement} +BuildRequires: ninja-build +BuildRequires: openssl-devel +BuildRequires: pkgconfig +%if 
%{use_python} +BuildRequires: python%{python_version}-devel +BuildRequires: python%{python_version}-numpy +%endif +%if %{have_rapidjson} +BuildRequires: rapidjson-devel +%endif +%if %{have_re2} +BuildRequires: re2-devel +%endif +BuildRequires: snappy-devel +%if %{have_utf8proc} +BuildRequires: utf8proc-devel +%endif +BuildRequires: zlib-devel + +%if %{use_gandiva} +BuildRequires: llvm-devel +BuildRequires: ncurses-devel +%endif + +BuildRequires: gobject-introspection-devel +BuildRequires: gtk-doc + +%description +Apache Arrow is a data processing library for analysis. + +%prep +%setup -q -n apache-@PACKAGE@-%{version} + +%build +cpp_build_type=release +mkdir cpp/build +cd cpp/build +%cmake3 .. \ + -DARROW_CSV=ON \ + -DARROW_DATASET=ON \ +%if %{use_flight} + -DARROW_FLIGHT=ON \ +%endif +%if %{use_gandiva} + -DARROW_GANDIVA=ON \ +%endif + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ +%if %{use_mimalloc} + -DARROW_MIMALLOC=ON \ +%endif + -DARROW_ORC=ON \ + -DARROW_PACKAGE_KIND=rpm \ + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=ON \ +%if %{use_python} + -DARROW_PYTHON=ON \ +%endif +%if %{use_s3} + -DARROW_S3=ON \ +%endif + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=$cpp_build_type \ + -DARROW_USE_CCACHE=OFF \ + -DPARQUET_REQUIRE_ENCRYPTION=ON \ +%if %{use_python} + -DPythonInterp_FIND_VERSION=ON \ + -DPythonInterp_FIND_VERSION_MAJOR=3 \ +%endif + -GNinja + +ninja %{?_smp_mflags} +cd - + +cd c_glib +pip3 install meson +meson setup build \ + --default-library=both \ + --libdir=%{_libdir} \ + --prefix=%{_prefix} \ + -Darrow_cpp_build_dir=../cpp/build \ + -Darrow_cpp_build_type=$cpp_build_type \ + -Dgtk_doc=true +LD_LIBRARY_PATH=$PWD/../cpp/build/$cpp_build_type \ + ninja -C build %{?_smp_mflags} +cd - + +%install +cpp_build_type=release + +cd c_glib +DESTDIR=$RPM_BUILD_ROOT ninja -C build install +ninja -C build clean +cd - + +cd cpp/build 
+DESTDIR=$RPM_BUILD_ROOT ninja install +ninja clean +cd - + +%package libs +Summary: Runtime libraries for Apache Arrow C++ +License: Apache-2.0 +Requires: brotli +%if %{use_gflags} +Requires: gflags +%endif +Requires: glog +%if %{have_zstd} +Requires: libzstd +%endif +Requires: lz4 %{lz4_requirement} +%if %{have_re2} +Requires: re2 +%endif +Requires: snappy +%if %{have_utf8proc} +Requires: utf8proc +%endif +Requires: zlib + +%description libs +This package contains the libraries for Apache Arrow C++. + +%files libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_docdir}/arrow/ +%{_libdir}/libarrow.so.* + +%package devel +Summary: Libraries and header files for Apache Arrow C++ +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +Requires: brotli-devel +Requires: bzip2-devel +%if %{use_flight} +Requires: c-ares-devel +%endif +%if %{have_zstd} +Requires: libzstd-devel +%endif +Requires: lz4-devel %{lz4_requirement} +Requires: openssl-devel +%if %{have_rapidjson} +Requires: rapidjson-devel +%endif +%if %{have_re2} +Requires: re2-devel +%endif +Requires: snappy-devel +%if %{have_utf8proc} +Requires: utf8proc-devel +%endif +Requires: zlib-devel + +%description devel +Libraries and header files for Apache Arrow C++. 
+ +%files devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow/ +%exclude %{_includedir}/arrow/dataset/ +%if %{use_flight} +%exclude %{_includedir}/arrow/flight/ +%endif +%exclude %{_includedir}/arrow/python/ +%{_libdir}/cmake/arrow/ArrowConfig*.cmake +%{_libdir}/cmake/arrow/ArrowOptions.cmake +%{_libdir}/cmake/arrow/ArrowTargets*.cmake +%{_libdir}/cmake/arrow/FindArrow.cmake +%{_libdir}/cmake/arrow/FindBrotli.cmake +%{_libdir}/cmake/arrow/FindLz4.cmake +%{_libdir}/cmake/arrow/FindSnappy.cmake +%if %{use_flight} +%{_libdir}/cmake/arrow/Findc-aresAlt.cmake +%endif +%if %{have_re2} +%{_libdir}/cmake/arrow/Findre2Alt.cmake +%endif +%if %{have_utf8proc} +%{_libdir}/cmake/arrow/Findutf8proc.cmake +%endif +%if %{have_zstd} +%{_libdir}/cmake/arrow/Findzstd.cmake +%endif +%{_libdir}/cmake/arrow/arrow-config.cmake +%{_libdir}/libarrow.a +%{_libdir}/libarrow.so +%{_libdir}/libarrow_bundled_dependencies.a +%{_libdir}/pkgconfig/arrow-compute.pc +%{_libdir}/pkgconfig/arrow-csv.pc +%{_libdir}/pkgconfig/arrow-filesystem.pc +%{_libdir}/pkgconfig/arrow-json.pc +%{_libdir}/pkgconfig/arrow-orc.pc +%{_libdir}/pkgconfig/arrow.pc + +%package dataset-libs +Summary: C++ library to read and write semantic datasets stored in different locations and formats +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} + +%description dataset-libs +This package contains the libraries for Apache Arrow dataset. + +%files dataset-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow_dataset.so.* + +%package dataset-devel +Summary: Libraries and header files for Apache Arrow dataset. +License: Apache-2.0 +Requires: %{name}-dataset-libs = %{version}-%{release} + +%description dataset-devel +Libraries and header files for Apache Arrow dataset. 
+ +%files dataset-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow/dataset/ +%{_libdir}/cmake/arrow/ArrowDatasetConfig*.cmake +%{_libdir}/cmake/arrow/ArrowDatasetTargets*.cmake +%{_libdir}/cmake/arrow/FindArrowDataset.cmake +%{_libdir}/libarrow_dataset.a +%{_libdir}/libarrow_dataset.so +%{_libdir}/pkgconfig/arrow-dataset.pc + +%if %{use_flight} +%package flight-libs +Summary: C++ library for fast data transport. +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +%if %{use_flight} +Requires: c-ares +%endif +Requires: openssl + +%description flight-libs +This package contains the libraries for Apache Arrow Flight. + +%files flight-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow_flight.so.* + +%package flight-devel +Summary: Libraries and header files for Apache Arrow Flight. +License: Apache-2.0 +Requires: %{name}-flight-libs = %{version}-%{release} + +%description flight-devel +Libraries and header files for Apache Arrow Flight. + +%files flight-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow/flight/ +%{_libdir}/cmake/arrow/ArrowFlightConfig*.cmake +%{_libdir}/cmake/arrow/ArrowFlightTargets*.cmake +%{_libdir}/cmake/arrow/FindArrowFlight.cmake +%{_libdir}/libarrow_flight.a +%{_libdir}/libarrow_flight.so +%{_libdir}/pkgconfig/arrow-flight.pc +%endif + +%if %{use_gandiva} +%package -n gandiva-libs +Summary: C++ library for compiling and evaluating expressions on Apache Arrow data. +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +Requires: ncurses-libs + +%description -n gandiva-libs +This package contains the libraries for Gandiva. + +%files -n gandiva-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libgandiva.so.* + +%package -n gandiva-devel +Summary: Libraries and header files for Gandiva. 
+License: Apache-2.0 +Requires: gandiva-libs = %{version}-%{release} +Requires: llvm-devel + +%description -n gandiva-devel +Libraries and header files for Gandiva. + +%files -n gandiva-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/gandiva/ +%{_libdir}/cmake/arrow/GandivaConfig*.cmake +%{_libdir}/cmake/arrow/GandivaTargets*.cmake +%{_libdir}/cmake/arrow/FindGandiva.cmake +%{_libdir}/libgandiva.a +%{_libdir}/libgandiva.so +%{_libdir}/pkgconfig/gandiva.pc +%endif + +%if %{use_python} +%package python-libs +Summary: Python integration library for Apache Arrow +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +Requires: python%{python_version}-numpy + +%description python-libs +This package contains the Python integration library for Apache Arrow. + +%files python-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow_python.so.* + +%package python-devel +Summary: Libraries and header files for Python integration library for Apache Arrow +License: Apache-2.0 +Requires: %{name}-devel = %{version}-%{release} +Requires: %{name}-libs = %{version}-%{release} +Requires: python%{python_version}-devel + +%description python-devel +Libraries and header files for Python integration library for Apache Arrow. 
+ +%files python-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow/python/ +%exclude %{_includedir}/arrow/python/flight.h +%{_libdir}/cmake/arrow/ArrowPythonConfig*.cmake +%{_libdir}/cmake/arrow/ArrowPythonTargets*.cmake +%{_libdir}/cmake/arrow/FindArrowPython.cmake +%{_libdir}/libarrow_python.a +%{_libdir}/libarrow_python.so +%{_libdir}/pkgconfig/arrow-python.pc + +%if %{use_flight} +%package python-flight-libs +Summary: Python integration library for Apache Arrow Flight +License: Apache-2.0 +Requires: %{name}-flight-libs = %{version}-%{release} +Requires: %{name}-python-libs = %{version}-%{release} + +%description python-flight-libs +This package contains the Python integration library for Apache Arrow Flight. + +%files python-flight-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow_python_flight.so.* + +%package python-flight-devel +Summary: Libraries and header files for Python integration library for Apache Arrow Flight. +License: Apache-2.0 +Requires: %{name}-flight-devel = %{version}-%{release} +Requires: %{name}-python-devel = %{version}-%{release} +Requires: %{name}-python-flight-libs = %{version}-%{release} + +%description python-flight-devel +Libraries and header files for Python integration library for +Apache Arrow Flight. 
+ +%files python-flight-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow/python/flight.h +%{_libdir}/cmake/arrow/ArrowPythonFlightConfig*.cmake +%{_libdir}/cmake/arrow/ArrowPythonFlightTargets*.cmake +%{_libdir}/cmake/arrow/FindArrowPythonFlight.cmake +%{_libdir}/libarrow_python_flight.a +%{_libdir}/libarrow_python_flight.so +%{_libdir}/pkgconfig/arrow-python-flight.pc +%endif +%endif + +%package -n plasma-libs +Summary: Runtime libraries for Plasma in-memory object store +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} + +%description -n plasma-libs +This package contains the libraries for Plasma in-memory object store. + +%files -n plasma-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libplasma.so.* + +%package -n plasma-store-server +Summary: Server for Plasma in-memory object store +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} + +%description -n plasma-store-server +This package contains the server for Plasma in-memory object store. + +%files -n plasma-store-server +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_bindir}/plasma-store-server + +%package -n plasma-devel +Summary: Libraries and header files for Plasma in-memory object store +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} + +%description -n plasma-devel +Libraries and header files for Plasma in-memory object store. 
+ +%files -n plasma-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/plasma/ +%{_libdir}/cmake/arrow/PlasmaConfig*.cmake +%{_libdir}/cmake/arrow/PlasmaTargets*.cmake +%{_libdir}/cmake/arrow/FindPlasma.cmake +%{_libdir}/libplasma.a +%{_libdir}/libplasma.so +%{_libdir}/pkgconfig/plasma*.pc + +%package -n parquet-libs +Summary: Runtime libraries for Apache Parquet C++ +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +Requires: openssl + +%description -n parquet-libs +This package contains the libraries for Apache Parquet C++. + +%files -n parquet-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libparquet.so.* + +%package -n parquet-devel +Summary: Libraries and header files for Apache Parquet C++ +License: Apache-2.0 +Requires: parquet-libs = %{version}-%{release} +Requires: zlib-devel + +%description -n parquet-devel +Libraries and header files for Apache Parquet C++. + +%files -n parquet-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/parquet/ +%{_libdir}/cmake/arrow/ParquetConfig*.cmake +%{_libdir}/cmake/arrow/ParquetTargets*.cmake +%{_libdir}/cmake/arrow/FindParquet.cmake +%{_libdir}/libparquet.a +%{_libdir}/libparquet.so +%{_libdir}/pkgconfig/parquet*.pc + +%package glib-libs +Summary: Runtime libraries for Apache Arrow GLib +License: Apache-2.0 +Requires: %{name}-libs = %{version}-%{release} +Requires: glib2 + +%description glib-libs +This package contains the libraries for Apache Arrow GLib. + +%files glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow-glib.so.* +%{_datadir}/gir-1.0/Arrow-1.0.gir + +%package glib-devel +Summary: Libraries and header files for Apache Arrow GLib +License: Apache-2.0 +Requires: %{name}-devel = %{version}-%{release} +Requires: glib2-devel +Requires: gobject-introspection-devel + +%description glib-devel +Libraries and header files for Apache Arrow GLib. 
+ +%files glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow-glib/ +%{_libdir}/libarrow-glib.a +%{_libdir}/libarrow-glib.so +%{_libdir}/pkgconfig/arrow-glib.pc +%{_libdir}/pkgconfig/arrow-orc-glib.pc +%{_libdir}/girepository-1.0/Arrow-1.0.typelib +%{_datadir}/arrow-glib/example/ + +%package glib-doc +Summary: Documentation for Apache Arrow GLib +License: Apache-2.0 + +%description glib-doc +Documentation for Apache Arrow GLib. + +%files glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_docdir}/arrow-glib/ +%{_datadir}/gtk-doc/html/arrow-glib/ + +%package dataset-glib-libs +Summary: Runtime libraries for Apache Arrow Dataset GLib +License: Apache-2.0 +Requires: %{name}-dataset-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description dataset-glib-libs +This package contains the libraries for Apache Arrow Dataset GLib. + +%files dataset-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow-dataset-glib.so.* +%{_datadir}/gir-1.0/ArrowDataset-1.0.gir + +%package dataset-glib-devel +Summary: Libraries and header files for Apache Arrow Dataset GLib +License: Apache-2.0 +Requires: %{name}-dataset-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description dataset-glib-devel +Libraries and header files for Apache Arrow Dataset GLib. + +%files dataset-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow-dataset-glib/ +%{_libdir}/libarrow-dataset-glib.a +%{_libdir}/libarrow-dataset-glib.so +%{_libdir}/pkgconfig/arrow-dataset-glib.pc +%{_libdir}/girepository-1.0/ArrowDataset-1.0.typelib + +%package dataset-glib-doc +Summary: Documentation for Apache Arrow Dataset GLib +License: Apache-2.0 + +%description dataset-glib-doc +Documentation for Apache Arrow dataset GLib. 
+ +%files dataset-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/arrow-dataset-glib/ + +%if %{use_flight} +%package flight-glib-libs +Summary: Runtime libraries for Apache Arrow Flight GLib +License: Apache-2.0 +Requires: %{name}-flight-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description flight-glib-libs +This package contains the libraries for Apache Arrow Flight GLib. + +%files flight-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libarrow-flight-glib.so.* +%{_datadir}/gir-1.0/ArrowFlight-1.0.gir + +%package flight-glib-devel +Summary: Libraries and header files for Apache Arrow Flight GLib +License: Apache-2.0 +Requires: %{name}-flight-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description flight-glib-devel +Libraries and header files for Apache Arrow Flight GLib. + +%files flight-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/arrow-flight-glib/ +%{_libdir}/libarrow-flight-glib.a +%{_libdir}/libarrow-flight-glib.so +%{_libdir}/pkgconfig/arrow-flight-glib.pc +%{_libdir}/girepository-1.0/ArrowFlight-1.0.typelib + +%package flight-glib-doc +Summary: Documentation for Apache Arrow Flight GLib +License: Apache-2.0 + +%description flight-glib-doc +Documentation for Apache Arrow Flight GLib. + +%files flight-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/arrow-flight-glib/ +%endif + +%if %{use_gandiva} +%package -n gandiva-glib-libs +Summary: Runtime libraries for Gandiva GLib +License: Apache-2.0 +Requires: gandiva-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description -n gandiva-glib-libs +This package contains the libraries for Gandiva GLib. 
+ +%files -n gandiva-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libgandiva-glib.so.* +%{_datadir}/gir-1.0/Gandiva-1.0.gir + +%package -n gandiva-glib-devel +Summary: Libraries and header files for Gandiva GLib +License: Apache-2.0 +Requires: gandiva-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description -n gandiva-glib-devel +Libraries and header files for Gandiva GLib. + +%files -n gandiva-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/gandiva-glib/ +%{_libdir}/libgandiva-glib.a +%{_libdir}/libgandiva-glib.so +%{_libdir}/pkgconfig/gandiva-glib.pc +%{_libdir}/girepository-1.0/Gandiva-1.0.typelib + +%package -n gandiva-glib-doc +Summary: Documentation for Gandiva GLib +License: Apache-2.0 + +%description -n gandiva-glib-doc +Documentation for Gandiva GLib. + +%files -n gandiva-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/gandiva-glib/ +%endif + +%package -n plasma-glib-libs +Summary: Runtime libraries for Plasma GLib +License: Apache-2.0 +Requires: plasma-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description -n plasma-glib-libs +This package contains the libraries for Plasma GLib. + +%files -n plasma-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libplasma-glib.so.* +%{_datadir}/gir-1.0/Plasma-1.0.gir + +%package -n plasma-glib-devel +Summary: Libraries and header files for Plasma GLib +License: Apache-2.0 +Requires: plasma-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description -n plasma-glib-devel +Libraries and header files for Plasma GLib. 
+ +%files -n plasma-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/plasma-glib/ +%{_libdir}/libplasma-glib.a +%{_libdir}/libplasma-glib.so +%{_libdir}/pkgconfig/plasma-glib.pc +%{_libdir}/girepository-1.0/Plasma-1.0.typelib + +%package -n plasma-glib-doc +Summary: Documentation for Plasma GLib +License: Apache-2.0 + +%description -n plasma-glib-doc +Documentation for Plasma GLib. + +%files -n plasma-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/plasma-glib/ + +%package -n parquet-glib-libs +Summary: Runtime libraries for Apache Parquet GLib +License: Apache-2.0 +Requires: parquet-libs = %{version}-%{release} +Requires: %{name}-glib-libs = %{version}-%{release} + +%description -n parquet-glib-libs +This package contains the libraries for Apache Parquet GLib. + +%files -n parquet-glib-libs +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_libdir}/libparquet-glib.so.* +%{_datadir}/gir-1.0/Parquet-1.0.gir + +%package -n parquet-glib-devel +Summary: Libraries and header files for Apache Parquet GLib +License: Apache-2.0 +Requires: parquet-devel = %{version}-%{release} +Requires: %{name}-glib-devel = %{version}-%{release} + +%description -n parquet-glib-devel +Libraries and header files for Apache Parquet GLib. + +%files -n parquet-glib-devel +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_includedir}/parquet-glib/ +%{_libdir}/libparquet-glib.a +%{_libdir}/libparquet-glib.so +%{_libdir}/pkgconfig/parquet-glib.pc +%{_libdir}/girepository-1.0/Parquet-1.0.typelib + +%package -n parquet-glib-doc +Summary: Documentation for Apache Parquet GLib +License: Apache-2.0 + +%description -n parquet-glib-doc +Documentation for Apache Parquet GLib. 
+ +%files -n parquet-glib-doc +%defattr(-,root,root,-) +%doc README.md LICENSE.txt NOTICE.txt +%{_datadir}/gtk-doc/html/parquet-glib/ + +%changelog +* Wed Nov 10 2021 Sutou Kouhei <kou@clear-code.com> - 6.0.1-1 +- New upstream release. + +* Thu Oct 21 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 6.0.0-1 +- New upstream release. + +* Mon Jan 18 2021 Krisztián Szűcs <szucs.krisztian@gmail.com> - 3.0.0-1 +- New upstream release. + +* Mon Oct 12 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 2.0.0-1 +- New upstream release. + +* Mon Jul 20 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 1.0.0-1 +- New upstream release. + +* Thu Apr 16 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.17.0-1 +- New upstream release. + +* Thu Jan 30 2020 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.16.0-1 +- New upstream release. + +* Mon Sep 30 2019 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.15.0-1 +- New upstream release. + +* Fri Jun 28 2019 Sutou Kouhei <kou@clear-code.com> - 0.14.0-1 +- New upstream release. + +* Thu Mar 28 2019 Kouhei Sutou <kou@clear-code.com> - 0.13.0-1 +- New upstream release. + +* Wed Jan 16 2019 Krisztián Szűcs <szucs.krisztian@gmail.com> - 0.12.0-1 +- New upstream release. + +* Thu Oct 04 2018 Kouhei Sutou <kou@clear-code.com> - 0.11.0-1 +- New upstream release. + +* Thu Aug 02 2018 Phillip Cloud <cpcloud@gmail.com> - 0.10.0-1 +- New upstream release. + +* Fri Mar 16 2018 Kouhei Sutou <kou@clear-code.com> - 0.9.0-1 +- New upstream release. + +* Sun Dec 17 2017 Uwe Korn <uwelk@xhochy.com> - 0.8.0-1 +- New upstream release. + +* Wed Sep 27 2017 Kouhei Sutou <kou@clear-code.com> - 0.7.1-1 +- New upstream release. + +* Tue Sep 12 2017 Wes McKinney <wes.mckinney@twosigma.com> - 0.7.0-1 +- New upstream release. + +* Fri Aug 11 2017 Kouhei Sutou <kou@clear-code.com> - 0.6.0-1 +- New upstream release. + +* Wed Aug 02 2017 Kouhei Sutou <kou@clear-code.com> - 0.6.0.20170802-1 +- New upstream release. 
diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile new file mode 100644 index 000000000..6856e3854 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=centos:7 +FROM ${FROM} + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ + yum update -y ${quiet} && \ + yum install -y ${quiet} epel-release && \ + yum install -y ${quiet} \ + bison \ + boost169-devel \ + brotli-devel \ + bzip2-devel \ + ccache \ + cmake3 \ + flex \ + gcc-c++ \ + gflags-devel \ + git \ + glog-devel \ + gobject-introspection-devel \ + gtk-doc \ + libzstd-devel \ + lz4-devel \ + make \ + ninja-build \ + openssl-devel \ + pkg-config \ + python36 \ + python36-devel \ + python36-numpy \ + rapidjson-devel \ + rpmdevtools \ + snappy-devel \ + tar \ + zlib-devel && \ + yum clean ${quiet} all + +ENV \ + BOOST_INCLUDEDIR=/usr/include/boost169 \ + BOOST_LIBRARYDIR=/usr/lib64/boost169 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from new file mode 100644 index 000000000..587ce9d4a --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8-aarch64/from @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +arm64v8/centos:8 diff --git a/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile new file mode 100644 index 000000000..ad145c4ee --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=centos:8 +FROM ${FROM} + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ + dnf install -y ${quiet} epel-release && \ + dnf install --enablerepo=powertools -y ${quiet} \ + bison \ + boost-devel \ + brotli-devel \ + bzip2-devel \ + c-ares-devel \ + ccache \ + clang \ + cmake \ + curl-devel \ + flex \ + gcc-c++ \ + gflags-devel \ + git \ + glog-devel \ + gobject-introspection-devel \ + gtk-doc \ + libarchive \ + libzstd-devel \ + llvm-devel \ + llvm-static \ + lz4-devel \ + make \ + ncurses-devel \ + ninja-build \ + openssl-devel \ + pkg-config \ + python3 \ + python3-devel \ + python3-numpy \ + python3-pip \ + re2-devel \ + # rapidjson-devel \ + rpmdevtools \ + snappy-devel \ + tar \ + # utf8proc-devel \ + zlib-devel && \ + dnf clean ${quiet} all diff --git a/src/arrow/dev/tasks/linux-packages/apt/build.sh b/src/arrow/dev/tasks/linux-packages/apt/build.sh new file mode 100755 index 000000000..a54567a5c --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/apt/build.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# -*- sh-indentation: 2; sh-basic-offset: 2 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +LANG=C + +set -u + +run() +{ + "$@" + if test $? 
-ne 0; then + echo "Failed $@" + exit 1 + fi +} + +. /host/env.sh + +distribution=$(lsb_release --id --short | tr 'A-Z' 'a-z') +code_name=$(lsb_release --codename --short) +case "${distribution}" in + debian) + component=main + ;; + ubuntu) + component=universe + ;; +esac +architecture=$(dpkg-architecture -q DEB_BUILD_ARCH) + +debuild_options=() +dpkg_buildpackage_options=(-us -uc) + +run mkdir -p /build +run cd /build +find . -not -path ./ccache -a -not -path "./ccache/*" -delete +if which ccache > /dev/null 2>&1; then + export CCACHE_COMPILERCHECK=content + export CCACHE_COMPRESS=1 + export CCACHE_COMPRESSLEVEL=6 + export CCACHE_DIR="${PWD}/ccache" + export CCACHE_MAXSIZE=500M + ccache --show-stats + debuild_options+=(-eCCACHE_COMPILERCHECK) + debuild_options+=(-eCCACHE_COMPRESS) + debuild_options+=(-eCCACHE_COMPRESSLEVEL) + debuild_options+=(-eCCACHE_DIR) + debuild_options+=(-eCCACHE_MAXSIZE) + if [ -d /usr/lib/ccache ] ;then + debuild_options+=(--prepend-path=/usr/lib/ccache) + fi +fi +run cp /host/tmp/${PACKAGE}-${VERSION}.tar.gz \ + ${PACKAGE}_${VERSION}.orig.tar.gz +run tar xfz ${PACKAGE}_${VERSION}.orig.tar.gz +case "${VERSION}" in + *~dev*) + run mv ${PACKAGE}-$(echo $VERSION | sed -e 's/~dev/-dev/') \ + ${PACKAGE}-${VERSION} + ;; + *~rc*) + run mv ${PACKAGE}-$(echo $VERSION | sed -r -e 's/~rc[0-9]+//') \ + ${PACKAGE}-${VERSION} + ;; +esac +run cd ${PACKAGE}-${VERSION}/ +platform="${distribution}-${code_name}" +if [ -d "/host/tmp/debian.${platform}-${architecture}" ]; then + run cp -rp "/host/tmp/debian.${platform}-${architecture}" debian +elif [ -d "/host/tmp/debian.${platform}" ]; then + run cp -rp "/host/tmp/debian.${platform}" debian +else + run cp -rp "/host/tmp/debian" debian +fi +: ${DEB_BUILD_OPTIONS:="parallel=$(nproc)"} +# DEB_BUILD_OPTIONS="${DEB_BUILD_OPTIONS} noopt" +export DEB_BUILD_OPTIONS +if [ "${DEBUG:-no}" = "yes" ]; then + run debuild "${debuild_options[@]}" "${dpkg_buildpackage_options[@]}" +else + run debuild "${debuild_options[@]}" 
"${dpkg_buildpackage_options[@]}" > /dev/null +fi +if which ccache > /dev/null 2>&1; then + ccache --show-stats +fi +run cd - + +repositories="/host/repositories" +package_initial=$(echo "${PACKAGE}" | sed -e 's/\(.\).*/\1/') +pool_dir="${repositories}/${distribution}/pool/${code_name}/${component}/${package_initial}/${PACKAGE}" +run mkdir -p "${pool_dir}/" +run \ + find . \ + -maxdepth 1 \ + -type f \ + -not -path '*.build' \ + -not -path '*.buildinfo' \ + -exec cp '{}' "${pool_dir}/" ';' + +run chown -R "$(stat --format "%u:%g" "${repositories}")" "${repositories}" diff --git a/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml b/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml new file mode 100644 index 000000000..557c4ab41 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/github.linux.amd64.yml @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + package: + name: Package + runs-on: ubuntu-20.04 + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_login_dockerhub()|indent }} + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.0' + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Cache ccache + uses: actions/cache@v2 + with: + path: arrow/dev/tasks/linux-packages/apache-arrow/{{ task_namespace }}/build/{{ target }}/ccache + key: linux-{{ task_namespace }}-ccache-{{ target }}-{{ "${{ hashFiles('arrow/cpp/**') }}" }} + restore-keys: linux-{{ task_namespace }}-ccache-{{ target }}- + - name: Build + run: | + set -e + pushd arrow/dev/tasks/linux-packages + rake version:update + rake docker:pull || : + rake --trace {{ task_namespace }}:build BUILD_DIR=build + sudo rm -rf */*/build + popd + env: + APT_TARGETS: {{ target }} + ARROW_VERSION: {{ arrow.version }} + REPO: {{ '${{ secrets.REPO }}' }} + YUM_TARGETS: {{ target }} + - uses: actions/upload-artifact@v2 + with: + name: packages + path: packages/*/{{ task_namespace }}/repositories/ + - name: Docker Push + continue-on-error: true + shell: bash + run: | + pushd arrow/dev/tasks/linux-packages + rake docker:push + popd + env: + APT_TARGETS: {{ target }} + REPO: {{ '${{ secrets.REPO }}' }} + YUM_TARGETS: {{ target }} + - name: Set up test + run: | + set -e + sudo apt update + # We can install createrepo_c by package with Ubuntu 22.04. 
+ # createrepo_c \ + sudo apt install -y \ + apt-utils \ + devscripts \ + gpg \ + rpm + gem install apt-dists-merge + (echo "Key-Type: RSA"; \ + echo "Key-Length: 4096"; \ + echo "Name-Real: Test"; \ + echo "Name-Email: test@example.com"; \ + echo "%no-protection") | \ + gpg --full-generate-key --batch + GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10) + echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} + gpg --export --armor test@example.com > arrow/dev/tasks/linux-packages/KEYS + # We can install createrepo_c by package with Ubuntu 22.04. + # This is workaround: + - name: Install createrepo_c + run: | + sudo apt install -y \ + cmake \ + libbz2-dev \ + libcurl4-openssl-dev \ + libglib2.0-dev \ + liblzma-dev \ + libmagic-dev \ + librpm-dev \ + libsqlite3-dev \ + libssl-dev \ + libxml2-dev \ + libzstd-dev \ + pkg-config \ + zlib1g-dev + git clone --depth 1 https://github.com/rpm-software-management/createrepo_c.git + pushd createrepo_c + /usr/bin/cmake \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DENABLE_BASHCOMP=OFF \ + -DENABLE_DRPM=OFF \ + -DENABLE_PYTHON=OFF \ + -DWITH_LIBMODULEMD=OFF \ + -DWITH_ZCHUNK=OFF \ + . + make -j$(nproc) + sudo make install + popd + rm -rf createrepo_c + - name: Test + run: | + set -e + pushd arrow/dev/tasks/linux-packages + rake --trace {{ task_namespace }}:test + rm -rf {{ task_namespace }}/repositories + popd + env: + APT_TARGETS: {{ target }} + ARROW_VERSION: {{ arrow.version }} + YUM_TARGETS: {{ target }} + + {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %} + {{ macros.github_upload_releases(patterns)|indent }} diff --git a/src/arrow/dev/tasks/linux-packages/helper.rb b/src/arrow/dev/tasks/linux-packages/helper.rb new file mode 100644 index 000000000..30ac3b898 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/helper.rb @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Helper + module ApacheArrow + private + def detect_release_time + release_time_env = ENV["ARROW_RELEASE_TIME"] + if release_time_env + Time.parse(release_time_env).utc + else + latest_commit_time(arrow_source_dir) || Time.now.utc + end + end + + def arrow_source_dir + File.join(__dir__, "..", "..", "..") + end + + def detect_version(release_time) + version_env = ENV["ARROW_VERSION"] + return version_env if version_env + + pom_xml_path = File.join(arrow_source_dir, "java", "pom.xml") + pom_xml_content = File.read(pom_xml_path) + version = pom_xml_content[/^ <version>(.+?)<\/version>/, 1] + formatted_release_time = release_time.strftime("%Y%m%d") + version.gsub(/-SNAPSHOT\z/) {"-dev#{formatted_release_time}"} + end + + def detect_env(name) + value = ENV[name] + return value if value and not value.empty? 
+ + dot_env_path = File.join(arrow_source_dir, ".env") + File.open(dot_env_path) do |dot_env| + dot_env.each_line do |line| + case line.chomp + when /\A#{Regexp.escape(name)}=(.*)/ + return $1 + end + end + end + raise "Failed to detect #{name} environment variable" + end + + def detect_repo + detect_env("REPO") + end + + def docker_image(os, architecture) + architecture ||= "amd64" + "#{detect_repo}:#{architecture}-#{os}-package-#{@package}" + end + end +end diff --git a/src/arrow/dev/tasks/linux-packages/package-task.rb b/src/arrow/dev/tasks/linux-packages/package-task.rb new file mode 100644 index 000000000..9dd1b2d93 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/package-task.rb @@ -0,0 +1,645 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require "English" +require "open-uri" +require "time" + +class PackageTask + include Rake::DSL + + def initialize(package, version, release_time, options={}) + @package = package + @version = version + @release_time = release_time + + @archive_base_name = "#{@package}-#{@version}" + @archive_name = "#{@archive_base_name}.tar.gz" + @full_archive_name = File.expand_path(@archive_name) + + @rpm_package = @package + case @version + when /-((dev|rc)\d+)\z/ + base_version = $PREMATCH + sub_version = $1 + type = $2 + if type == "rc" and options[:rc_build_type] == :release + @deb_upstream_version = base_version + @deb_archive_base_name_version = base_version + @rpm_version = base_version + @rpm_release = "1" + else + @deb_upstream_version = "#{base_version}~#{sub_version}" + @deb_archive_base_name_version = @version + @rpm_version = base_version + @rpm_release = "0.#{sub_version}" + end + else + @deb_upstream_version = @version + @deb_archive_base_name_version = @version + @rpm_version = @version + @rpm_release = "1" + end + @deb_release = ENV["DEB_RELEASE"] || "1" + end + + def define + define_dist_task + define_apt_task + define_yum_task + define_version_task + define_docker_tasks + end + + private + def env_value(name) + value = ENV[name] + raise "Specify #{name} environment variable" if value.nil? + value + end + + def debug_build? + ENV["DEBUG"] != "no" + end + + def git_directory?(directory) + candidate_paths = [".git", "HEAD"] + candidate_paths.any? 
do |candidate_path| + File.exist?(File.join(directory, candidate_path)) + end + end + + def latest_commit_time(git_directory) + return nil unless git_directory?(git_directory) + cd(git_directory) do + return Time.iso8601(`git log -n 1 --format=%aI`.chomp).utc + end + end + + def download(url, output_path) + if File.directory?(output_path) + base_name = url.split("/").last + output_path = File.join(output_path, base_name) + end + absolute_output_path = File.expand_path(output_path) + + unless File.exist?(absolute_output_path) + mkdir_p(File.dirname(absolute_output_path)) + rake_output_message "Downloading... #{url}" + open_url(url) do |downloaded_file| + File.open(absolute_output_path, "wb") do |output_file| + IO.copy_stream(downloaded_file, output_file) + end + end + end + + absolute_output_path + end + + def open_url(url, &block) + URI(url).open(&block) + end + + def substitute_content(content) + content.gsub(/@(.+?)@/) do |matched| + yield($1, matched) + end + end + + def docker_image(os, architecture) + image = "#{@package}-#{os}" + image << "-#{architecture}" if architecture + image + end + + def docker_run(os, architecture, console: false) + id = os + id = "#{id}-#{architecture}" if architecture + image = docker_image(os, architecture) + build_command_line = [ + "docker", + "build", + "--cache-from", image, + "--tag", image, + ] + run_command_line = [ + "docker", + "run", + "--rm", + "--log-driver", "none", + "--volume", "#{Dir.pwd}:/host:rw", + ] + if $stdin.tty? + run_command_line << "--interactive" + run_command_line << "--tty" + else + run_command_line.concat(["--attach", "STDOUT"]) + run_command_line.concat(["--attach", "STDERR"]) + end + build_dir = ENV["BUILD_DIR"] + if build_dir + build_dir = "#{File.expand_path(build_dir)}/#{id}" + mkdir_p(build_dir) + run_command_line.concat(["--volume", "#{build_dir}:/build:rw"]) + end + if debug_build? 
+ build_command_line.concat(["--build-arg", "DEBUG=yes"]) + run_command_line.concat(["--env", "DEBUG=yes"]) + end + pass_through_env_names = [ + "DEB_BUILD_OPTIONS", + "RPM_BUILD_NCPUS", + ] + pass_through_env_names.each do |name| + value = ENV[name] + next unless value + run_command_line.concat(["--env", "#{name}=#{value}"]) + end + if File.exist?(File.join(id, "Dockerfile")) + docker_context = id + else + from = File.readlines(File.join(id, "from")).find do |line| + /^[a-z]/i =~ line + end + build_command_line.concat(["--build-arg", "FROM=#{from.chomp}"]) + docker_context = os + end + build_command_line.concat(docker_build_options(os, architecture)) + run_command_line.concat(docker_run_options(os, architecture)) + build_command_line << docker_context + run_command_line << image + run_command_line << "/host/build.sh" unless console + + sh(*build_command_line) + sh(*run_command_line) + end + + def docker_build_options(os, architecture) + [] + end + + def docker_run_options(os, architecture) + [] + end + + def docker_pull(os, architecture) + image = docker_image(os, architecture) + command_line = [ + "docker", + "pull", + image, + ] + command_line.concat(docker_pull_options(os, architecture)) + sh(*command_line) + end + + def docker_pull_options(os, architecture) + [] + end + + def docker_push(os, architecture) + image = docker_image(os, architecture) + command_line = [ + "docker", + "push", + image, + ] + command_line.concat(docker_push_options(os, architecture)) + sh(*command_line) + end + + def docker_push_options(os, architecture) + [] + end + + def define_dist_task + define_archive_task + desc "Create release package" + task :dist => [@archive_name] + end + + def split_target(target) + components = target.split("-") + if components[0, 2] == ["amazon", "linux"] + components[0, 2] = components[0, 2].join("-") + end + if components.size >= 3 + components[2..-1] = components[2..-1].join("-") + end + components + end + + def enable_apt? 
+ true + end + + def apt_targets + return [] unless enable_apt? + + targets = (ENV["APT_TARGETS"] || "").split(",") + targets = apt_targets_default if targets.empty? + + targets.find_all do |target| + Dir.exist?(File.join(apt_dir, target)) + end + end + + def apt_targets_default + # Disable arm64 targets by default for now + # because they require some setups on host. + [ + "debian-buster", + # "debian-buster-arm64", + "debian-bullseye", + # "debian-bullseye-arm64", + "debian-bookworm", + # "debian-bookworm-arm64", + "ubuntu-bionic", + # "ubuntu-bionic-arm64", + "ubuntu-focal", + # "ubuntu-focal-arm64", + "ubuntu-hirsute", + # "ubuntu-hirsute-arm64", + "ubuntu-impish", + # "ubuntu-impish-arm64", + ] + end + + def deb_archive_base_name + "#{@package}-#{@deb_archive_base_name_version}" + end + + def deb_archive_name + "#{@package}-#{@deb_upstream_version}.tar.gz" + end + + def apt_dir + "apt" + end + + def apt_prepare_debian_dir(tmp_dir, target) + source_debian_dir = nil + specific_debian_dir = "debian.#{target}" + distribution, code_name, _architecture = split_target(target) + platform = [distribution, code_name].join("-") + platform_debian_dir = "debian.#{platform}" + if File.exist?(specific_debian_dir) + source_debian_dir = specific_debian_dir + elsif File.exist?(platform_debian_dir) + source_debian_dir = platform_debian_dir + else + source_debian_dir = "debian" + end + + prepared_debian_dir = "#{tmp_dir}/debian.#{target}" + cp_r(source_debian_dir, prepared_debian_dir) + control_in_path = "#{prepared_debian_dir}/control.in" + if File.exist?(control_in_path) + control_in = File.read(control_in_path) + rm_f(control_in_path) + File.open("#{prepared_debian_dir}/control", "w") do |control| + prepared_control = apt_prepare_debian_control(control_in, target) + control.print(prepared_control) + end + end + end + + def apt_prepare_debian_control(control_in, target) + message = "#{__method__} must be defined to use debian/control.in" + raise NotImplementedError, message + 
end + + def apt_build(console: false) + tmp_dir = "#{apt_dir}/tmp" + rm_rf(tmp_dir) + mkdir_p(tmp_dir) + cp(deb_archive_name, + File.join(tmp_dir, deb_archive_name)) + apt_targets.each do |target| + apt_prepare_debian_dir(tmp_dir, target) + end + + env_sh = "#{apt_dir}/env.sh" + File.open(env_sh, "w") do |file| + file.puts(<<-ENV) +PACKAGE=#{@package} +VERSION=#{@deb_upstream_version} + ENV + end + + apt_targets.each do |target| + cd(apt_dir) do + distribution, version, architecture = split_target(target) + os = "#{distribution}-#{version}" + docker_run(os, architecture, console: console) + end + end + end + + def define_apt_task + namespace :apt do + source_build_sh = "#{__dir__}/apt/build.sh" + build_sh = "#{apt_dir}/build.sh" + repositories_dir = "#{apt_dir}/repositories" + + file build_sh => source_build_sh do + cp(source_build_sh, build_sh) + end + + directory repositories_dir + + desc "Build deb packages" + if enable_apt? + build_dependencies = [ + deb_archive_name, + build_sh, + repositories_dir, + ] + else + build_dependencies = [] + end + task :build => build_dependencies do + apt_build if enable_apt? + end + + namespace :build do + desc "Open console" + task :console => build_dependencies do + apt_build(console: true) if enable_apt? + end + end + end + + desc "Release APT repositories" + apt_tasks = [ + "apt:build", + ] + task :apt => apt_tasks + end + + def enable_yum? + true + end + + def yum_targets + return [] unless enable_yum? + + targets = (ENV["YUM_TARGETS"] || "").split(",") + targets = yum_targets_default if targets.empty? + + targets.find_all do |target| + Dir.exist?(File.join(yum_dir, target)) + end + end + + def yum_targets_default + # Disable aarch64 targets by default for now + # because they require some setups on host. 
+ [ + "almalinux-8", + # "almalinux-8-aarch64", + "amazon-linux-2", + # "amazon-linux-2-aarch64", + "centos-7", + # "centos-7-aarch64", + "centos-8", + # "centos-8-aarch64", + ] + end + + def rpm_archive_base_name + "#{@package}-#{@rpm_version}" + end + + def rpm_archive_name + "#{rpm_archive_base_name}.tar.gz" + end + + def yum_dir + "yum" + end + + def yum_build_sh + "#{yum_dir}/build.sh" + end + + def yum_expand_variable(key) + case key + when "PACKAGE" + @rpm_package + when "VERSION" + @rpm_version + when "RELEASE" + @rpm_release + else + nil + end + end + + def yum_spec_in_path + "#{yum_dir}/#{@rpm_package}.spec.in" + end + + def yum_build(console: false) + tmp_dir = "#{yum_dir}/tmp" + rm_rf(tmp_dir) + mkdir_p(tmp_dir) + cp(rpm_archive_name, + File.join(tmp_dir, rpm_archive_name)) + + env_sh = "#{yum_dir}/env.sh" + File.open(env_sh, "w") do |file| + file.puts(<<-ENV) +SOURCE_ARCHIVE=#{rpm_archive_name} +PACKAGE=#{@rpm_package} +VERSION=#{@rpm_version} +RELEASE=#{@rpm_release} + ENV + end + + spec = "#{tmp_dir}/#{@rpm_package}.spec" + spec_in_data = File.read(yum_spec_in_path) + spec_data = substitute_content(spec_in_data) do |key, matched| + yum_expand_variable(key) || matched + end + File.open(spec, "w") do |spec_file| + spec_file.print(spec_data) + end + + yum_targets.each do |target| + cd(yum_dir) do + distribution, version, architecture = split_target(target) + os = "#{distribution}-#{version}" + docker_run(os, architecture, console: console) + end + end + end + + def define_yum_task + namespace :yum do + source_build_sh = "#{__dir__}/yum/build.sh" + file yum_build_sh => source_build_sh do + cp(source_build_sh, yum_build_sh) + end + + repositories_dir = "#{yum_dir}/repositories" + directory repositories_dir + + desc "Build RPM packages" + if enable_yum? 
+ build_dependencies = [ + repositories_dir, + rpm_archive_name, + yum_build_sh, + yum_spec_in_path, + ] + else + build_dependencies = [] + end + task :build => build_dependencies do + yum_build if enable_yum? + end + + namespace :build do + desc "Open console" + task :console => build_dependencies do + yum_build(console: true) if enable_yum? + end + end + end + + desc "Release Yum repositories" + yum_tasks = [ + "yum:build", + ] + task :yum => yum_tasks + end + + def define_version_task + namespace :version do + desc "Update versions" + task :update do + update_debian_changelog + update_spec + end + end + end + + def package_changelog_message + "New upstream release." + end + + def packager_name + ENV["DEBFULLNAME"] || ENV["NAME"] || guess_packager_name_from_git + end + + def guess_packager_name_from_git + name = `git config --get user.name`.chomp + return name unless name.empty? + `git log -n 1 --format=%aN`.chomp + end + + def packager_email + ENV["DEBEMAIL"] || ENV["EMAIL"] || guess_packager_email_from_git + end + + def guess_packager_email_from_git + email = `git config --get user.email`.chomp + return email unless email.empty? + `git log -n 1 --format=%aE`.chomp + end + + def update_content(path) + if File.exist?(path) + content = File.read(path) + else + content = "" + end + content = yield(content) + File.open(path, "w") do |file| + file.puts(content) + end + end + + def update_debian_changelog + return unless enable_apt? + + Dir.glob("debian*") do |debian_dir| + update_content("#{debian_dir}/changelog") do |content| + <<-CHANGELOG.rstrip +#{@package} (#{@deb_upstream_version}-#{@deb_release}) unstable; urgency=low + + * New upstream release. + + -- #{packager_name} <#{packager_email}> #{@release_time.rfc2822} + +#{content} + CHANGELOG + end + end + end + + def update_spec + return unless enable_yum? 
+ + release_time = @release_time.strftime("%a %b %d %Y") + update_content(yum_spec_in_path) do |content| + content = content.sub(/^(%changelog\n)/, <<-CHANGELOG) +%changelog +* #{release_time} #{packager_name} <#{packager_email}> - #{@rpm_version}-#{@rpm_release} +- #{package_changelog_message} + + CHANGELOG + content = content.sub(/^(Release:\s+)\d+/, "\\11") + content.rstrip + end + end + + def define_docker_tasks + namespace :docker do + pull_tasks = [] + push_tasks = [] + + (apt_targets + yum_targets).each do |target| + distribution, version, architecture = split_target(target) + os = "#{distribution}-#{version}" + + namespace :pull do + desc "Pull built image for #{target}" + task target do + docker_pull(os, architecture) + end + pull_tasks << "docker:pull:#{target}" + end + + namespace :push do + desc "Push built image for #{target}" + task target do + docker_push(os, architecture) + end + push_tasks << "docker:push:#{target}" + end + end + + desc "Pull built images" + task :pull => pull_tasks + + desc "Push built images" + task :push => push_tasks + end + end +end diff --git a/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml b/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml new file mode 100644 index 000000000..3703f4c46 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/travis.linux.arm64.yml @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +arch: arm64-graviton2 +virt: vm +os: linux +dist: focal +group: edge +language: minimal + +addons: + apt: + packages: + - apt-utils + # We need Ubuntu 20.10 or later + # - createrepo_c + - devscripts + - gpg + - libgit2-dev + - python3-pip + - rake + - rpm + + # https://bugs.launchpad.net/ubuntu/+source/glibc/+bug/1916485 + # We need to use runc 1.0.0~rc93 or later from focal-updates. + - runc + + # To build createrepo_c from source. + # We can remove them when we can install createrepo_c package + - cmake + - libbz2-dev + - libcurl4-openssl-dev + - libglib2.0-dev + - liblzma-dev + - libmagic-dev + - librpm-dev + - libsqlite3-dev + - libssl-dev + - libxml2-dev + - libzstd-dev + - pkg-config + - zlib1g-dev + update: true + +services: + - docker + +# don't build twice +if: tag IS blank + +env: + global: + - APT_TARGETS={{ target }} + - ARROW_VERSION={{ arrow.version }} + - BUILD_REF={{ arrow.head }} + - TRAVIS_TAG={{ task.tag }} + - YUM_TARGETS={{ target }} + +before_script: + - set -e + {{ macros.travis_checkout_arrow() }} + {{ macros.travis_docker_login() }} + + # Build createrepo_c from source. + # We can remove them when we can install createrepo_c package + - git clone --depth 1 https://github.com/rpm-software-management/createrepo_c.git + - pushd createrepo_c + - | + /usr/bin/cmake \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DENABLE_BASHCOMP=OFF \ + -DENABLE_DRPM=OFF \ + -DENABLE_PYTHON=OFF \ + -DWITH_LIBMODULEMD=OFF \ + -DWITH_ZCHUNK=OFF \ + . 
+ - make -j$(nproc) + - sudo make install + - popd + - rm -rf createrepo_c + +script: + # Build packages + - pushd arrow/dev/tasks/linux-packages + - rake version:update + - | + rake docker:pull || : + - pushd apache-arrow-apt-source/apt + - | + for target in debian-* ubuntu-*; do + cp -a ${target} ${target}-arm64 + done + - popd + - pushd apache-arrow-release/yum + - | + for target in almalinux-* centos-*; do + cp -a ${target} ${target}-aarch64 + done + - popd + - | + rake \ + --trace \ + {{ task_namespace }}:build \ + BUILD_DIR=build \ + DEB_BUILD_OPTIONS=parallel=2 \ + RPM_BUILD_NCPUS=2 + - sudo rm -rf */*/build + - popd + # Push Docker image + - pushd arrow/dev/tasks/linux-packages + - | + docker login -u "${DOCKERHUB_USER}" \ + -p "${DOCKERHUB_TOKEN}" || : + - | + rake docker:push || : + - popd + # Test built packages + - sudo gem install apt-dists-merge + - | + (echo "Key-Type: RSA"; \ + echo "Key-Length: 4096"; \ + echo "Name-Real: Test"; \ + echo "Name-Email: test@example.com"; \ + echo "%no-protection") | \ + gpg --full-generate-key --batch + - | + GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10) + - gpg --export --armor test@example.com > arrow/dev/tasks/linux-packages/KEYS + - pushd arrow/dev/tasks/linux-packages + - | + rake --trace {{ task_namespace }}:test \ + CREATEREPO=createrepo_c \ + GPG_KEY_ID=${GPG_KEY_ID} + - rm -rf {{ task_namespace }}/repositories + - popd + +after_success: + {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %} + {{ macros.travis_upload_releases(patterns) }} diff --git a/src/arrow/dev/tasks/linux-packages/yum/build.sh b/src/arrow/dev/tasks/linux-packages/yum/build.sh new file mode 100755 index 000000000..5224f23e9 --- /dev/null +++ b/src/arrow/dev/tasks/linux-packages/yum/build.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +# -*- sh-indentation: 2; sh-basic-offset: 2 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -u + +run() +{ + "$@" + if test $? -ne 0; then + echo "Failed $@" + exit 1 + fi +} + +rpmbuild_options= + +. /host/env.sh + +if grep -q amazon /etc/system-release-cpe; then + distribution=$(cut -d ":" -f 5 /etc/system-release-cpe | tr '_' '-') + distribution_version=$(cut -d ":" -f 6 /etc/system-release-cpe) +else + distribution=$(cut -d ":" -f 4 /etc/system-release-cpe) + distribution_version=$(cut -d ":" -f 5 /etc/system-release-cpe) +fi +distribution_version=$(echo ${distribution_version} | sed -e 's/\..*$//g') + +architecture="$(arch)" +lib_directory=/usr/lib64 +case "${architecture}" in + i*86) + architecture=i386 + lib_directory=/usr/lib + ;; +esac + +run mkdir -p /build +run cd /build +find . 
-not -path ./ccache -a -not -path "./ccache/*" -delete +if which ccache > /dev/null 2>&1; then + export CCACHE_COMPILERCHECK=content + export CCACHE_COMPRESS=1 + export CCACHE_COMPRESSLEVEL=6 + export CCACHE_MAXSIZE=500M + export CCACHE_DIR="${PWD}/ccache" + ccache --show-stats + if [ -d "${lib_directory}/ccache" ]; then + PATH="${lib_directory}/ccache:$PATH" + fi +fi + +run mkdir -p rpmbuild +run cd +rm -rf rpmbuild +run ln -fs /build/rpmbuild ./ +if [ -x /usr/bin/rpmdev-setuptree ]; then + rm -rf .rpmmacros + run rpmdev-setuptree +else + run cat <<RPMMACROS > ~/.rpmmacros +%_topdir ${HOME}/rpmbuild +RPMMACROS + run mkdir -p rpmbuild/SOURCES + run mkdir -p rpmbuild/SPECS + run mkdir -p rpmbuild/BUILD + run mkdir -p rpmbuild/RPMS + run mkdir -p rpmbuild/SRPMS +fi + +repositories="/host/repositories" +repository="${repositories}/${distribution}/${distribution_version}" +rpm_dir="${repository}/${architecture}/Packages" +srpm_dir="${repository}/source/SRPMS" +run mkdir -p "${rpm_dir}" "${srpm_dir}" + +# for debug +# rpmbuild_options="$rpmbuild_options --define 'optflags -O0 -g3'" + +if [ -n "${SOURCE_ARCHIVE}" ]; then + case "${RELEASE}" in + 0.dev*) + source_archive_base_name=$( \ + echo ${SOURCE_ARCHIVE} | sed -e 's/\.tar\.gz$//') + run tar xf /host/tmp/${SOURCE_ARCHIVE} \ + --transform="s,^[^/]*,${PACKAGE}," + run mv \ + ${PACKAGE} \ + ${source_archive_base_name} + run tar czf \ + rpmbuild/SOURCES/${SOURCE_ARCHIVE} \ + ${source_archive_base_name} + run rm -rf ${source_archive_base_name} + ;; + *) + run cp /host/tmp/${SOURCE_ARCHIVE} rpmbuild/SOURCES/ + ;; + esac +else + run cp /host/tmp/${PACKAGE}-${VERSION}.* rpmbuild/SOURCES/ +fi +run cp \ + /host/tmp/${PACKAGE}.spec \ + rpmbuild/SPECS/ + +run cat <<BUILD > build.sh +#!/usr/bin/env bash + +rpmbuild -ba ${rpmbuild_options} rpmbuild/SPECS/${PACKAGE}.spec +BUILD +run chmod +x build.sh +if [ -n "${SCL:-}" ]; then + run cat <<WHICH_STRIP > which-strip.sh +#!/usr/bin/env bash + +which strip +WHICH_STRIP + run chmod +x 
which-strip.sh + run cat <<USE_SCL_STRIP >> ~/.rpmmacros +%__strip $(run scl enable ${SCL} ./which-strip.sh) +USE_SCL_STRIP + if [ "${DEBUG:-no}" = "yes" ]; then + run scl enable ${SCL} ./build.sh + else + run scl enable ${SCL} ./build.sh > /dev/null + fi +else + if [ "${DEBUG:-no}" = "yes" ]; then + run ./build.sh + else + run ./build.sh > /dev/null + fi +fi + +if which ccache > /dev/null 2>&1; then + ccache --show-stats +fi + +run mv rpmbuild/RPMS/*/* "${rpm_dir}/" +run mv rpmbuild/SRPMS/* "${srpm_dir}/" + +run chown -R "$(stat --format "%u:%g" "${repositories}")" "${repositories}" diff --git a/src/arrow/dev/tasks/macros.jinja b/src/arrow/dev/tasks/macros.jinja new file mode 100644 index 000000000..be265caa4 --- /dev/null +++ b/src/arrow/dev/tasks/macros.jinja @@ -0,0 +1,198 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{%- macro github_header() -%} +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow +on: + push: + branches: + - "*-github-*" +{% endmacro %} + +{%- macro github_checkout_arrow() -%} + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow config core.symlinks true + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh +{% endmacro %} + +{%- macro github_login_dockerhub() -%} + - name: Login to Dockerhub + uses: docker/login-action@v1 + with: + username: {{ '${{ secrets.DOCKERHUB_USER }}' }} + password: {{ '${{ secrets.DOCKERHUB_TOKEN }}' }} +{% endmacro %} + +{%- macro github_login_ghcr() -%} + - name: Login to GitHub Container Registry + shell: bash + run: docker login ghcr.io -u {{ '${{ github.repository_owner }}' }} -p {{ '${{ secrets.CROSSBOW_GHCR_TOKEN }}' }} +{% endmacro %} + +{%- macro github_install_archery() -%} + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install Archery + shell: bash + run: pip install -e arrow/dev/archery[all] +{% endmacro %} + +{%- macro github_upload_releases(pattern) -%} + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Setup Crossbow + shell: bash + run: pip install -e arrow/dev/archery[crossbow-upload] + - name: Upload artifacts + shell: bash + run: | + archery crossbow \ + --queue-path $(pwd) \ + --queue-remote {{ queue_remote_url }} \ + upload-artifacts \ + --sha {{ task.branch }} \ + --tag {{ task.tag }} \ + {% if pattern is string %} + "{{ pattern }}" + {% elif pattern is iterable %} + {% for p in pattern %} + "{{ p }}" {{ "\\" if not loop.last else "" }} + {% endfor %} + {% endif %} + env: + CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}' }} +{% endmacro %} + +{%- macro github_upload_gemfury(pattern) -%} + {%- if arrow.branch == 
'master' -%} + - name: Upload package to Gemfury + shell: bash + run: | + path=$(ls {{ pattern }}) + curl -F "package=@${path}" https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/ + env: + CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }} + CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }} + {% endif %} +{% endmacro %} + +{%- macro azure_checkout_arrow() -%} + - script: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + displayName: Clone arrow +{% endmacro %} + +{%- macro azure_upload_releases(pattern) -%} + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.8' + - script: pip install -e arrow/dev/archery[crossbow-upload] + displayName: Install Crossbow + - bash: | + archery crossbow \ + --queue-path $(pwd) \ + --queue-remote {{ queue_remote_url }} \ + upload-artifacts \ + --sha {{ task.branch }} \ + --tag {{ task.tag }} \ + {% if pattern is string %} + "{{ pattern }}" + {% elif pattern is iterable %} + {% for p in pattern %} + "{{ p }}" {{ "\\" if not loop.last else "" }} + {% endfor %} + {% endif %} + env: + CROSSBOW_GITHUB_TOKEN: $(CROSSBOW_GITHUB_TOKEN) + displayName: Upload packages as a GitHub release +{% endmacro %} + +{%- macro azure_upload_anaconda(pattern) -%} + {%- if arrow.branch == 'master' -%} + - task: CondaEnvironment@1 + inputs: + packageSpecs: 'anaconda-client' + installOptions: '-c conda-forge' + updateConda: no + - script: | + conda install -y anaconda-client + anaconda -t $(CROSSBOW_ANACONDA_TOKEN) upload --force {{ pattern }} + displayName: Upload packages to Anaconda + {% endif %} +{% endmacro %} + +{%- macro travis_checkout_arrow() -%} + - git clone --no-checkout {{ arrow.remote }} arrow + - git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + - git -C arrow checkout FETCH_HEAD + - git -C arrow submodule update --init 
--recursive +{% endmacro %} + +{%- macro travis_install_archery() -%} + - sudo -H pip3 install --upgrade pip + - sudo -H pip3 install docker-compose + - sudo -H pip3 install -e arrow/dev/archery[docker] +{% endmacro %} + +{%- macro travis_docker_login() -%} + - echo "${DOCKERHUB_TOKEN}" | docker login --username "${DOCKERHUB_USER}" --password-stdin +{% endmacro %} + +{%- macro travis_upload_releases(pattern) -%} + - sudo -H pip3 install pygit2==1.0 + - sudo -H pip3 install -e arrow/dev/archery[crossbow-upload] + - | + archery crossbow \ + --queue-path $(pwd) \ + --queue-remote {{ queue_remote_url }} \ + upload-artifacts \ + --sha {{ task.branch }} \ + --tag {{ task.tag }} \ + {% if pattern is string %} + "{{ pattern }}" + {% elif pattern is iterable %} + {% for p in pattern %} + "{{ p }}" {{ "\\" if not loop.last else "" }} + {% endfor %} + {% endif %} +{% endmacro %} + +{%- macro travis_upload_gemfury(pattern) -%} + {%- if arrow.branch == 'master' -%} + - | + WHEEL_PATH=$(echo arrow/python/repaired_wheels/*.whl) + curl \ + -F "package=@${WHEEL_PATH}" \ + "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/" + {% endif %} +{% endmacro %} diff --git a/src/arrow/dev/tasks/nightlies.sample.yml b/src/arrow/dev/tasks/nightlies.sample.yml new file mode 100644 index 000000000..710f7c0ad --- /dev/null +++ b/src/arrow/dev/tasks/nightlies.sample.yml @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this travis configuration can be used to submit cron scheduled tasks +# 1. copy this file to one of crossbow's branches (master for example) with +# filename .travis.yml +# 2. set up daily cron jobs for that particular branch, see travis' +# documentation https://docs.travis-ci.com/user/cron-jobs/ + +branches: + # don't attempt to build branches intended for windows builds + except: + - /.*win.*/ + +os: linux +dist: trusty +language: generic + +before_install: + # Install Miniconda. + - echo `pwd` + - | + echo "" + echo "Installing a fresh version of Miniconda." + MINICONDA_URL="https://repo.continuum.io/miniconda" + MINICONDA_FILE="Miniconda3-latest-Linux-x86_64.sh" + curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}" + bash $MINICONDA_FILE -b + + # Configure conda. + - | + echo "" + echo "Configuring conda." + source /home/travis/miniconda3/bin/activate root + conda config --remove channels defaults + conda config --add channels defaults + conda config --add channels conda-forge + conda config --set show_channel_urls true + +install: + - pushd .. 
+ # to build against a specific branch of a fork + # git clone -b <branch> https://github.com/<user>/arrow + - git clone https://github.com/apache/arrow + - pip install dev/archery[crossbow] + +script: + # submit packaging tasks + - | + if [ $TRAVIS_EVENT_TYPE = "cron" ]; then + archery crossbow submit -g conda -g wheel -g linux + else + archery crossbow submit --dry-run -g conda -g wheel -g linux + fi diff --git a/src/arrow/dev/tasks/nuget-packages/github.linux.yml b/src/arrow/dev/tasks/nuget-packages/github.linux.yml new file mode 100644 index 000000000..cd03a7bfe --- /dev/null +++ b/src/arrow/dev/tasks/nuget-packages/github.linux.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + package: + name: Package + runs-on: ubuntu-latest + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Prepare version + run: | + sed -i'' -E -e \ + "s/^ <Version>.+<\/Version>/ <Version>{{ arrow.no_rc_semver_version }}<\/Version>/" \ + arrow/csharp/Directory.Build.props + - name: Build package + run: | + pushd arrow + archery docker run {{ run }} + popd + + {% set patterns = ["arrow/csharp/artifacts/**/*.nupkg", + "arrow/csharp/artifacts/**/*.snupkg"] %} + {{ macros.github_upload_releases(patterns)|indent }} diff --git a/src/arrow/dev/tasks/python-sdist/github.yml b/src/arrow/dev/tasks/python-sdist/github.yml new file mode 100644 index 000000000..68371876a --- /dev/null +++ b/src/arrow/dev/tasks/python-sdist/github.yml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + build: + name: "Build sdist" + runs-on: ubuntu-20.04 + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Build sdist + run: | + archery docker run python-sdist + {% if arrow.branch == 'master' %} + archery docker push python-sdist || : + {% endif %} + env: + PYARROW_VERSION: {{ arrow.no_rc_version }} + + - name: Test sdist + run: archery docker run ubuntu-python-sdist-test + env: + PYARROW_VERSION: {{ arrow.no_rc_version }} + + {{ macros.github_upload_releases("arrow/python/dist/*.tar.gz")|indent }} + {{ macros.github_upload_gemfury("arrow/python/dist/*.tar.gz")|indent }} diff --git a/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml b/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml new file mode 100644 index 000000000..dc2386482 --- /dev/null +++ b/src/arrow/dev/tasks/python-wheels/github.linux.amd64.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + build: + name: "Build wheel for Manylinux {{ manylinux_version }}" + runs-on: ubuntu-latest + env: + # archery uses these environment variables + ARCH: amd64 + PYTHON: "{{ python_version }}" + + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_install_archery()|indent }} + {{ macros.github_login_dockerhub()|indent }} + + - name: Build wheel + shell: bash + run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }} + + # TODO(kszucs): auditwheel show + - name: Test wheel + shell: bash + run: | + archery docker run python-wheel-manylinux-test-imports + archery docker run python-wheel-manylinux-test-unittests + + {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }} + {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }} + + {% if arrow.branch == 'master' %} + - name: Push Docker Image + shell: bash + run: | + archery docker push python-wheel-manylinux-{{ manylinux_version }} + archery docker push python-wheel-manylinux-test-unittests + {% endif %} diff --git a/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml b/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml new file mode 100644 index 000000000..8078abfd5 --- /dev/null +++ b/src/arrow/dev/tasks/python-wheels/github.osx.amd64.yml @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +env: + ARROW_S3: {{ arrow_s3 }} + CC: "clang" + CXX: "clang++" + MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}" + PYARROW_BUILD_VERBOSE: 1 + PYARROW_VERSION: "{{ arrow.no_rc_version }}" + PYTHON_VERSION: "{{ python_version }}" + PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}" + VCPKG_DEFAULT_TRIPLET: x64-osx-static-release + VCPKG_FEATURE_FLAGS: "-manifests" + VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }} + VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }} + VCPKG_VERSION: "{{ vcpkg_version }}" + +jobs: + build: + name: Build wheel for OS X + runs-on: macos-10.15 + steps: + {{ macros.github_checkout_arrow()|indent }} + + - name: Install System Dependencies + run: brew install bash bison coreutils ninja cmake + + - uses: actions/cache@v2 + id: vcpkg-cache + with: + path: vcpkg + key: vcpkg-{{ macos_deployment_target }}-{{ vcpkg_version }}-{{ "${{ hashFiles('arrow/ci/vcpkg/*.patch', 'arrow/ci/vcpkg/*osx*.cmake') }}" }} + + - name: Install Vcpkg + if: steps.vcpkg-cache.outputs.cache-hit != 'true' + shell: bash + env: + MACOSX_DEPLOYMENT_TARGET: "10.15" + run: arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT + + - name: Install Packages + run: | + $VCPKG_ROOT/vcpkg install \ + abseil \ + boost-filesystem \ + brotli \ + bzip2 \ + c-ares \ + curl \ + flatbuffers \ + gflags \ + glog \ + grpc \ + lz4 \ + openssl \ + orc \ + protobuf \ + rapidjson \ + re2 \ + snappy \ + 
thrift \ + utf8proc \ + zlib \ + zstd + + {% if arrow_s3 == "ON" %} + - name: Install AWS SDK C++ + run: $VCPKG_ROOT/vcpkg install aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] + {% endif %} + + - name: Install Python {{ python_version }} + shell: bash + run: sudo arrow/ci/scripts/install_python.sh macos {{ python_version }} + + - name: Build Wheel + shell: bash + run: | + $PYTHON -m virtualenv build-env + source build-env/bin/activate + pip install --upgrade pip wheel + arrow/ci/scripts/python_wheel_macos_build.sh x86_64 $(pwd)/arrow $(pwd)/build + + - name: Test Wheel + shell: bash + run: | + $PYTHON -m virtualenv test-env + source test-env/bin/activate + pip install --upgrade pip wheel + arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow + + {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }} + {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }} diff --git a/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml b/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml new file mode 100644 index 000000000..e5456dbfc --- /dev/null +++ b/src/arrow/dev/tasks/python-wheels/github.osx.arm64.yml @@ -0,0 +1,157 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Prerequisites on the host: +# - brew install bash bison coreutils ninja cmake +# - sudo arrow/ci/scripts/install_python.sh macos 3.9 + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +env: + ARROW_FLIGHT: OFF + ARROW_JEMALLOC: OFF + ARROW_SIMD_LEVEL: "{{ arrow_simd_level }}" + CC: "clang" + CMAKE_BUILD_TYPE: release + CMAKE_CXX_COMPILER_LAUNCHER: "ccache" + CXX: "clang++" + MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}" + PYARROW_BUILD_VERBOSE: 1 + PYARROW_VERSION: "{{ arrow.no_rc_version }}" + PYTHON_VERSION: "{{ python_version }}" + PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}" + VCPKG_DEFAULT_TRIPLET: {{ arch }}-osx-static-release + VCPKG_FEATURE_FLAGS: "-manifests" + VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }} + VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }} + VCPKG_VERSION: "{{ vcpkg_version }}" + +jobs: + build: + name: Build wheel for OS X + runs-on: self-hosted + steps: + - name: Cleanup + shell: bash + run: rm -rf arrow vcpkg build crossbow-env build-env test-*-env + + {{ macros.github_checkout_arrow()|indent }} + + - name: Add Brew's Bison to PATH + shell: bash + run: echo "/opt/homebrew/opt/bison/bin" >> $GITHUB_PATH + + - name: Install Vcpkg + shell: bash + env: + MACOSX_DEPLOYMENT_TARGET: "11.0" + run: arch -arm64 arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT + + - name: Install OpenSSL + shell: bash + run: arch -arm64 $VCPKG_ROOT/vcpkg install openssl + + {% if arch == "universal2" %} + # OpenSSL doesn't provide a universal2 configuration yet, so vcpkg is + # unable to propagate the list of architectures from VCPKG_OSX_ARCHITECTURES. 
+ # In order to prevent link time warnings (which may turn out to be errors) + # we compile OpenSSL separately for the two architectures and merge the + # binaries into universal2 ones using `lipo`. + - name: Create universal binaries for OpenSSL + shell: bash + run: | + for arch in arm64 x64; do + VCPKG_DEFAULT_TRIPLET=${arch}-osx-static-release arch -arm64 $VCPKG_ROOT/vcpkg install openssl + done + for lib in libcrypto libssl; do + lipo -create $VCPKG_ROOT/installed/arm64-osx-static-release/lib/${lib}.a \ + $VCPKG_ROOT/installed/x64-osx-static-release/lib/${lib}.a \ + -output $VCPKG_ROOT/installed/universal2-osx-static-release/lib/${lib}.a + done + {% endif %} + + - name: Install Packages + run: | + arch -arm64 $VCPKG_ROOT/vcpkg install \ + aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ + boost-filesystem \ + brotli \ + bzip2 \ + c-ares \ + curl \ + flatbuffers \ + gflags \ + glog \ + lz4 \ + orc \ + protobuf \ + rapidjson \ + re2 \ + snappy \ + thrift \ + utf8proc \ + zlib \ + zstd + + - name: Build Wheel + shell: bash + run: | + $PYTHON -m virtualenv build-env + source build-env/bin/activate + pip install --upgrade pip wheel + arch -arm64 arrow/ci/scripts/python_wheel_macos_build.sh {{ arch }} $(pwd)/arrow $(pwd)/build + + - name: Test Wheel on ARM64 + shell: bash + env: + PYTEST_ADDOPTS: "-k 'not test_cancellation'" + run: | + $PYTHON -m virtualenv test-arm64-env + source test-arm64-env/bin/activate + pip install --upgrade pip wheel + arch -arm64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow + + {% if arch == "universal2" %} + - name: Test Wheel on AMD64 + shell: bash + env: + PYTEST_ADDOPTS: "-k 'not test_cancellation'" + run: | + $PYTHON -m virtualenv test-amd64-env + source test-amd64-env/bin/activate + pip install --upgrade pip wheel + arch -x86_64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow + {% endif %} + + - name: Upload artifacts + shell: bash + run: | + $PYTHON -m virtualenv crossbow-env + source 
crossbow-env/bin/activate + arch -x86_64 pip install -e arrow/dev/archery[crossbow-upload] + arch -x86_64 archery crossbow \ + --queue-path $(pwd) \ + --queue-remote {{ queue_remote_url }} \ + upload-artifacts \ + --sha {{ task.branch }} \ + --tag {{ task.tag }} \ + "arrow/python/repaired_wheels/*.whl" + env: + CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }} + + {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }} diff --git a/src/arrow/dev/tasks/python-wheels/github.windows.yml b/src/arrow/dev/tasks/python-wheels/github.windows.yml new file mode 100644 index 000000000..f9989aed0 --- /dev/null +++ b/src/arrow/dev/tasks/python-wheels/github.windows.yml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + build: + name: "Build wheel for Windows" + runs-on: windows-2019 + env: + # archery uses this environment variable + PYTHON: "{{ python_version }}" + # this is a private repository at the moment (mostly because of licensing + # consideration of windows images with visual studio), but anyone can + # recreate the image by manually building it via: + # `archery build python-wheel-windows-vs2017` + # note that we don't run docker build since there wouldn't be a cache hit + # and rebuilding the dependencies takes a fair amount of time + REPO: ghcr.io/ursacomputing/arrow + # prefer the docker cli over docker-compose + ARCHERY_USE_DOCKER_CLI: 1 + + steps: + {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_login_ghcr()|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Build wheel + shell: cmd + run: archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017 + + # Two layers of the official python 3.6 windows image are not available for download. + # Docker pull returns with unexpected status resolving reader: 403 Forbidden. + # See https://issues.apache.org/jira/browse/ARROW-14424 + {% if python_version != "3.6" %} + - name: Test wheel + shell: cmd + run: archery docker run python-wheel-windows-test + {% endif %} + + {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }} + {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }} diff --git a/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml b/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml new file mode 100644 index 000000000..d32d89d83 --- /dev/null +++ b/src/arrow/dev/tasks/python-wheels/travis.linux.arm64.yml @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +arch: arm64-graviton2 +virt: vm +os: linux +dist: focal +group: edge +language: minimal + +addons: + apt: + packages: + - libgit2-dev + - python3-pip + +services: + - docker + +# don't build twice +if: tag IS blank + +env: + global: + - BUILD_REF={{ arrow.head }} + - TRAVIS_TAG={{ task.tag }} + # archery uses these environment variables + - ARCH=arm64v8 + - PYTHON="{{ python_version }}" + +before_script: + - set -e + {{ macros.travis_checkout_arrow() }} + {{ macros.travis_docker_login() }} + +script: + # Install Archery and Crossbow dependencies + {{ macros.travis_install_archery() }} + + # Build and Test packages + # output something every minute to prevent travis from killing the build + - while sleep 1m; do echo "=====[ $SECONDS seconds still running ]====="; done & + - archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-manylinux-{{ manylinux_version }} + - archery docker run python-wheel-manylinux-test-imports + - archery docker run python-wheel-manylinux-test-unittests + - kill %1 + +after_success: + # Upload wheel as github artifact + {{ macros.travis_upload_releases("arrow/python/repaired_wheels/*.whl") }} + {{ macros.travis_upload_gemfury("arrow/python/repaired_wheels/*.whl") }} + + 
{% if arrow.branch == 'master' %} + # Push the docker image to dockerhub + - archery docker push python-wheel-manylinux-{{ manylinux_version }} + - archery docker push python-wheel-manylinux-test-unittests + {% endif %} diff --git a/src/arrow/dev/tasks/r/azure.linux.yml b/src/arrow/dev/tasks/r/azure.linux.yml new file mode 100644 index 000000000..92e725f68 --- /dev/null +++ b/src/arrow/dev/tasks/r/azure.linux.yml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +jobs: + - job: linux + pool: + vmImage: ubuntu-latest + timeoutInMinutes: 360 + steps: + - script: | + set -ex + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + displayName: Clone arrow + + - script: | + set -ex + docker -v + docker-compose -v + cd arrow + export R_ORG={{ r_org }} + export R_IMAGE={{ r_image }} + export R_TAG={{ r_tag }} + export DEVTOOLSET_VERSION={{ devtoolset_version|default("-1") }} + docker-compose pull --ignore-pull-failures r + docker-compose build r + displayName: Docker build + + - script: | + set -ex + cd arrow + export R_ORG={{ r_org }} + export R_IMAGE={{ r_image }} + export R_TAG={{ r_tag }} + export ARROW_R_DEV={{ not_cran|default("TRUE") }} + # Note that by default, ci/scripts/r_test.sh sets NOT_CRAN=true + # if ARROW_R_DEV=TRUE. Pass `-e NOT_CRAN=false` to turn that off. + docker-compose run {{ flags|default("") }} r + displayName: Docker run + + - script: | + set -ex + cat arrow/r/check/arrow.Rcheck/00install.out + displayName: Dump install logs + condition: succeededOrFailed() + - script: | + set -ex + cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout* + displayName: Dump test logs + condition: succeededOrFailed() diff --git a/src/arrow/dev/tasks/r/github.devdocs.yml b/src/arrow/dev/tasks/r/github.devdocs.yml new file mode 100644 index 000000000..5591e6587 --- /dev/null +++ b/src/arrow/dev/tasks/r/github.devdocs.yml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + devdocs: + name: 'R devdocs {{ "${{ matrix.os }}" }}' + runs-on: {{ "${{ matrix.os }}" }} + strategy: + fail-fast: false + matrix: + os: [macOS-latest, ubuntu-20.04] + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-pandoc@v1 + - name: Install knitr, rmarkdown + run: | + install.packages(c("rmarkdown", "knitr", "sessioninfo")) + shell: Rscript {0} + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + - name: Remove system gfortran so that brew can install gcc successfully + run: rm -f /usr/local/bin/gfortran + - name: Write the install script + env: + RUN_DEVDOCS: TRUE + DEVDOCS_MACOS: {{ "${{contains(matrix.os, 'macOS')}}" }} + DEVDOCS_UBUNTU: {{ "${{contains(matrix.os, 'ubuntu')}}" }} + run: | + # This isn't actually rendering the docs, but will save arrow/r/vignettes/script.sh + # which can be sourced to install arrow. 
+ rmarkdown::render("arrow/r/vignettes/developing.Rmd") + shell: Rscript {0} + - name: Install from the devdocs + env: + LIBARROW_BINARY: FALSE + ARROW_R_DEV: TRUE + run: bash arrow/r/vignettes/script.sh + shell: bash + - name: Ensure that the Arrow package is loadable and we have the correct one + run: | + echo $LD_LIBRARY_PATH + R --no-save <<EOF + Sys.getenv("LD_LIBRARY_PATH") + library(arrow) + arrow_info() + EOF + shell: bash -l {0} + - name: Save the install script + uses: actions/upload-artifact@v2 + with: + name: {{ "devdocs-script_os-${{ matrix.os }}_sysinstall-${{ matrix.system-install }}" }} + path: arrow/r/vignettes/script.sh + if: always() diff --git a/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml b/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml new file mode 100644 index 000000000..e48b67ac6 --- /dev/null +++ b/src/arrow/dev/tasks/r/github.linux.arrow.version.back.compat.yml @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push + +jobs: + write-files: + name: "Write files" + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + env: + ARROW_R_DEV: "TRUE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - uses: r-lib/actions/setup-r@v1 + - name: Install dependencies + run: | + install.packages(c("remotes", "glue", "sys")) + remotes::install_deps("arrow/r", dependencies = TRUE) + shell: Rscript {0} + - name: Install Arrow + run: | + cd arrow/r + R CMD INSTALL . + shell: bash + - name: Write files + run: | + cd arrow/r + R -f extra-tests/write-files.R + shell: bash + + - name: Upload the parquet artifacts + uses: actions/upload-artifact@v2 + with: + name: files + path: arrow/r/extra-tests/files + + read-files: + name: "Read files with Arrow {{ '${{ matrix.config.old_arrow_version }}' }}" + needs: [write-files] + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + config: + # We use the R version that was released at the time of the arrow release in order + # to make sure we can download binaries from RSPM. 
+ - { old_arrow_version: '5.0.0', r: '4.1' } + - { old_arrow_version: '4.0.0', r: '4.0' } + - { old_arrow_version: '3.0.0', r: '4.0' } + - { old_arrow_version: '2.0.0', r: '4.0' } + - { old_arrow_version: '1.0.1', r: '4.0' } + env: + ARROW_R_DEV: "TRUE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + OLD_ARROW_VERSION: {{ '${{ matrix.config.old_arrow_version }}' }} + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - uses: r-lib/actions/setup-r@v1 + with: + r-version: {{ '${{ matrix.config.r }}' }} + - name: Install old Arrow + run: | + install.packages(c("remotes", "testthat")) + remotes::install_version("arrow", "{{ '${{ matrix.config.old_arrow_version }}' }}") + shell: Rscript {0} + - name: Setup our testing directory, copy only the tests to it. + run: | + mkdir -p extra-tests/files + cp arrow/r/extra-tests/helper*.R extra-tests/ + cp arrow/r/extra-tests/test-*.R extra-tests/ + - name: Download artifacts + uses: actions/download-artifact@v2 + with: + name: files + path: extra-tests/files + - name: Test reading + run: | + testthat::test_dir("extra-tests") + shell: Rscript {0} diff --git a/src/arrow/dev/tasks/r/github.linux.cran.yml b/src/arrow/dev/tasks/r/github.linux.cran.yml new file mode 100644 index 000000000..03d22dcbf --- /dev/null +++ b/src/arrow/dev/tasks/r/github.linux.cran.yml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + as-cran: + name: "rhub/{{ MATRIX }}" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # See https://hub.docker.com/r/rhub + r_image: + - debian-gcc-devel + - debian-gcc-patched + - debian-gcc-release + - fedora-gcc-devel + - fedora-clang-devel + env: + R_ORG: "rhub" + R_IMAGE: {{ MATRIX }} + R_TAG: "latest" + ARROW_R_DEV: "FALSE" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - name: Docker Pull + shell: bash + run: cd arrow && docker-compose pull --ignore-pull-failures r + - name: Docker Build + shell: bash + run: cd arrow && docker-compose build r + - name: Docker Run + shell: bash + run: cd arrow && docker-compose run r + - name: Dump install logs + run: cat arrow/r/check/arrow.Rcheck/00install.out + if: always() + - name: Dump test logs + run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout* + if: always() + - name: Save the test output + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-output + path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout* diff --git 
a/src/arrow/dev/tasks/r/github.linux.offline.build.yml b/src/arrow/dev/tasks/r/github.linux.offline.build.yml new file mode 100644 index 000000000..60685b18c --- /dev/null +++ b/src/arrow/dev/tasks/r/github.linux.offline.build.yml @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + grab-dependencies: + name: "Download thirdparty dependencies" + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + env: + ARROW_R_DEV: "TRUE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - uses: r-lib/actions/setup-r@v1 + - name: Pull Arrow dependencies + run: | + cd arrow/r + # This is `make build`, but with no vignettes and not running `make doc` + cp ../NOTICE.txt inst/NOTICE.txt + rsync --archive --delete ../cpp tools/ + cp -p ../.env tools/ + cp -p ../NOTICE.txt tools/ + cp -p ../LICENSE.txt tools/ + R CMD build --no-build-vignettes --no-manual . 
+ built_tar=$(ls -1 arrow*.tar.gz | head -n 1) + R -e "source('R/install-arrow.R'); create_package_with_all_dependencies(dest_file = 'arrow_with_deps.tar.gz', source_file = \"${built_tar}\")" + shell: bash + - name: Upload the third party dependency artifacts + uses: actions/upload-artifact@v2 + with: + name: thirdparty_deps + path: arrow/r/arrow_with_deps.tar.gz + + intall-offline: + name: "Install offline" + needs: [grab-dependencies] + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + env: + ARROW_R_DEV: TRUE + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - uses: r-lib/actions/setup-r@v1 + - name: Download artifacts + uses: actions/download-artifact@v2 + with: + name: thirdparty_deps + path: arrow/r/ + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt install libcurl4-openssl-dev libssl-dev + - name: Install dependencies + run: | + install.packages(c("remotes", "glue", "sys")) + remotes::install_deps("arrow/r", dependencies = TRUE) + shell: Rscript {0} + - name: Install + env: + TEST_OFFLINE_BUILD: true + LIBARROW_MINIMAL: false + run: | + cd arrow/r + R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz + - name: Run the tests + run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")' + - name: Dump test logs + run: cat arrow-tests/testthat.Rout* + if: always() + - name: Save the test output + uses: actions/upload-artifact@v2 + with: + name: test-output + path: arrow-tests/testthat.Rout* + if: always() diff --git a/src/arrow/dev/tasks/r/github.linux.rchk.yml b/src/arrow/dev/tasks/r/github.linux.rchk.yml new file mode 100644 index 000000000..72ff26969 --- /dev/null +++ 
b/src/arrow/dev/tasks/r/github.linux.rchk.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + as-cran: + name: "rchk" + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + env: + ARROW_R_DEV: "FALSE" + RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-pandoc@v1 + - name: Install dependencies + run: | + install.packages("remotes") + remotes::install_deps("arrow/r", dependencies = TRUE) + shell: Rscript {0} + - name: Build arrow package + run: | + R CMD build --no-build-vignettes arrow/r + mkdir packages + mv arrow_*.tar.gz packages + - name: rchk + run: | + docker run -v `pwd`/packages:/rchk/packages kalibera/rchk:latest 
/rchk/packages/arrow_*.tar.gz |& tee rchk.out + - name: Confirm that rchk has no errors + # Suspicious call, [UP], and [PB] are all of the error types currently at + # https://github.com/kalibera/cran-checks/tree/master/rchk/results + # though this might not be exhaustive, there does not appear to be a way to have rchk return an error code + # CRAN also will remove some of the outputs (especially those related to Rcpp and strptime, e.g. + # ERROR: too many states (abstraction error?)) + # https://github.com/kalibera/rchk + run: | + if [ $(grep -c "Suspicious call" rchk.out) -gt 0 ] || [ $(grep -c "\[UP\]" rchk.out) -gt 0 ] || [ $(grep -c "\[PB\]" rchk.out) -gt 0 ]; then + echo "Found rchk errors" + cat rchk.out + exit 1 + fi + if: always() + - name: Dump rchk output logs + run: cat rchk.out + if: always() diff --git a/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml b/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml new file mode 100644 index 000000000..80071171b --- /dev/null +++ b/src/arrow/dev/tasks/r/github.linux.revdepcheck.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + r-versions: + name: "rstudio/r-base:latest-focal" + runs-on: ubuntu-latest + strategy: + fail-fast: false + env: + R_ORG: "rstudio" + R_IMAGE: "r-base" + R_TAG: "latest-focal" + ARROW_R_DEV: "TRUE" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - name: Docker Pull + shell: bash + run: cd arrow && docker-compose pull --ignore-pull-failures r + - name: Docker Build + shell: bash + run: cd arrow && docker-compose build r-revdepcheck + - name: Docker Run + shell: bash + run: cd arrow && docker-compose run r-revdepcheck + - name: revdepcheck CRAN report + if: always() + shell: bash + run: cat arrow/r/revdep/cran.md + - name: revdepcheck failures + if: always() + shell: bash + run: cat arrow/r/revdep/failures.md + - name: revdepcheck problems + if: always() + shell: bash + run: cat arrow/r/revdep/problems.md + - name: Save the revdep output + if: always() + uses: actions/upload-artifact@v2 + with: + name: revdepcheck-folder + path: arrow/r/revdep diff --git a/src/arrow/dev/tasks/r/github.linux.versions.yml b/src/arrow/dev/tasks/r/github.linux.versions.yml new file mode 100644 index 000000000..f383fe8d0 --- /dev/null +++ b/src/arrow/dev/tasks/r/github.linux.versions.yml @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + r-versions: + name: "rstudio/r-base:{{ MATRIX }}-bionic" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # See https://hub.docker.com/r/rstudio/r-base + r_version: + # We test devel, release, and oldrel in regular CI. + # This is for older versions + # rlang and vctrs depend on R >= 3.3 + - "3.3" + - "3.4" + - "3.5" + - "3.6" + env: + R_ORG: "rstudio" + R_IMAGE: "r-base" + R_TAG: "{{ MATRIX }}-bionic" + ARROW_R_DEV: "TRUE" + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Free Up Disk Space + shell: bash + run: arrow/ci/scripts/util_cleanup.sh + - name: Fetch Submodules and Tags + shell: bash + run: cd arrow && ci/scripts/util_checkout.sh + - name: Docker Pull + shell: bash + run: cd arrow && docker-compose pull --ignore-pull-failures r + - name: Docker Build + shell: bash + run: cd arrow && docker-compose build r + - name: Docker Run + shell: bash + run: cd arrow && docker-compose run r + - name: Dump install logs + run: cat arrow/r/check/arrow.Rcheck/00install.out + if: always() + - name: Dump test logs + 
run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout* + if: always() + - name: Save the test output + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-output + path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout* diff --git a/src/arrow/dev/tasks/r/github.macos-linux.local.yml b/src/arrow/dev/tasks/r/github.macos-linux.local.yml new file mode 100644 index 000000000..79e3332af --- /dev/null +++ b/src/arrow/dev/tasks/r/github.macos-linux.local.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + autobrew: + name: "install from local source" + runs-on: {{ "${{ matrix.os }}" }} + strategy: + fail-fast: false + matrix: + os: [macOS-latest, ubuntu-20.04] + + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Configure non-autobrew dependencies (macos) + run: | + cd arrow/r + brew install openssl + if: contains(matrix.os, 'macOS') + - name: Configure non-autobrew dependencies (linux) + run: | + cd arrow/r + sudo apt-get update + sudo apt install libcurl4-openssl-dev libssl-dev + if: contains(matrix.os, 'ubuntu') + - uses: r-lib/actions/setup-r@v1 + - name: Install dependencies + run: | + install.packages("remotes") + remotes::install_deps("arrow/r", dependencies = TRUE) + remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo")) + shell: Rscript {0} + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + - name: Install + env: + _R_CHECK_CRAN_INCOMING_: false + ARROW_USE_PKG_CONFIG: false + FORCE_BUNDLED_BUILD: true + LIBARROW_MINIMAL: false + ARROW_R_DEV: TRUE + run: | + cd arrow/r + R CMD INSTALL . 
--install-tests + - name: Run the tests + run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")' + - name: Dump test logs + run: cat arrow-tests/testthat.Rout* + if: failure() + - name: Save the test output + uses: actions/upload-artifact@v2 + with: + name: test-output + path: arrow-tests/testthat.Rout* + if: always() diff --git a/src/arrow/dev/tasks/r/github.macos.autobrew.yml b/src/arrow/dev/tasks/r/github.macos.autobrew.yml new file mode 100644 index 000000000..1b8500f64 --- /dev/null +++ b/src/arrow/dev/tasks/r/github.macos.autobrew.yml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! 
+name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + autobrew: + name: "Autobrew" + runs-on: macOS-latest + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Configure autobrew script + run: | + cd arrow/r + # Put the formula inside r/ so that it's included in the package build + cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb + # Pin the current commit in the formula to test so that we're not always pulling from master + sed -i.bak -E -e 's@https://github.com/apache/arrow.git"$@{{ arrow.remote }}.git", :revision => "{{ arrow.head }}"@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak + # Sometimes crossbow gives a remote URL with .git and sometimes not. Make sure there's only one + sed -i.bak -E -e 's@.git.git@.git@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak + # Get minio for S3 testing + brew install minio + - uses: r-lib/actions/setup-r@v1 + - name: Install dependencies + run: | + install.packages("remotes") + remotes::install_deps("arrow/r", dependencies = TRUE) + remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo")) + shell: Rscript {0} + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + - name: Check + env: + _R_CHECK_CRAN_INCOMING_: false + ARROW_USE_PKG_CONFIG: false + run: arrow/ci/scripts/r_test.sh arrow + - name: Dump install logs + run: cat arrow/r/check/arrow.Rcheck/00install.out + if: always() + - name: Dump test logs + run: cat arrow/r/check/arrow.Rcheck/tests/testthat.Rout* + if: always() + - name: Save the test output + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-output + path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout* 
diff --git a/src/arrow/dev/tasks/tasks.yml b/src/arrow/dev/tasks/tasks.yml new file mode 100644 index 000000000..8e67d3116 --- /dev/null +++ b/src/arrow/dev/tasks/tasks.yml @@ -0,0 +1,1308 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +groups: + # these groups are just for convenience + # makes it easier to submit related tasks + +{############################# Packaging tasks ###############################} + + conda: + - conda-* + + wheel: + - wheel-* + + linux: + - almalinux-* + - amazon-linux-* + - centos-* + - debian-* + - ubuntu-* + + linux-amd64: + - almalinux-*-amd64 + - amazon-linux-*-amd64 + - centos-*-amd64 + - debian-*-amd64 + - ubuntu-*-amd64 + + linux-arm64: + - almalinux-*-arm64 + - centos-*-arm64 + - debian-*-arm64 + - ubuntu-*-arm64 + + homebrew: + - homebrew-* + + packaging: + - almalinux-* + - amazon-linux-* + - centos-* + - debian-* + - java-jars + - nuget + - python-sdist + - ubuntu-* + - wheel-* + +{############################# Testing tasks #################################} + + test: + - test-* + + cpp: + - test-*cpp* + + c-glib: + - test-*c-glib* + + python: + - test-*python* + + r: + - test*-r-* + - homebrew-r-autobrew + # r-conda tasks + - conda-linux-gcc-py*-cpu-r* + - conda-osx-clang-py*-r* + - 
conda-win-vs2017-py*-r* + + ruby: + - test-*ruby* + + vcpkg: + - test-*vcpkg* + + integration: + - test-*dask* + - test-*hdfs* + - test-*kartothek* + - test-*pandas* + - test-*spark* + # TEMP disable because those are failing due to needing upstream fix (ARROW-13594) + # - test-*turbodbc* + + example: + - example-* + + example-cpp: + - example-*cpp* + + verify-rc: + - verify-rc-* + + verify-rc-binaries: + - verify-rc-binaries-* + + verify-rc-wheels: + - verify-rc-wheels-* + + verify-rc-source: + - verify-rc-source-* + + verify-rc-source-macos: + - verify-rc-source-*-macos-* + + verify-rc-source-linux: + - verify-rc-source-*-linux-* + +{######################## Tasks to run regularly #############################} + + nightly: + - almalinux-* + - amazon-linux-* + - debian-* + - ubuntu-* + - centos-* + - conda-* + - java-jars + # List the homebrews explicitly because we don't care about running homebrew-cpp-autobrew + - homebrew-cpp + - homebrew-r-autobrew + - nuget + - test-* + - example-* + - wheel-* + - python-sdist + +tasks: + # arbitrary_task_name: + # template: path of jinja2 templated yml + # params: optional extra parameters + # artifacts: list of regex patterns, each needs to match a single github + # release asset, version variable is replaced in the pattern + # e.g.: + # - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2 + +{############################## Conda Linux ##################################} + + conda-clean: + ci: azure + template: conda-recipes/azure.clean.yml + + # Important notes on the conda setup here: + # + # * On conda-forge the `pyarrow` and `arrow-cpp` packages are built in + # the same feedstock as the dependency matrix is the same for them as + # Python and the OS are the main dimension. The R package `r-arrow` is + # an independent feedstock as it doesn't have the Python but the + # R dimension. 
To limit the number of CI jobs, we are building `r-arrow` + # for R 4.0 with the Python 3.6 jobs and for R 4.1 with the Python 3.7 jobs. + # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically + # generated and to be synced regularly from the feedstock. We have no way + # yet to generate them inside the arrow repository automatically. + + conda-linux-gcc-py36-cpu-r40: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython + r_config: linux_64_r_base4.0 + artifacts: + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-linux-gcc-py37-cpu-r41: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython + r_config: linux_64_r_base4.1 + artifacts: + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-linux-gcc-py38-cpu: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-linux-gcc-py39-cpu: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + +{% for python_version, numpy_version in [("3.6", "1.17"), + ("3.7", "1.17"), + ("3.8", "1.17"), + ("3.9", "1.19"), + ("3.10", "1.21")] %} + {% set pyver = python_version | replace(".", "") %} + + conda-linux-gcc-py{{ pyver }}-cuda: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_64_cuda_compiler_version10.2numpy{{ numpy_version 
}}python{{ python_version }}.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2 + - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2 + + conda-linux-gcc-py{{ pyver }}-arm64: + ci: azure + template: conda-recipes/azure.linux.yml + params: + config: linux_aarch64_numpy{{ numpy_version }}python{{ python_version }}.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2 + +{% endfor %} + + ############################## Conda OSX #################################### + + conda-osx-clang-py36-r40: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_64_numpy1.17python3.6.____cpython + r_config: osx_64_r_base4.0 + artifacts: + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-osx-clang-py37-r41: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_64_numpy1.17python3.7.____cpython + r_config: osx_64_r_base4.1 + artifacts: + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-osx-clang-py38: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_64_numpy1.17python3.8.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-osx-clang-py39: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_64_numpy1.19python3.9.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-osx-arm64-clang-py38: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_arm64_python3.8.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + - 
pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-osx-arm64-clang-py39: + ci: azure + template: conda-recipes/azure.osx.yml + params: + config: osx_arm64_python3.9.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + + ############################## Conda Windows ################################ + + conda-win-vs2017-py36-r40: + ci: azure + template: conda-recipes/azure.win.yml + params: + config: win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython + r_config: win_64_r_base4.0 + artifacts: + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-win-vs2017-py37-r41: + ci: azure + template: conda-recipes/azure.win.yml + params: + config: win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython + r_config: win_64_r_base4.1 + artifacts: + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-win-vs2017-py38: + ci: azure + template: conda-recipes/azure.win.yml + params: + config: win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2 + + conda-win-vs2017-py39: + ci: azure + template: conda-recipes/azure.win.yml + params: + config: win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython + artifacts: + - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2 + + +{% for python_version, python_tag, abi_tag in [("3.6", "cp36", "cp36m"), + ("3.7", "cp37", "cp37m"), + ("3.8", "cp38", "cp38"), + ("3.9", "cp39", "cp39"), + ("3.10", "cp310", "cp310")] %} + +{############################## Wheel Linux ##################################} + +{% for ci, arch, arch_alias, x_y, manylinux in [("github", "amd64", 
"x86_64", "2_12", "2010"), + ("github", "amd64", "x86_64", "2_17", "2014"), + ("travis", "arm64", "aarch64", "2_17", "2014")] %} + wheel-manylinux{{ manylinux }}-{{ python_tag }}-{{ arch }}: + ci: {{ ci }} + template: python-wheels/{{ ci }}.linux.{{ arch }}.yml + params: + python_version: "{{ python_version }}" + manylinux_version: {{ manylinux }} + artifacts: + - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-manylinux_{{ x_y }}_{{ arch_alias }}.manylinux{{ manylinux }}_{{ arch_alias }}.whl +{% endfor %} + +{############################## Wheel OSX ####################################} + +# enable S3 support from macOS 10.13 so we don't need to bundle curl, crypt and ssl +{% for macos_version, macos_codename, arrow_s3 in [("10.9", "mavericks", "OFF"), + ("10.13", "high-sierra", "ON")] %} + {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %} + + wheel-macos-{{ macos_codename }}-{{ python_tag }}-amd64: + ci: github + template: python-wheels/github.osx.amd64.yml + params: + vcpkg_version: "2021.04.30" + python_version: "{{ python_version }}" + macos_deployment_target: {{ macos_version }} + arrow_s3: {{ arrow_s3 }} + artifacts: + - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl + +{% endfor %} + +{############################## Wheel Windows ################################} + + wheel-windows-{{ python_tag }}-amd64: + ci: github + template: python-wheels/github.windows.yml + params: + python_version: "{{ python_version }}" + artifacts: + - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-win_amd64.whl + +{% endfor %} + +{############################## Wheel OSX M1 #################################} + + # The python 3.8 universal2 installer has been built with macos deployment + # target 11.0, so we cannot build binaries with earlier deployment target + # otherwise distutils will raise a deployment target version mismatch error. 
+ wheel-macos-big-sur-cp38-arm64: + ci: github + template: python-wheels/github.osx.arm64.yml + params: + arch: arm64 + arrow_simd_level: "DEFAULT" + vcpkg_version: "2021.04.30" + python_version: "3.8" + macos_deployment_target: "11.0" + + artifacts: + - pyarrow-{no_rc_version}-cp38-cp38-macosx_11_0_arm64.whl + +{% for python_version, python_tag in [("3.9", "cp39"), ("3.10", "cp310")] %} + wheel-macos-big-sur-{{ python_tag }}-arm64: + ci: github + template: python-wheels/github.osx.arm64.yml + params: + arch: arm64 + arrow_simd_level: "DEFAULT" + vcpkg_version: "2021.04.30" + python_version: "{{ python_version }}" + macos_deployment_target: "11.0" + artifacts: + - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_11_0_arm64.whl + + wheel-macos-big-sur-{{ python_tag }}-universal2: + ci: github + template: python-wheels/github.osx.arm64.yml + params: + arch: universal2 + # Universal2 builds for both rosetta and native, but we currently can't + # configure SIMD for both architectures at the same time + arrow_simd_level: "NONE" + vcpkg_version: "2021.04.30" + python_version: "{{ python_version }}" + macos_deployment_target: "10.13" + artifacts: + - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_10_13_universal2.whl +{% endfor %} + +{############################ Python sdist ####################################} + + python-sdist: + ci: github + template: python-sdist/github.yml + artifacts: + - pyarrow-{no_rc_version}.tar.gz + +{############################## Linux PKGS ####################################} + +{% for target in ["debian-buster", + "debian-bullseye", + "debian-bookworm", + "ubuntu-bionic", + "ubuntu-focal", + "ubuntu-hirsute", + "ubuntu-impish"] %} + {% for architecture in ["amd64", "arm64"] %} + {{ target }}-{{ architecture }}: + {% if architecture == "amd64" %} + ci: github + template: linux-packages/github.linux.amd64.yml + {% else %} + ci: travis + template: linux-packages/travis.linux.arm64.yml + {% endif %} + params: 
+ {% if architecture == "amd64" %} + target: "{{ target }}" + {% else %} + target: "{{ target }}-arm64" + {% endif %} + task_namespace: "apt" + upload_extensions: + - .ddeb + - .deb + - .debian.tar.xz + - .dsc + - .orig.tar.gz + artifacts: + {% if architecture == "amd64" %} + - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz + - apache-arrow-apt-source_{no_rc_version}-1.dsc + - apache-arrow-apt-source_{no_rc_version}-1_all.deb + - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz + - apache-arrow_{no_rc_version}-1.debian.tar.xz + - apache-arrow_{no_rc_version}-1.dsc + - apache-arrow_{no_rc_version}.orig.tar.gz + {% endif %} + - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - gir1.2-arrow-flight-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - 
libarrow-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-python-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-python-flight600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-python600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-python600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow600_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva600_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet600_{no_rc_version}-1_[a-z0-9]+.deb + {% if architecture == "amd64" %} + - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda600_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb + - 
libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libplasma-glib600_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libplasma600_{no_rc_version}-1_[a-z0-9]+.deb + - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb + {% endif %} + {% endfor %} +{% endfor %} + +{% for target in ["almalinux-8", + "amazon-linux-2", + "centos-7", + "centos-8"] %} + {% set is_rhel8_based = (target == "almalinux-8" or target == "centos-8") %} + {% for architecture in ["amd64", "arm64"] %} + {% if not (target in ["amazon-linux-2", "centos-7"] and architecture == "arm64") %} + {{ target }}-{{ architecture }}: + {% if architecture == "amd64" %} + ci: github + template: linux-packages/github.linux.amd64.yml + {% else %} + ci: travis + template: linux-packages/travis.linux.arm64.yml + {% endif %} + params: + {% if architecture == "amd64" %} + target: "{{ target }}" + {% else %} + target: "{{ target }}-aarch64" + {% endif %} + task_namespace: "yum" + upload_extensions: + - .rpm + artifacts: + {% if architecture == "amd64" %} + - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.noarch.rpm + - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.src.rpm + {% endif %} + - arrow-dataset-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-dataset-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-dataset-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-dataset-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-dataset-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-dataset-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-dataset-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if 
is_rhel8_based %} + - arrow-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if target != "amazon-linux-2" %} + - arrow-python-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - arrow-python-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-python-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-python-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-python-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow-python-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + {% if architecture == "amd64" %} + - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm + {% endif %} + {% if is_rhel8_based and architecture == "amd64" %} + - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - gandiva-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - 
gandiva-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - gandiva-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - gandiva-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - gandiva-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - gandiva-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - parquet-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - parquet-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - parquet-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - parquet-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - parquet-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - parquet-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - parquet-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - plasma-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - plasma-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - plasma-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - plasma-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - plasma-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - plasma-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - plasma-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if is_rhel8_based %} + - plasma-store-server-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - plasma-store-server-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + {% endfor %} +{% endfor %} + + ############################## Homebrew Tasks ################################ + + homebrew-cpp: + ci: github + template: homebrew-formulae/github.macos.yml + params: + formula: apache-arrow.rb + + homebrew-cpp-autobrew: + ci: github + template: homebrew-formulae/github.macos.yml + params: + formula: autobrew/apache-arrow.rb + + 
homebrew-r-autobrew: + # This tests that the autobrew formula + script work in practice + ci: github + template: r/github.macos.autobrew.yml + + ############################## Arrow JARs ################################### + + java-jars: + # Build JARs that contain the C++ library dependencies + ci: github + template: java-jars/github.yml + artifacts: + - arrow-algorithm-{no_rc_version}-tests.jar + - arrow-algorithm-{no_rc_version}.jar + - arrow-algorithm-{no_rc_version}.pom + - arrow-avro-{no_rc_version}-tests.jar + - arrow-avro-{no_rc_version}.jar + - arrow-avro-{no_rc_version}.pom + - arrow-c-data-{no_rc_version}-tests.jar + - arrow-c-data-{no_rc_version}.jar + - arrow-c-data-{no_rc_version}.pom + - arrow-compression-{no_rc_version}-tests.jar + - arrow-compression-{no_rc_version}.jar + - arrow-compression-{no_rc_version}.pom + - arrow-dataset-{no_rc_version}-tests.jar + - arrow-dataset-{no_rc_version}.jar + - arrow-dataset-{no_rc_version}.pom + - arrow-format-{no_rc_version}-tests.jar + - arrow-format-{no_rc_version}.jar + - arrow-format-{no_rc_version}.pom + - arrow-gandiva-{no_rc_version}-tests.jar + - arrow-gandiva-{no_rc_version}.jar + - arrow-gandiva-{no_rc_version}.pom + - arrow-java-root-{no_rc_version}.pom + - arrow-jdbc-{no_rc_version}-tests.jar + - arrow-jdbc-{no_rc_version}.jar + - arrow-jdbc-{no_rc_version}.pom + - arrow-memory-{no_rc_version}.pom + - arrow-memory-core-{no_rc_version}-tests.jar + - arrow-memory-core-{no_rc_version}.jar + - arrow-memory-core-{no_rc_version}.pom + - arrow-memory-netty-{no_rc_version}-tests.jar + - arrow-memory-netty-{no_rc_version}.jar + - arrow-memory-netty-{no_rc_version}.pom + - arrow-memory-unsafe-{no_rc_version}-tests.jar + - arrow-memory-unsafe-{no_rc_version}.jar + - arrow-memory-unsafe-{no_rc_version}.pom + - arrow-orc-{no_rc_version}-tests.jar + - arrow-orc-{no_rc_version}.jar + - arrow-orc-{no_rc_version}.pom + - arrow-performance-{no_rc_version}-tests.jar + - arrow-performance-{no_rc_version}.jar + - 
arrow-performance-{no_rc_version}.pom + - arrow-plasma-{no_rc_version}-tests.jar + - arrow-plasma-{no_rc_version}.jar + - arrow-plasma-{no_rc_version}.pom + - arrow-tools-{no_rc_version}-jar-with-dependencies.jar + - arrow-tools-{no_rc_version}-tests.jar + - arrow-tools-{no_rc_version}.jar + - arrow-tools-{no_rc_version}.pom + - arrow-vector-{no_rc_version}-shade-format-flatbuffers.jar + - arrow-vector-{no_rc_version}-tests.jar + - arrow-vector-{no_rc_version}.jar + - arrow-vector-{no_rc_version}.pom + - flight-core-{no_rc_version}-jar-with-dependencies.jar + - flight-core-{no_rc_version}-shaded-ext.jar + - flight-core-{no_rc_version}-shaded.jar + - flight-core-{no_rc_version}-tests.jar + - flight-core-{no_rc_version}.jar + - flight-core-{no_rc_version}.pom + - flight-grpc-{no_rc_version}-tests.jar + - flight-grpc-{no_rc_version}.jar + - flight-grpc-{no_rc_version}.pom + + ############################## NuGet packages ############################### + + nuget: + ci: github + template: nuget-packages/github.linux.yml + params: + run: ubuntu-csharp + artifacts: + - Apache.Arrow.Flight.AspNetCore.{no_rc_version}.nupkg + - Apache.Arrow.Flight.AspNetCore.{no_rc_version}.snupkg + - Apache.Arrow.Flight.{no_rc_version}.nupkg + - Apache.Arrow.Flight.{no_rc_version}.snupkg + - Apache.Arrow.{no_rc_version}.nupkg + - Apache.Arrow.{no_rc_version}.snupkg + + ########################### Release verification ############################ + +{% for target in ["binary", "yum", "apt"] %} + verify-rc-binaries-{{ target }}-amd64: + ci: github + template: verify-rc/github.linux.amd64.yml + params: + env: + TEST_DEFAULT: 0 + TEST_{{ target|upper }}: 1 + artifact: "binaries" +{% endfor %} + +{% for platform, arch, runner in [("linux", "amd64", "ubuntu-20.04"), + ("macos", "amd64", "macos-10.15")] %} + {% for target in ["cpp", + "csharp", + "go", + "integration", + "java", + "js", + "python", + "ruby"] %} + + verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + ci: github + template: 
verify-rc/github.{{ platform }}.{{ arch }}.yml + params: + env: + INSTALL_NODE: 0 + TEST_DEFAULT: 0 + TEST_{{ target|upper }}: 1 + artifact: "source" + github_runner: {{ runner }} + {% endfor %} +{% endfor %} + +{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %} + {% for target in ["cpp", + "csharp", + "go", + "integration", + "js", + "python", + "ruby"] %} + + verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + ci: github + template: verify-rc/github.{{ platform }}.{{ arch }}.yml + params: + env: + ARROW_FLIGHT: 0 + ARROW_GANDIVA: 0 + INSTALL_NODE: 0 + TEST_DEFAULT: 0 + TEST_INTEGRATION_JAVA: 0 + TEST_{{ target|upper }}: 1 + artifact: "source" + github_runner: {{ runner }} + {% endfor %} +{% endfor %} + + verify-rc-wheels-linux-amd64: + ci: github + template: verify-rc/github.linux.amd64.yml + params: + env: + TEST_DEFAULT: 0 + artifact: "wheels" + + verify-rc-wheels-macos-10.15-amd64: + ci: github + template: verify-rc/github.macos.amd64.yml + params: + github_runner: "macos-10.15" + env: + TEST_DEFAULT: 0 + artifact: "wheels" + + # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: + # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources + verify-rc-wheels-macos-11-amd64: + ci: github + template: verify-rc/github.macos.arm64.yml + params: + github_runner: "self-hosted" + arch_emulation: "x86_64" + env: + TEST_DEFAULT: 0 + artifact: "wheels" + + verify-rc-wheels-macos-11-arm64: + ci: github + template: verify-rc/github.macos.arm64.yml + params: + github_runner: "self-hosted" + arch_emulation: "arm64" + env: + TEST_DEFAULT: 0 + artifact: "wheels" + + verify-rc-source-windows: + ci: github + template: verify-rc/github.win.yml + params: + script: "verify-release-candidate.bat" + + verify-rc-wheels-windows: + ci: github + template: verify-rc/github.win.yml + params: + script: 
"verify-release-candidate-wheels.bat" + +{############################## Docker tests #################################} + +{% for image in ["conda-cpp", + "debian-c-glib", + "ubuntu-c-glib", + "debian-ruby", + "ubuntu-ruby"] %} + test-{{ image }}: + ci: github + template: docker-tests/github.linux.yml + params: + image: {{ image }} +{% endfor %} + + # Use azure to run valgrind tests to prevent OOM + test-conda-cpp-valgrind: + ci: azure + template: docker-tests/azure.linux.yml + params: + run: conda-cpp-valgrind + +{% for ubuntu_version in ["18.04", "20.04"] %} + test-ubuntu-{{ ubuntu_version }}-cpp: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: {{ ubuntu_version }} + image: ubuntu-cpp +{% endfor %} + + test-ubuntu-20.04-cpp-bundled: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 20.04 + image: ubuntu-cpp-bundled + + test-debian-11-cpp: + ci: github + template: docker-tests/github.linux.yml + params: + env: + DEBIAN: 11 + image: debian-cpp + + test-fedora-33-cpp: + ci: github + template: docker-tests/github.linux.yml + params: + env: + FEDORA: 33 + image: fedora-cpp + + test-ubuntu-18.04-cpp-release: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 18.04 + flags: "-e ARROW_BUILD_TYPE=release" + image: ubuntu-cpp + + test-ubuntu-18.04-cpp-static: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 18.04 + flags: "-e ARROW_BUILD_SHARED=OFF -e ARROW_BUILD_STATIC=ON -e ARROW_TEST_LINKAGE=static" + image: ubuntu-cpp + +{% for cpp_standard in [14, 17] %} + test-ubuntu-20.04-cpp-{{ cpp_standard }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 20.04 + flags: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD={{ cpp_standard }}" + image: ubuntu-cpp +{% endfor %} + + test-ubuntu-20.04-cpp-thread-sanitizer: + ci: github + template: docker-tests/github.linux.yml + params: + env: + # clang-tools and llvm version need to be 
synchronized so as + # to have the right llvm-symbolizer version + CLANG_TOOLS: 11 + LLVM: 11 + UBUNTU: 20.04 + image: ubuntu-cpp-thread-sanitizer + +{% for python_version in ["3.6", "3.7", "3.8", "3.9", "3.10"] %} + test-conda-python-{{ python_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: "{{ python_version }}" + image: conda-python +{% endfor %} + + test-conda-python-3.8-hypothesis: + ci: github + template: docker-tests/github.linux.yml + params: + env: + HYPOTHESIS_PROFILE: ci + PYARROW_TEST_HYPOTHESIS: ON + PYTHON: 3.8 + # limit to execute hypothesis tests only + PYTEST_ARGS: "-m hypothesis" + image: conda-python-pandas + + test-debian-11-python-3: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + DEBIAN: 11 + run: debian-python + + test-ubuntu-18.04-python-3: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + UBUNTU: 18.04 + run: ubuntu-python + + test-fedora-33-python-3: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + FEDORA: 33 + run: fedora-python + + test-r-linux-valgrind: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + ARROW_R_DEV: "TRUE" + UBUNTU: 18.04 + run: ubuntu-r-valgrind + + test-r-linux-rchk: + ci: github + template: r/github.linux.rchk.yml + + test-r-linux-as-cran: + ci: github + template: r/github.linux.cran.yml + params: + MATRIX: {{ "${{ matrix.r_image }}" }} + + test-r-arrow-backwards-compatibility: + ci: github + template: r/github.linux.arrow.version.back.compat.yml + + test-r-versions: + ci: github + template: r/github.linux.versions.yml + params: + MATRIX: {{ "${{ matrix.r_version }}" }} + + test-r-install-local: + ci: github + template: r/github.macos-linux.local.yml + + test-r-devdocs: + ci: github + template: r/github.devdocs.yml + + test-r-depsource-auto: + ci: azure + template: r/azure.linux.yml + params: + r_org: rocker + r_image: r-base + r_tag: latest + flags: '-e ARROW_DEPENDENCY_SOURCE=AUTO' 
+ + test-r-depsource-system: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 21.04 + CLANG_TOOLS: 9 # can remove this when >=9 is the default + flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_DEPENDENCY_SOURCE=SYSTEM' + image: ubuntu-r-only-r + + test-r-offline-minimal: + ci: azure + template: r/azure.linux.yml + params: + r_org: rocker + r_image: r-base + r_tag: latest + flags: '-e TEST_OFFLINE_BUILD=true' + + test-r-offline-maximal: + ci: github + template: r/github.linux.offline.build.yml + + +{% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"), + ("rocker", "r-base", "latest"), + ("rstudio", "r-base", "3.6-bionic"), + ("rstudio", "r-base", "3.6-centos8"), + ("rstudio", "r-base", "3.6-opensuse15"), + ("rstudio", "r-base", "3.6-opensuse42")] %} + test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}: + ci: azure + template: r/azure.linux.yml + params: + r_org: {{ r_org }} + r_image: {{ r_image }} + r_tag: {{ r_tag }} +{% endfor %} + + # This is with R built with --enable-lto + # CRAN also does R CMD INSTALL --use-LTO + # which overrides the UseLTO field in r/DESCRIPTION + test-r-rhub-debian-gcc-devel-lto-latest: + ci: azure + template: r/azure.linux.yml + params: + r_org: rhub + r_image: debian-gcc-devel-lto + r_tag: latest + flags: '-e NOT_CRAN=false -e INSTALL_ARGS=--use-LTO' + + # This one has -flto=auto + test-r-ubuntu-21.04: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 21.04 + CLANG_TOOLS: 9 # can remove this when >=9 is the default + flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE' + image: ubuntu-r-only-r + + # This also has -flto=auto + test-r-gcc-11: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 21.04 + CLANG_TOOLS: 9 # can remove this when >=9 is the default + GCC_VERSION: 11 + # S3 support is not buildable with gcc11 right now + flags: '-e 
ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_S3=OFF' + image: ubuntu-r-only-r + + test-r-rstudio-r-base-3.6-centos7-devtoolset-8: + ci: azure + template: r/azure.linux.yml + params: + r_org: rstudio + r_image: r-base + r_tag: 3.6-centos7 + devtoolset_version: 8 + + test-r-minimal-build: + ci: azure + template: r/azure.linux.yml + params: + r_org: rocker + r_image: r-base + r_tag: latest + flags: "-e LIBARROW_MINIMAL=TRUE" + + test-ubuntu-18.04-r-sanitizer: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + UBUNTU: 18.04 + run: ubuntu-r-sanitizer + + revdep-r-check: + ci: github + template: r/github.linux.revdepcheck.yml + + test-debian-11-go-1.15: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + DEBIAN: 11 + GO: 1.15 + run: debian-go + + test-ubuntu-20.10-docs: + ci: azure + template: docker-tests/azure.linux.yml + params: + env: + UBUNTU: "20.10" + run: ubuntu-docs + + test-ubuntu-default-docs: + ci: azure + template: docker-tests/azure.linux.yml + params: + run: ubuntu-docs + + ############################## vcpkg tests ################################## + + test-build-vcpkg-win: + ci: github + template: vcpkg-tests/github.windows.yml + + ############################## Integration tests ############################ + +{% for python_version, pandas_version, numpy_version, cache_leaf in [("3.6", "0.23", "1.16", True), + ("3.7", "0.24", "1.19", True), + ("3.7", "latest", "latest", False), + ("3.8", "latest", "latest", False), + ("3.8", "nightly", "nightly", False), + ("3.9", "master", "nightly", False)] %} + test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: {{ python_version }} + PANDAS: {{ pandas_version }} + NUMPY: {{ numpy_version }} + {% if cache_leaf %} + # use the latest pandas release, so prevent reusing any cached layers + flags: --no-leaf-cache + {% endif %} + image: 
conda-python-pandas +{% endfor %} + +{% for dask_version in ["latest", "master"] %} + test-conda-python-3.9-dask-{{ dask_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: 3.9 + DASK: {{ dask_version }} + # use the latest dask release, so prevent reusing any cached layers + flags: --no-leaf-cache + image: conda-python-dask +{% endfor %} + +# TEMP disable because those are failing due to needing upstream fix (ARROW-13594) +# {% for turbodbc_version in ["latest", "master"] %} +# test-conda-python-3.7-turbodbc-{{ turbodbc_version }}: +# ci: github +# template: docker-tests/github.linux.yml +# params: +# env: +# PYTHON: 3.7 +# TURBODBC: {{ turbodbc_version }} +# # use the latest turbodbc release, so prevent reusing any cached layers +# flags: --no-leaf-cache +# image: conda-python-turbodbc +# {% endfor %} + +{% for kartothek_version in ["latest", "master"] %} + test-conda-python-3.7-kartothek-{{ kartothek_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: 3.7 + KARTOTHEK: {{ kartothek_version }} + flags: --no-leaf-cache + image: conda-python-kartothek +{% endfor %} + +{% for hdfs_version in ["2.9.2", "3.2.1"] %} + test-conda-python-3.7-hdfs-{{ hdfs_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: 3.7 + HDFS: {{ hdfs_version }} + image: conda-python-hdfs +{% endfor %} + +{% for python_version, spark_version, test_pyarrow_only in [("3.7", "v3.1.2", "false"), + ("3.8", "v3.2.0", "false"), + ("3.9", "master", "false")] %} + test-conda-python-{{ python_version }}-spark-{{ spark_version }}: + ci: github + template: docker-tests/github.linux.yml + params: + env: + PYTHON: {{ python_version }} + SPARK: {{ spark_version }} + TEST_PYARROW_ONLY: {{ test_pyarrow_only }} + # use the branch-3.0 of spark, so prevent reusing any layers + flags: --no-leaf-cache + image: conda-python-spark +{% endfor %} + + # Remove the "skipped-" prefix in ARROW-8475 + 
skipped-test-conda-cpp-hiveserver2: + ci: github + template: docker-tests/github.linux.yml + params: + image: conda-cpp-hiveserver2 + +{% for kind in ["static", "static-system-dependency"] %} + example-cpp-minimal-build-{{ kind }}: + ci: github + template: cpp-examples/github.linux.yml + params: + type: minimal_build + run: {{ kind }} +{% endfor %} diff --git a/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat new file mode 100644 index 000000000..6423720c2 --- /dev/null +++ b/src/arrow/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat @@ -0,0 +1,86 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@rem Run VsDevCmd.bat to set Visual Studio environment variables for building +@rem on the command line. 
This is the path for Visual Studio Enterprise 2019 + +call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 + + +@rem Install build dependencies with vcpkg + +@rem TODO(ianmcook): change --x-manifest-root to --manifest-root after it +@rem changes in vcpkg + +vcpkg install ^ + --triplet x64-windows ^ + --x-manifest-root cpp ^ + --feature-flags=versions ^ + --clean-after-build ^ + || exit /B 1 + + +@rem Set environment variables + +set ARROW_TEST_DATA=%cd%\testing\data +set PARQUET_TEST_DATA=%cd%\cpp\submodules\parquet-testing\data + + +@rem Build Arrow C++ library + +mkdir cpp\build +pushd cpp\build + +@rem TODO(ianmcook): test using --parallel %NUMBER_OF_PROCESSORS% with +@rem cmake --build instead of specifying -DARROW_CXXFLAGS="/MP" here +@rem (see https://gitlab.kitware.com/cmake/cmake/-/issues/20564) + +@rem TODO(ianmcook): Add -DARROW_BUILD_BENCHMARKS=ON after the issue described +@rem at https://github.com/google/benchmark/issues/1046 is resolved + +cmake -G "Visual Studio 16 2019" -A x64 ^ + -DARROW_BOOST_USE_SHARED=ON ^ + -DARROW_BUILD_SHARED=ON ^ + -DARROW_BUILD_STATIC=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_CXXFLAGS="/MP" ^ + -DARROW_DATASET=ON ^ + -DARROW_DEPENDENCY_SOURCE=VCPKG ^ + -DARROW_FLIGHT=OFF ^ + -DARROW_MIMALLOC=ON ^ + -DARROW_PARQUET=ON ^ + -DARROW_PYTHON=OFF ^ + -DARROW_WITH_BROTLI=ON ^ + -DARROW_WITH_BZ2=ON ^ + -DARROW_WITH_LZ4=ON ^ + -DARROW_WITH_SNAPPY=ON ^ + -DARROW_WITH_ZLIB=ON ^ + -DARROW_WITH_ZSTD=ON ^ + -DCMAKE_BUILD_TYPE=release ^ + -DCMAKE_UNITY_BUILD=ON ^ + .. || exit /B 1 + +cmake --build . 
--target INSTALL --config Release || exit /B 1 + + +@rem Test Arrow C++ library + +ctest --output-on-failure ^ + --parallel %NUMBER_OF_PROCESSORS% ^ + --timeout 300 || exit /B 1 + +popd diff --git a/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml b/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml new file mode 100644 index 000000000..ad3e793a6 --- /dev/null +++ b/src/arrow/dev/tasks/vcpkg-tests/github.windows.yml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: must set "Crossbow" as name to have the badge links working in the +# github comment reports! +name: Crossbow + +on: + push: + branches: + - "*-github-*" + +jobs: + test-vcpkg-win: + name: Install build deps with vcpkg and build Arrow C++ + runs-on: windows-2019 + steps: + - name: Checkout Arrow + run: | + git clone --no-checkout {{ arrow.remote }} arrow + git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }} + git -C arrow checkout FETCH_HEAD + git -C arrow submodule update --init --recursive + - name: Remove and Reinstall vcpkg + # When running vcpkg in Github Actions on Windows, remove the + # preinstalled vcpkg and install the newest version from source. 
+ # Versions of vcpkg rapidly stop working until updated, and + # the safest and most reliable way to update vcpkg is simply + # to remove and reinstall it. + shell: cmd + run: | + CALL vcpkg integrate remove 2>NUL + CALL C: + CALL cd \ + CALL rmdir /s /q vcpkg 2>NUL + CALL git clone https://github.com/microsoft/vcpkg.git vcpkg + CALL cd vcpkg + CALL bootstrap-vcpkg.bat -win64 -disableMetrics + CALL vcpkg integrate install + CALL setx PATH "%PATH%;C:\vcpkg" + - name: Install Dependencies with vcpkg and Build Arrow C++ + shell: cmd + run: | + CALL cd arrow + CALL dev\tasks\vcpkg-tests\cpp-build-vcpkg.bat diff --git a/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml b/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml new file mode 100644 index 000000000..8a4613a49 --- /dev/null +++ b/src/arrow/dev/tasks/verify-rc/github.linux.amd64.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + verify: + name: "Verify release candidate Ubuntu {{ artifact }}" + runs-on: {{ github_runner|default("ubuntu-20.04") }} + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - name: Install System Dependencies + run: | + # TODO: don't require removing newer llvms + sudo apt-get --purge remove -y llvm-9 clang-9 + sudo apt-get update -y + sudo apt-get install -y \ + autoconf-archive \ + binfmt-support \ + bison \ + curl \ + flex \ + gtk-doc-tools \ + jq \ + libboost-all-dev \ + libgirepository1.0-dev \ + ninja-build \ + qemu-user-static \ + wget + + if [ "$TEST_JAVA" = "1" ]; then + # Maven + MAVEN_VERSION=3.6.3 + wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip + unzip apache-maven-$MAVEN_VERSION-bin.zip + mkdir -p $HOME/java + mv apache-maven-$MAVEN_VERSION $HOME/java + export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH + fi + + if [ "$TEST_RUBY" = "1" ]; then + ruby --version + sudo gem install bundler + fi + - uses: actions/setup-node@v2-beta + with: + node-version: '14' + - name: Run verification + shell: bash + run: | + arrow/dev/release/verify-release-candidate.sh \ + {{ artifact }} \ + {{ release|default("1.0.0") }} {{ rc|default("0") }} diff --git a/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml b/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml new file mode 100644 index 000000000..d39cda382 --- /dev/null +++ b/src/arrow/dev/tasks/verify-rc/github.macos.amd64.yml @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + verify: + name: "Verify release candidate macOS {{ artifact }}" + runs-on: {{ github_runner|default("macos-latest") }} + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - name: Install System Dependencies + shell: bash + run: | + brew update + brew bundle --file=arrow/cpp/Brewfile + brew bundle --file=arrow/c_glib/Brewfile + - uses: actions/setup-node@v2-beta + with: + node-version: '14' + - name: Run verification + shell: bash + run: | + arrow/dev/release/verify-release-candidate.sh \ + {{ artifact }} \ + {{ release|default("1.0.0") }} {{ rc|default("0") }} diff --git a/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml b/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml new file mode 100644 index 000000000..26139ed60 --- /dev/null +++ b/src/arrow/dev/tasks/verify-rc/github.macos.arm64.yml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + verify: + name: "Verify release candidate macOS {{ artifact }}" + runs-on: {{ github_runner }} + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + + steps: + - name: Cleanup + shell: bash + run: rm -rf arrow + + {{ macros.github_checkout_arrow()|indent }} + + - name: Run verification + shell: bash + run: | + export PATH="$(brew --prefix node@14)/bin:$PATH" + export PATH="$(brew --prefix ruby)/bin:$PATH" + export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig" + arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \ + {{ artifact }} \ + {{ release|default("1.0.0") }} {{ rc|default("0") }} diff --git a/src/arrow/dev/tasks/verify-rc/github.win.yml b/src/arrow/dev/tasks/verify-rc/github.win.yml new file mode 100644 index 000000000..5406327e8 --- /dev/null +++ b/src/arrow/dev/tasks/verify-rc/github.win.yml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + verify: + name: "Verify release candidate Windows source" + runs-on: windows-2016 + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - uses: conda-incubator/setup-miniconda@v2 + - name: Install System Dependencies + run: | + choco install boost-msvc-14.1 + choco install wget + - name: Run verification + shell: cmd + run: | + cd arrow + dev/release/{{ script }} {{ release|default("1.0.0") }} {{ rc|default("0") }} diff --git a/src/arrow/dev/test_merge_arrow_pr.py b/src/arrow/dev/test_merge_arrow_pr.py new file mode 100644 index 000000000..8fe188350 --- /dev/null +++ b/src/arrow/dev/test_merge_arrow_pr.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from collections import namedtuple

import pytest

import merge_arrow_pr


# Lightweight stand-ins for the objects returned by the JIRA client.  Only
# the attributes read by the tests below are modelled.
FakeIssue = namedtuple('issue', ['fields'])
FakeFields = namedtuple('fields', ['status', 'summary', 'assignee',
                                   'components', 'fixVersions'])
FakeAssignee = namedtuple('assignee', ['displayName'])
FakeStatus = namedtuple('status', ['name'])
FakeComponent = namedtuple('component', ['name'])
FakeVersion = namedtuple('version', ['name', 'raw'])

RAW_VERSION_JSON = [
    {'name': 'JS-0.4.0', 'released': False},
    {'name': '0.11.0', 'released': False},
    {'name': '0.12.0', 'released': False},
    {'name': '0.10.0', 'released': True},
    {'name': '0.9.0', 'released': True}
]


SOURCE_VERSIONS = [FakeVersion(raw['name'], raw)
                   for raw in RAW_VERSION_JSON]

TRANSITIONS = [{'name': 'Resolve Issue', 'id': 1}]

jira_id = 'ARROW-1234'
status = FakeStatus('In Progress')
fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'),
                    [FakeComponent('C++'), FakeComponent('Format')],
                    [])
FAKE_ISSUE_1 = FakeIssue(fields)


class FakeJIRA:
    """In-memory replacement for the JIRA connection used by the tests.

    Every query method returns the canned value handed to the constructor,
    regardless of the id/project argument, and ``transition_issue`` records
    its arguments instead of contacting a server.
    """

    def __init__(self, issue=None, project_versions=None, transitions=None,
                 current_fix_versions=None):
        self._issue = issue
        self._project_versions = project_versions
        self._transitions = transitions
        # Kept so the argument is not silently discarded; none of the
        # methods below consult it.
        self._current_fix_versions = current_fix_versions
        # Filled in by transition_issue(); None means "no transition was
        # requested", so a test inspecting it gets a plain comparison
        # failure rather than an AttributeError.
        self.captured_transition = None

    def issue(self, jira_id):
        """Return the canned issue, whatever ``jira_id`` is."""
        return self._issue

    def transitions(self, jira_id):
        """Return the canned list of available transitions."""
        return self._transitions

    def transition_issue(self, jira_id, transition_id, comment=None,
                         fixVersions=None):
        """Record the requested transition in ``captured_transition``."""
        self.captured_transition = {
            'jira_id': jira_id,
            'transition_id': transition_id,
            'comment': comment,
            'fixVersions': fixVersions
        }

    def get_candidate_fix_versions(self):
        """Return ``(SOURCE_VERSIONS, ['0.12.0'])``.

        This lets a FakeJIRA be handed directly to
        ``merge_arrow_pr.prompt_for_fix_version`` (see
        ``test_jira_resolve_released_fix_version``).
        """
        return SOURCE_VERSIONS, ['0.12.0']

    def project_versions(self, project):
        """Return the canned project versions, whatever ``project`` is."""
        return self._project_versions


class FakeCLI:
    """Scripted command-line front end.

    ``prompt`` replays the ``responses`` sequence in order; ``fail`` raises
    so that tests can assert on the failure message.
    """

    def __init__(self, responses=()):
        self.responses = responses
        self.position = 0

    def prompt(self, prompt):
        """Return the next canned response.

        Raises:
            IndexError: if the code under test prompts more often than
                there are canned responses.  The message names the
                unanswered prompt to make the failure easy to diagnose.
        """
        if self.position >= len(self.responses):
            raise IndexError(
                "FakeCLI has no canned response left for prompt %r "
                "(%d provided)" % (prompt, len(self.responses)))
        response = self.responses[self.position]
        self.position += 1
        return response

    def fail(self, msg):
        """Raise ``Exception(msg)``."""
        raise Exception(msg)


def test_jira_fix_versions():
    """With SOURCE_VERSIONS the suggested default fix version is 0.11.0."""
    jira = FakeJIRA(project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
    all_versions, default_versions = issue.get_candidate_fix_versions()
    assert all_versions == SOURCE_VERSIONS
    assert default_versions == ['0.11.0']


def test_jira_no_suggest_patch_release():
    """An unreleased patch version (0.11.1) is not the suggested default."""
    versions_json = [
        {'name': '0.11.1', 'released': False},
        {'name': '0.12.0', 'released': False},
    ]

    versions = [FakeVersion(raw['name'], raw) for raw in versions_json]

    jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS)
    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
    all_versions, default_versions = issue.get_candidate_fix_versions()
    assert all_versions == versions
    assert default_versions == ['0.12.0']


def test_jira_parquet_no_suggest_non_cpp():
    """For PARQUET the suggested default is the cpp-prefixed version."""
    # ARROW-7351
    versions_json = [
        {'name': 'cpp-1.5.0', 'released': True},
        {'name': 'cpp-1.6.0', 'released': False},
        {'name': 'cpp-1.7.0', 'released': False},
        {'name': '1.11.0', 'released': False},
        {'name': '1.12.0', 'released': False}
    ]

    versions = [FakeVersion(raw['name'], raw)
                for raw in versions_json]

    jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS)
    issue = merge_arrow_pr.JiraIssue(jira, 'PARQUET-1713', 'PARQUET',
                                     FakeCLI())
    all_versions, default_versions = issue.get_candidate_fix_versions()
    assert all_versions == versions
    assert default_versions == ['cpp-1.6.0']


def test_jira_invalid_issue():
    """Constructing a JiraIssue fails when the issue lookup raises."""
    class Mock:

        def issue(self, jira_id):
            raise Exception("not found")

    with pytest.raises(Exception):
        merge_arrow_pr.JiraIssue(Mock(), 'ARROW-1234', 'ARROW', FakeCLI())


def test_jira_resolve():
    """resolve() forwards the fix versions and comment to the transition."""
    jira = FakeJIRA(issue=FAKE_ISSUE_1,
                    project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    my_comment = 'my comment'
    fix_versions = [SOURCE_VERSIONS[1].raw]

    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
    issue.resolve(fix_versions, my_comment)

    assert jira.captured_transition == {
        'jira_id': 'ARROW-1234',
        'transition_id': 1,
        'comment': my_comment,
        'fixVersions': fix_versions
    }


def test_jira_resolve_non_mainline():
    """resolve() also accepts a non-mainline fix version (JS-0.4.0)."""
    jira = FakeJIRA(issue=FAKE_ISSUE_1,
                    project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    my_comment = 'my comment'
    fix_versions = [SOURCE_VERSIONS[0].raw]

    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
    issue.resolve(fix_versions, my_comment)

    assert jira.captured_transition == {
        'jira_id': 'ARROW-1234',
        'transition_id': 1,
        'comment': my_comment,
        'fixVersions': fix_versions
    }


def test_jira_resolve_released_fix_version():
    """Entering an already released version (0.9.0) at the prompt works."""
    # ARROW-5083
    jira = FakeJIRA(issue=FAKE_ISSUE_1,
                    project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    cmd = FakeCLI(responses=['0.9.0'])
    fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, jira)
    assert fix_versions_json == [RAW_VERSION_JSON[-1]]


def test_multiple_authors_bad_input():
    """get_primary_author handles empty, invalid and unlisted answers."""
    a0 = 'Jimbob Crawfish <jimbob.crawfish@gmail.com>'
    a1 = 'Jarvis McCratchett <jarvis.mccratchett@hotmail.com>'
    a2 = 'Hank Miller <hank.miller@protonmail.com>'
    distinct_authors = [a0, a1]

    # An empty answer keeps the first author as primary.
    cmd = FakeCLI(responses=[''])
    primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
        cmd, distinct_authors)
    assert primary_author == a0
    assert new_distinct_authors == [a0, a1]

    # An unparseable answer is followed by a second prompt; the chosen
    # author is moved to the front.
    cmd = FakeCLI(responses=['oops', a1])
    primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
        cmd, distinct_authors)
    assert primary_author == a1
    assert new_distinct_authors == [a1, a0]

    # An author who is not in the list yet is prepended to it.
    cmd = FakeCLI(responses=[a2])
    primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
        cmd, distinct_authors)
    assert primary_author == a2
    assert new_distinct_authors == [a2, a0, a1]


def test_jira_already_resolved():
    """Resolving an issue whose status is 'Resolved' fails with a message."""
    status = FakeStatus('Resolved')
    fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'),
                        [FakeComponent('Java')], [])
    issue = FakeIssue(fields)

    jira = FakeJIRA(issue=issue,
                    project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    fix_versions = [SOURCE_VERSIONS[0].raw]
    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())

    with pytest.raises(Exception,
                       match="ARROW-1234 already has status 'Resolved'"):
        issue.resolve(fix_versions, "")


def test_no_unset_point_release_fix_version():
    """Existing patch-release fix versions survive resolve()."""
    # ARROW-6915: We have had the problem of issues marked with a point
    # release having their fix versions overwritten by the merge tool. This
    # verifies that existing patch release versions are carried over
    status = FakeStatus('In Progress')

    versions_json = {
        '0.14.2': {'name': '0.14.2', 'id': 1},
        '0.15.1': {'name': '0.15.1', 'id': 2},
        '0.16.0': {'name': '0.16.0', 'id': 3},
        '0.17.0': {'name': '0.17.0', 'id': 4}
    }

    fields = FakeFields(status, 'summary', FakeAssignee('someone'),
                        [FakeComponent('Java')],
                        [FakeVersion(v, versions_json[v])
                         for v in ['0.17.0', '0.15.1', '0.14.2']])
    issue = FakeIssue(fields)

    jira = FakeJIRA(issue=issue, project_versions=SOURCE_VERSIONS,
                    transitions=TRANSITIONS)

    issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI())
    issue.resolve([versions_json['0.16.0']], "a comment")

    # 0.17.0 (a .0 release) is replaced by the requested 0.16.0, while the
    # two patch releases already on the issue are kept.
    assert jira.captured_transition == {
        'jira_id': 'ARROW-1234',
        'transition_id': 1,
        'comment': 'a comment',
        'fixVersions': [versions_json[v]
                        for v in ['0.16.0', '0.15.1', '0.14.2']]
    }

    issue.resolve([versions_json['0.15.1']], "a comment")

    # Requesting a version that is already present must not duplicate it.
    assert jira.captured_transition == {
        'jira_id': 'ARROW-1234',
        'transition_id': 1,
        'comment': 'a comment',
        'fixVersions': [versions_json[v] for v in ['0.15.1', '0.14.2']]
    }


def test_jira_output_no_components():
    """format_jira_output flags missing components and joins present ones."""
    # ARROW-5472
    summary = 'Interesting work'
    components = []
    output = merge_arrow_pr.format_jira_output(
        'ARROW-1234', 'Resolved', summary, FakeAssignee('Foo Bar'),
        components)

    assert output == """=== JIRA ARROW-1234 ===
Summary\t\tInteresting work
Assignee\tFoo Bar
Components\tNO COMPONENTS!!!
Status\t\tResolved
URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234"""

    output = merge_arrow_pr.format_jira_output(
        'ARROW-1234', 'Resolved', summary, FakeAssignee('Foo Bar'),
        [FakeComponent('C++'), FakeComponent('Python')])

    assert output == """=== JIRA ARROW-1234 ===
Summary\t\tInteresting work
Assignee\tFoo Bar
Components\tC++, Python
Status\t\tResolved
URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234"""