diff options
Diffstat (limited to 'src/etc/test-float-parse/runtests.py')
-rw-r--r-- | src/etc/test-float-parse/runtests.py | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/src/etc/test-float-parse/runtests.py b/src/etc/test-float-parse/runtests.py new file mode 100644 index 000000000..cf7279534 --- /dev/null +++ b/src/etc/test-float-parse/runtests.py @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 + +""" +Testing dec2flt +=============== +These are *really* extensive tests. Expect them to run for hours. Due to the +nature of the problem (the input is a string of arbitrary length), exhaustive +testing is not really possible. Instead, there are exhaustive tests for some +classes of inputs for which that is feasible and a bunch of deterministic and +random non-exhaustive tests for covering everything else. + +The actual tests (generating decimal strings and feeding them to dec2flt) is +performed by a set of stand-along rust programs. This script compiles, runs, +and supervises them. The programs report the strings they generate and the +floating point numbers they converted those strings to, and this script +checks that the results are correct. + +You can run specific tests rather than all of them by giving their names +(without .rs extension) as command line parameters. + +Verification +------------ +The tricky part is not generating those inputs but verifying the outputs. +Comparing with the result of Python's float() does not cut it because +(and this is apparently undocumented) although Python includes a version of +Martin Gay's code including the decimal-to-float part, it doesn't actually use +it for float() (only for round()) instead relying on the system scanf() which +is not necessarily completely accurate. + +Instead, we take the input and compute the true value with bignum arithmetic +(as a fraction, using the ``fractions`` module). + +Given an input string and the corresponding float computed via Rust, simply +decode the float into f * 2^k (for integers f, k) and the ULP. +We can now easily compute the error and check if it is within 0.5 ULP as it +should be. Zero and infinites are handled similarly: + +- If the approximation is 0.0, the exact value should be *less or equal* + half the smallest denormal float: the smallest denormal floating point + number has an odd mantissa (00...001) and thus half of that is rounded + to 00...00, i.e., zero. +- If the approximation is Inf, the exact value should be *greater or equal* + to the largest finite float + 0.5 ULP: the largest finite float has an odd + mantissa (11...11), so that plus half an ULP is rounded up to the nearest + even number, which overflows. + +Implementation details +---------------------- +This directory contains a set of single-file Rust programs that perform +tests with a particular class of inputs. Each is compiled and run without +parameters, outputs (f64, f32, decimal) pairs to verify externally, and +in any case either exits gracefully or with a panic. + +If a test binary writes *anything at all* to stderr or exits with an +exit code that's not 0, the test fails. +The output on stdout is treated as (f64, f32, decimal) record, encoded thusly: + +- First, the bits of the f64 encoded as an ASCII hex string. +- Second, the bits of the f32 encoded as an ASCII hex string. +- Then the corresponding string input, in ASCII +- The record is terminated with a newline. + +Incomplete records are an error. Not-a-Number bit patterns are invalid too. + +The tests run serially but the validation for a single test is parallelized +with ``multiprocessing``. Each test is launched as a subprocess. +One thread supervises it: Accepts and enqueues records to validate, observe +stderr, and waits for the process to exit. A set of worker processes perform +the validation work for the outputs enqueued there. Another thread listens +for progress updates from the workers. + +Known issues +------------ +Some errors (e.g., NaN outputs) aren't handled very gracefully. +Also, if there is an exception or the process is interrupted (at least on +Windows) the worker processes are leaked and stick around forever. +They're only a few megabytes each, but still, this script should not be run +if you aren't prepared to manually kill a lot of orphaned processes. +""" +from __future__ import print_function +import sys +import os.path +import time +import struct +from fractions import Fraction +from collections import namedtuple +from subprocess import Popen, check_call, PIPE +from glob import glob +import multiprocessing +import threading +import ctypes +import binascii + +try: # Python 3 + import queue as Queue +except ImportError: # Python 2 + import Queue + +NUM_WORKERS = 2 +UPDATE_EVERY_N = 50000 +INF = namedtuple('INF', '')() +NEG_INF = namedtuple('NEG_INF', '')() +ZERO = namedtuple('ZERO', '')() +MAILBOX = None # The queue for reporting errors to the main process. +STDOUT_LOCK = threading.Lock() +test_name = None +child_processes = [] +exit_status = 0 + +def msg(*args): + with STDOUT_LOCK: + print("[" + test_name + "]", *args) + sys.stdout.flush() + + +def write_errors(): + global exit_status + f = open("errors.txt", 'w') + have_seen_error = False + while True: + args = MAILBOX.get() + if args is None: + f.close() + break + print(*args, file=f) + f.flush() + if not have_seen_error: + have_seen_error = True + msg("Something is broken:", *args) + msg("Future errors logged to errors.txt") + exit_status = 101 + + +def cargo(): + print("compiling tests") + sys.stdout.flush() + check_call(['cargo', 'build', '--release']) + + +def run(test): + global test_name + test_name = test + + t0 = time.perf_counter() + msg("setting up supervisor") + command = ['cargo', 'run', '--bin', test, '--release'] + proc = Popen(command, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE) + done = multiprocessing.Value(ctypes.c_bool) + queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024) + workers = [] + for n in range(NUM_WORKERS): + worker = multiprocessing.Process(name='Worker-' + str(n + 1), + target=init_worker, + args=[test, MAILBOX, queue, done]) + workers.append(worker) + child_processes.append(worker) + for worker in workers: + worker.start() + msg("running test") + interact(proc, queue) + with done.get_lock(): + done.value = True + for worker in workers: + worker.join() + msg("python is done") + assert queue.empty(), "did not validate everything" + dt = time.perf_counter() - t0 + msg("took", round(dt, 3), "seconds") + + +def interact(proc, queue): + n = 0 + while proc.poll() is None: + line = proc.stdout.readline() + if not line: + continue + assert line.endswith(b'\n'), "incomplete line: " + repr(line) + queue.put(line) + n += 1 + if n % UPDATE_EVERY_N == 0: + msg("got", str(n // 1000) + "k", "records") + msg("rust is done. exit code:", proc.returncode) + rest, stderr = proc.communicate() + if stderr: + msg("rust stderr output:", stderr) + for line in rest.split(b'\n'): + if not line: + continue + queue.put(line) + + +def main(): + global MAILBOX + files = glob('src/bin/*.rs') + basenames = [os.path.basename(i) for i in files] + all_tests = [os.path.splitext(f)[0] for f in basenames if not f.startswith('_')] + args = sys.argv[1:] + if args: + tests = [test for test in all_tests if test in args] + else: + tests = all_tests + if not tests: + print("Error: No tests to run") + sys.exit(1) + # Compile first for quicker feedback + cargo() + # Set up mailbox once for all tests + MAILBOX = multiprocessing.Queue() + mailman = threading.Thread(target=write_errors) + mailman.daemon = True + mailman.start() + for test in tests: + run(test) + MAILBOX.put(None) + mailman.join() + + +# ---- Worker thread code ---- + + +POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) } +HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) } +DONE_FLAG = None + + +def send_error_to_supervisor(*args): + MAILBOX.put(args) + + +def init_worker(test, mailbox, queue, done): + global test_name, MAILBOX, DONE_FLAG + test_name = test + MAILBOX = mailbox + DONE_FLAG = done + do_work(queue) + + +def is_done(): + with DONE_FLAG.get_lock(): + return DONE_FLAG.value + + +def do_work(queue): + while True: + try: + line = queue.get(timeout=0.01) + except Queue.Empty: + if queue.empty() and is_done(): + return + else: + continue + bin64, bin32, text = line.rstrip().split() + validate(bin64, bin32, text.decode('utf-8')) + + +def decode_binary64(x): + """ + Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and + infinity (positive and negative), which return ZERO, INF, and NEG_INF + respectively. + """ + x = binascii.unhexlify(x) + assert len(x) == 8, repr(x) + [bits] = struct.unpack(b'>Q', x) + if bits == 0: + return ZERO + exponent = (bits >> 52) & 0x7FF + negative = bits >> 63 + low_bits = bits & 0xFFFFFFFFFFFFF + if exponent == 0: + mantissa = low_bits + exponent += 1 + if mantissa == 0: + return ZERO + elif exponent == 0x7FF: + assert low_bits == 0, "NaN" + if negative: + return NEG_INF + else: + return INF + else: + mantissa = low_bits | (1 << 52) + exponent -= 1023 + 52 + if negative: + mantissa = -mantissa + return (mantissa, exponent) + + +def decode_binary32(x): + """ + Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and + infinity (positive and negative), which return ZERO, INF, and NEG_INF + respectively. + """ + x = binascii.unhexlify(x) + assert len(x) == 4, repr(x) + [bits] = struct.unpack(b'>I', x) + if bits == 0: + return ZERO + exponent = (bits >> 23) & 0xFF + negative = bits >> 31 + low_bits = bits & 0x7FFFFF + if exponent == 0: + mantissa = low_bits + exponent += 1 + if mantissa == 0: + return ZERO + elif exponent == 0xFF: + if negative: + return NEG_INF + else: + return INF + else: + mantissa = low_bits | (1 << 23) + exponent -= 127 + 23 + if negative: + mantissa = -mantissa + return (mantissa, exponent) + + +MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074 +MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149 # XXX unsure +MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023) +MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127) +MAX_ULP_DOUBLE = 1023 - 52 +MAX_ULP_SINGLE = 127 - 23 +DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2 +DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1) +SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2 +SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1) + +def validate(bin64, bin32, text): + try: + double = decode_binary64(bin64) + except AssertionError: + print(bin64, bin32, text) + raise + single = decode_binary32(bin32) + real = Fraction(text) + + if double is ZERO: + if real > DOUBLE_ZERO_CUTOFF: + record_special_error(text, "f64 zero") + elif double is INF: + if real < DOUBLE_INF_CUTOFF: + record_special_error(text, "f64 inf") + elif double is NEG_INF: + if -real < DOUBLE_INF_CUTOFF: + record_special_error(text, "f64 -inf") + elif len(double) == 2: + sig, k = double + validate_normal(text, real, sig, k, "f64") + else: + assert 0, "didn't handle binary64" + if single is ZERO: + if real > SINGLE_ZERO_CUTOFF: + record_special_error(text, "f32 zero") + elif single is INF: + if real < SINGLE_INF_CUTOFF: + record_special_error(text, "f32 inf") + elif single is NEG_INF: + if -real < SINGLE_INF_CUTOFF: + record_special_error(text, "f32 -inf") + elif len(single) == 2: + sig, k = single + validate_normal(text, real, sig, k, "f32") + else: + assert 0, "didn't handle binary32" + +def record_special_error(text, descr): + send_error_to_supervisor(text.strip(), "wrongly rounded to", descr) + + +def validate_normal(text, real, sig, k, kind): + approx = sig * POW2[k] + error = abs(approx - real) + if error > HALF_ULP[k]: + record_normal_error(text, error, k, kind) + + +def record_normal_error(text, error, k, kind): + one_ulp = HALF_ULP[k + 1] + assert one_ulp == 2 * HALF_ULP[k] + relative_error = error / one_ulp + text = text.strip() + try: + err_repr = float(relative_error) + except ValueError: + err_repr = str(err_repr).replace('/', ' / ') + send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")") + + +if __name__ == '__main__': + main() |