diff options
Diffstat (limited to 'lib/math')
-rw-r--r-- | lib/math/Kconfig | 17 | ||||
-rw-r--r-- | lib/math/Makefile | 9 | ||||
-rw-r--r-- | lib/math/cordic.c | 92 | ||||
-rw-r--r-- | lib/math/div64.c | 236 | ||||
-rw-r--r-- | lib/math/gcd.c | 85 | ||||
-rw-r--r-- | lib/math/int_pow.c | 32 | ||||
-rw-r--r-- | lib/math/int_sqrt.c | 71 | ||||
-rw-r--r-- | lib/math/lcm.c | 26 | ||||
-rw-r--r-- | lib/math/prime_numbers.c | 316 | ||||
-rw-r--r-- | lib/math/rational-test.c | 56 | ||||
-rw-r--r-- | lib/math/rational.c | 111 | ||||
-rw-r--r-- | lib/math/reciprocal_div.c | 73 | ||||
-rw-r--r-- | lib/math/test_div64.c | 249 |
13 files changed, 1373 insertions, 0 deletions
diff --git a/lib/math/Kconfig b/lib/math/Kconfig new file mode 100644 index 000000000..0634b428d --- /dev/null +++ b/lib/math/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +config CORDIC + tristate "CORDIC algorithm" + help + This option provides an implementation of the CORDIC algorithm; + calculations are in fixed point. Module will be called cordic. + +config PRIME_NUMBERS + tristate "Simple prime number generator for testing" + help + This option provides a simple prime number generator for test + modules. + + If unsure, say N. + +config RATIONAL + tristate diff --git a/lib/math/Makefile b/lib/math/Makefile new file mode 100644 index 000000000..bfac26ddf --- /dev/null +++ b/lib/math/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o + +obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o +obj-$(CONFIG_RATIONAL) += rational.o + +obj-$(CONFIG_TEST_DIV64) += test_div64.o +obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o diff --git a/lib/math/cordic.c b/lib/math/cordic.c new file mode 100644 index 000000000..8ef27c129 --- /dev/null +++ b/lib/math/cordic.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <linux/module.h> +#include <linux/cordic.h> + +static const s32 arctan_table[] = { + 2949120, + 1740967, + 919879, + 466945, + 234379, + 117304, + 58666, + 29335, + 14668, + 7334, + 3667, + 1833, + 917, + 458, + 229, + 115, + 57, + 29 +}; + +/* + * cordic_calc_iq() - calculates the i/q coordinate for given angle + * + * theta: angle in degrees for which i/q coordinate is to be calculated + * coord: function output parameter holding the i/q coordinate + */ +struct cordic_iq cordic_calc_iq(s32 theta) +{ + struct cordic_iq coord; + s32 angle, valtmp; + unsigned iter; + int signx = 1; + int signtheta; + + coord.i = CORDIC_ANGLE_GEN; + coord.q = 0; + angle = 0; + + theta = CORDIC_FIXED(theta); + signtheta = (theta < 0) ? -1 : 1; + theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - + CORDIC_FIXED(180) * signtheta; + + if (CORDIC_FLOAT(theta) > 90) { + theta -= CORDIC_FIXED(180); + signx = -1; + } else if (CORDIC_FLOAT(theta) < -90) { + theta += CORDIC_FIXED(180); + signx = -1; + } + + for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { + if (theta > angle) { + valtmp = coord.i - (coord.q >> iter); + coord.q += (coord.i >> iter); + angle += arctan_table[iter]; + } else { + valtmp = coord.i + (coord.q >> iter); + coord.q -= (coord.i >> iter); + angle -= arctan_table[iter]; + } + coord.i = valtmp; + } + + coord.i *= signx; + coord.q *= signx; + return coord; +} +EXPORT_SYMBOL(cordic_calc_iq); + +MODULE_DESCRIPTION("CORDIC algorithm"); +MODULE_AUTHOR("Broadcom Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/math/div64.c b/lib/math/div64.c new file mode 100644 index 000000000..46866394f --- /dev/null +++ b/lib/math/div64.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> + * + * Based on former do_div() implementation from asm-parisc/div64.h: + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * + * Generic C version of 64bit/32bit division and modulo, with + * 64bit result and 32bit remainder. + * + * The fast case for (n>>32 == 0) is handled inline by do_div(). + * + * Code generated for this function might be very inefficient + * for some CPUs. __div64_32() can be overridden by linking arch-specific + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S + * or by defining a preprocessor macro in arch/include/asm/div64.h. + */ + +#include <linux/bitops.h> +#include <linux/export.h> +#include <linux/math.h> +#include <linux/math64.h> +#include <linux/log2.h> + +/* Not needed on 64bit architectures */ +#if BITS_PER_LONG == 32 + +#ifndef __div64_32 +uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} +EXPORT_SYMBOL(__div64_32); +#endif + +/** + * div_s64_rem - signed 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + */ +#ifndef div_s64_rem +s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + u64 quotient; + + if (dividend < 0) { + quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); + *remainder = -*remainder; + if (divisor > 0) + quotient = -quotient; + } else { + quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); + if (divisor < 0) + quotient = -quotient; + } + return quotient; +} +EXPORT_SYMBOL(div_s64_rem); +#endif + +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + * + * This implementation is a comparable to algorithm used by div64_u64. + * But this operation, which includes math for calculating the remainder, + * is kept distinct to avoid slowing down the div64_u64 operation on 32bit + * systems. + */ +#ifndef div64_u64_rem +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + u32 rem32; + quot = div_u64_rem(dividend, divisor, &rem32); + *remainder = rem32; + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + + *remainder = dividend - quot * divisor; + if (*remainder >= divisor) { + quot++; + *remainder -= divisor; + } + } + + return quot; +} +EXPORT_SYMBOL(div64_u64_rem); +#endif + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * + * This implementation is a modified version of the algorithm proposed + * by the book 'Hacker's Delight'. The original source and full proof + * can be found here and is available for use without restriction. + * + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' + */ +#ifndef div64_u64 +u64 div64_u64(u64 dividend, u64 divisor) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + quot = div_u64(dividend, divisor); + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + if ((dividend - quot * divisor) >= divisor) + quot++; + } + + return quot; +} +EXPORT_SYMBOL(div64_u64); +#endif + +/** + * div64_s64 - signed 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + */ +#ifndef div64_s64 +s64 div64_s64(s64 dividend, s64 divisor) +{ + s64 quot, t; + + quot = div64_u64(abs(dividend), abs(divisor)); + t = (dividend ^ divisor) >> 63; + + return (quot ^ t) - t; +} +EXPORT_SYMBOL(div64_s64); +#endif + +#endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + return __iter_div_u64_rem(dividend, divisor, remainder); +} +EXPORT_SYMBOL(iter_div_u64_rem); + +#ifndef mul_u64_u64_div_u64 +u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) +{ + u64 res = 0, div, rem; + int shift; + + /* can a * b overflow ? */ + if (ilog2(a) + ilog2(b) > 62) { + /* + * (b * a) / c is equal to + * + * (b / c) * a + + * (b % c) * a / c + * + * if nothing overflows. Can the 1st multiplication + * overflow? Yes, but we do not care: this can only + * happen if the end result can't fit in u64 anyway. + * + * So the code below does + * + * res = (b / c) * a; + * b = b % c; + */ + div = div64_u64_rem(b, c, &rem); + res = div * a; + b = rem; + + shift = ilog2(a) + ilog2(b) - 62; + if (shift > 0) { + /* drop precision */ + b >>= shift; + c >>= shift; + if (!c) + return res; + } + } + + return res + div64_u64(a * b, c); +} +EXPORT_SYMBOL(mul_u64_u64_div_u64); +#endif diff --git a/lib/math/gcd.c b/lib/math/gcd.c new file mode 100644 index 000000000..e3b042214 --- /dev/null +++ b/lib/math/gcd.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/gcd.h> +#include <linux/export.h> + +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + b >>= __ffs(b); + if (b == 1) + return r & -r; + + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; + } +} + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + +EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/math/int_pow.c b/lib/math/int_pow.c new file mode 100644 index 000000000..0cf426e69 --- /dev/null +++ b/lib/math/int_pow.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * An integer based power function + * + * Derived from drivers/video/backlight/pwm_bl.c + */ + +#include <linux/export.h> +#include <linux/math.h> +#include <linux/types.h> + +/** + * int_pow - computes the exponentiation of the given base and exponent + * @base: base which will be raised to the given power + * @exp: power to be raised to + * + * Computes: pow(base, exp), i.e. @base raised to the @exp power + */ +u64 int_pow(u64 base, unsigned int exp) +{ + u64 result = 1; + + while (exp) { + if (exp & 1) + result *= base; + exp >>= 1; + base *= base; + } + + return result; +} +EXPORT_SYMBOL_GPL(int_pow); diff --git a/lib/math/int_sqrt.c b/lib/math/int_sqrt.c new file mode 100644 index 000000000..a8170bb91 --- /dev/null +++ b/lib/math/int_sqrt.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com> + * + * Based on the shift-and-subtract algorithm for computing integer + * square root from Guy L. Steele. + */ + +#include <linux/export.h> +#include <linux/bitops.h> +#include <linux/limits.h> +#include <linux/math.h> + +/** + * int_sqrt - computes the integer square root + * @x: integer of which to calculate the sqrt + * + * Computes: floor(sqrt(x)) + */ +unsigned long int_sqrt(unsigned long x) +{ + unsigned long b, m, y = 0; + + if (x <= 1) + return x; + + m = 1UL << (__fls(x) & ~1UL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt); + +#if BITS_PER_LONG < 64 +/** + * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input + * is expected. + * @x: 64bit integer of which to calculate the sqrt + */ +u32 int_sqrt64(u64 x) +{ + u64 b, m, y = 0; + + if (x <= ULONG_MAX) + return int_sqrt((unsigned long) x); + + m = 1ULL << ((fls64(x) - 1) & ~1ULL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt64); +#endif diff --git a/lib/math/lcm.c b/lib/math/lcm.c new file mode 100644 index 000000000..6e0b2e736 --- /dev/null +++ b/lib/math/lcm.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/compiler.h> +#include <linux/gcd.h> +#include <linux/export.h> +#include <linux/lcm.h> + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a / gcd(a, b)) * b; + else + return 0; +} +EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/math/prime_numbers.c b/lib/math/prime_numbers.c new file mode 100644 index 000000000..d42cebf74 --- /dev/null +++ b/lib/math/prime_numbers.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "prime numbers: " fmt + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/prime_numbers.h> +#include <linux/slab.h> + +#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) + +struct primes { + struct rcu_head rcu; + unsigned long last, sz; + unsigned long primes[]; +}; + +#if BITS_PER_LONG == 64 +static const struct primes small_primes = { + .last = 61, + .sz = 64, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) | + BIT(37) | + BIT(41) | + BIT(43) | + BIT(47) | + BIT(53) | + BIT(59) | + BIT(61) + } +}; +#elif BITS_PER_LONG == 32 +static const struct primes small_primes = { + .last = 31, + .sz = 32, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) + } +}; +#else +#error "unhandled BITS_PER_LONG" +#endif + +static DEFINE_MUTEX(lock); +static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes); + +static unsigned long selftest_max; + +static bool slow_is_prime_number(unsigned long x) +{ + unsigned long y = int_sqrt(x); + + while (y > 1) { + if ((x % y) == 0) + break; + y--; + } + + return y == 1; +} + +static unsigned long slow_next_prime_number(unsigned long x) +{ + while (x < ULONG_MAX && !slow_is_prime_number(++x)) + ; + + return x; +} + +static unsigned long clear_multiples(unsigned long x, + unsigned long *p, + unsigned long start, + unsigned long end) +{ + unsigned long m; + + m = 2 * x; + if (m < start) + m = roundup(start, x); + + while (m < end) { + __clear_bit(m, p); + m += x; + } + + return x; +} + +static bool expand_to_next_prime(unsigned long x) +{ + const struct primes *p; + struct primes *new; + unsigned long sz, y; + + /* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3, + * there is always at least one prime p between n and 2n - 2. + * Equivalently, if n > 1, then there is always at least one prime p + * such that n < p < 2n. + * + * http://mathworld.wolfram.com/BertrandsPostulate.html + * https://en.wikipedia.org/wiki/Bertrand's_postulate + */ + sz = 2 * x; + if (sz < x) + return false; + + sz = round_up(sz, BITS_PER_LONG); + new = kmalloc(sizeof(*new) + bitmap_size(sz), + GFP_KERNEL | __GFP_NOWARN); + if (!new) + return false; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (x < p->last) { + kfree(new); + goto unlock; + } + + /* Where memory permits, track the primes using the + * Sieve of Eratosthenes. The sieve is to remove all multiples of known + * primes from the set, what remains in the set is therefore prime. + */ + bitmap_fill(new->primes, sz); + bitmap_copy(new->primes, p->primes, p->sz); + for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1)) + new->last = clear_multiples(y, new->primes, p->sz, sz); + new->sz = sz; + + BUG_ON(new->last <= x); + + rcu_assign_pointer(primes, new); + if (p != &small_primes) + kfree_rcu((struct primes *)p, rcu); + +unlock: + mutex_unlock(&lock); + return true; +} + +static void free_primes(void) +{ + const struct primes *p; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (p != &small_primes) { + rcu_assign_pointer(primes, &small_primes); + kfree_rcu((struct primes *)p, rcu); + } + mutex_unlock(&lock); +} + +/** + * next_prime_number - return the next prime number + * @x: the starting point for searching to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. The set of prime numbers is computed using the Sieve of + * Eratoshenes (on finding a prime, all multiples of that prime are removed + * from the set) enabling a fast lookup of the next prime number larger than + * @x. If the sieve fails (memory limitation), the search falls back to using + * slow trial-divison, up to the value of ULONG_MAX (which is reported as the + * final prime as a sentinel). + * + * Returns: the next prime number larger than @x + */ +unsigned long next_prime_number(unsigned long x) +{ + const struct primes *p; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->last) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_next_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + x = find_next_bit(p->primes, p->last, x + 1); + rcu_read_unlock(); + + return x; +} +EXPORT_SYMBOL(next_prime_number); + +/** + * is_prime_number - test whether the given number is prime + * @x: the number to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. Internally a cache of prime numbers is kept (to speed up + * searching for sequential primes, see next_prime_number()), but if the number + * falls outside of that cache, its primality is tested using trial-divison. + * + * Returns: true if @x is prime, false for composite numbers. + */ +bool is_prime_number(unsigned long x) +{ + const struct primes *p; + bool result; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->sz) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_is_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + result = test_bit(x, p->primes); + rcu_read_unlock(); + + return result; +} +EXPORT_SYMBOL(is_prime_number); + +static void dump_primes(void) +{ + const struct primes *p; + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + rcu_read_lock(); + p = rcu_dereference(primes); + + if (buf) + bitmap_print_to_pagebuf(true, buf, p->primes, p->sz); + pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s\n", + p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf); + + rcu_read_unlock(); + + kfree(buf); +} + +static int selftest(unsigned long max) +{ + unsigned long x, last; + + if (!max) + return 0; + + for (last = 0, x = 2; x < max; x++) { + bool slow = slow_is_prime_number(x); + bool fast = is_prime_number(x); + + if (slow != fast) { + pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!\n", + x, slow ? "yes" : "no", fast ? "yes" : "no"); + goto err; + } + + if (!slow) + continue; + + if (next_prime_number(last) != x) { + pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu\n", + last, x, next_prime_number(last)); + goto err; + } + last = x; + } + + pr_info("%s(%lu) passed, last prime was %lu\n", __func__, x, last); + return 0; + +err: + dump_primes(); + return -EINVAL; +} + +static int __init primes_init(void) +{ + return selftest(selftest_max); +} + +static void __exit primes_exit(void) +{ + free_primes(); +} + +module_init(primes_init); +module_exit(primes_exit); + +module_param_named(selftest, selftest_max, ulong, 0400); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/lib/math/rational-test.c b/lib/math/rational-test.c new file mode 100644 index 000000000..01611ddff --- /dev/null +++ b/lib/math/rational-test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/test.h> + +#include <linux/rational.h> + +struct rational_test_param { + unsigned long num, den; + unsigned long max_num, max_den; + unsigned long exp_num, exp_den; + + const char *name; +}; + +static const struct rational_test_param test_parameters[] = { + { 1230, 10, 100, 20, 100, 1, "Exceeds bounds, semi-convergent term > 1/2 last term" }, + { 34567,100, 120, 20, 120, 1, "Exceeds bounds, semi-convergent term < 1/2 last term" }, + { 1, 30, 100, 10, 0, 1, "Closest to zero" }, + { 1, 19, 100, 10, 1, 10, "Closest to smallest non-zero" }, + { 27,32, 16, 16, 11, 13, "Use convergent" }, + { 1155, 7735, 255, 255, 33, 221, "Exact answer" }, + { 87, 32, 70, 32, 68, 25, "Semiconvergent, numerator limit" }, + { 14533, 4626, 15000, 2400, 7433, 2366, "Semiconvergent, denominator limit" }, +}; + +static void get_desc(const struct rational_test_param *param, char *desc) +{ + strscpy(desc, param->name, KUNIT_PARAM_DESC_SIZE); +} + +/* Creates function rational_gen_params */ +KUNIT_ARRAY_PARAM(rational, test_parameters, get_desc); + +static void rational_test(struct kunit *test) +{ + const struct rational_test_param *param = (const struct rational_test_param *)test->param_value; + unsigned long n = 0, d = 0; + + rational_best_approximation(param->num, param->den, param->max_num, param->max_den, &n, &d); + KUNIT_EXPECT_EQ(test, n, param->exp_num); + KUNIT_EXPECT_EQ(test, d, param->exp_den); +} + +static struct kunit_case rational_test_cases[] = { + KUNIT_CASE_PARAM(rational_test, rational_gen_params), + {} +}; + +static struct kunit_suite rational_test_suite = { + .name = "rational", + .test_cases = rational_test_cases, +}; + +kunit_test_suites(&rational_test_suite); + +MODULE_LICENSE("GPL v2"); diff --git a/lib/math/rational.c b/lib/math/rational.c new file mode 100644 index 000000000..ec59d426e --- /dev/null +++ b/lib/math/rational.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <oskar@scara.com> + * Copyright (C) 2019 Trent Piepho <tpiepho@gmail.com> + * + * helper functions when coping with rational numbers + */ + +#include <linux/rational.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/minmax.h> +#include <linux/limits.h> +#include <linux/module.h> + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * https://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + /* n/d is the starting rational, which is continually + * decreased each iteration using the Euclidean algorithm. + * + * dp is the value of d from the prior iteration. + * + * n2/d2, n1/d1, and n0/d0 are our successively more accurate + * approximations of the rational. They are, respectively, + * the current, previous, and two prior iterations of it. + * + * a is current term of the continued fraction. + */ + unsigned long n, d, n0, d0, n1, d1, n2, d2; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + + for (;;) { + unsigned long dp, a; + + if (d == 0) + break; + /* Find next term in continued fraction, 'a', via + * Euclidean algorithm. + */ + dp = d; + a = n / d; + d = n % d; + n = dp; + + /* Calculate the current rational approximation (aka + * convergent), n2/d2, using the term just found and + * the two prior approximations. + */ + n2 = n0 + a * n1; + d2 = d0 + a * d1; + + /* If the current convergent exceeds the maxes, then + * return either the previous convergent or the + * largest semi-convergent, the final term of which is + * found below as 't'. + */ + if ((n2 > max_numerator) || (d2 > max_denominator)) { + unsigned long t = ULONG_MAX; + + if (d1) + t = (max_denominator - d0) / d1; + if (n1) + t = min(t, (max_numerator - n0) / n1); + + /* This tests if the semi-convergent is closer than the previous + * convergent. If d1 is zero there is no previous convergent as this + * is the 1st iteration, so always choose the semi-convergent. + */ + if (!d1 || 2u * t > a || (2u * t == a && d0 * dp > d1 * d)) { + n1 = n0 + t * n1; + d1 = d0 + t * d1; + } + break; + } + n0 = n1; + n1 = n2; + d0 = d1; + d1 = d2; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); + +MODULE_LICENSE("GPL v2"); diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c new file mode 100644 index 000000000..6cb4adbb8 --- /dev/null +++ b/lib/math/reciprocal_div.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/limits.h> +#include <linux/math.h> +#include <linux/minmax.h> +#include <linux/types.h> + +#include <linux/reciprocal_div.h> + +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) +{ + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; +} +EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. + */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); diff --git a/lib/math/test_div64.c b/lib/math/test_div64.c new file mode 100644 index 000000000..c15edd688 --- /dev/null +++ b/lib/math/test_div64.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Maciej W. Rozycki + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/time64.h> +#include <linux/types.h> + +#include <asm/div64.h> + +#define TEST_DIV64_N_ITER 1024 + +static const u64 test_div64_dividends[] = { + 0x00000000ab275080, + 0x0000000fe73c1959, + 0x000000e54c0a74b1, + 0x00000d4398ff1ef9, + 0x0000a18c2ee1c097, + 0x00079fb80b072e4a, + 0x0072db27380dd689, + 0x0842f488162e2284, + 0xf66745411d8ab063, +}; +#define SIZE_DIV64_DIVIDENDS ARRAY_SIZE(test_div64_dividends) + +#define TEST_DIV64_DIVISOR_0 0x00000009 +#define TEST_DIV64_DIVISOR_1 0x0000007c +#define TEST_DIV64_DIVISOR_2 0x00000204 +#define TEST_DIV64_DIVISOR_3 0x0000cb5b +#define TEST_DIV64_DIVISOR_4 0x00010000 +#define TEST_DIV64_DIVISOR_5 0x0008a880 +#define TEST_DIV64_DIVISOR_6 0x003fd3ae +#define TEST_DIV64_DIVISOR_7 0x0b658fac +#define TEST_DIV64_DIVISOR_8 0xdc08b349 + +static const u32 test_div64_divisors[] = { + TEST_DIV64_DIVISOR_0, + TEST_DIV64_DIVISOR_1, + TEST_DIV64_DIVISOR_2, + TEST_DIV64_DIVISOR_3, + TEST_DIV64_DIVISOR_4, + TEST_DIV64_DIVISOR_5, + TEST_DIV64_DIVISOR_6, + TEST_DIV64_DIVISOR_7, + TEST_DIV64_DIVISOR_8, +}; +#define SIZE_DIV64_DIVISORS ARRAY_SIZE(test_div64_divisors) + +static const struct { + u64 quotient; + u32 remainder; +} test_div64_results[SIZE_DIV64_DIVISORS][SIZE_DIV64_DIVIDENDS] = { + { + { 0x0000000013045e47, 0x00000001 }, + { 0x000000000161596c, 0x00000030 }, + { 0x000000000054e9d4, 0x00000130 }, + { 0x000000000000d776, 0x0000278e }, + { 0x000000000000ab27, 0x00005080 }, + { 0x00000000000013c4, 0x0004ce80 }, + { 0x00000000000002ae, 0x001e143c }, + { 0x000000000000000f, 0x0033e56c }, + { 0x0000000000000000, 0xab275080 }, + }, { + { 0x00000001c45c02d1, 0x00000000 }, + { 0x0000000020d5213c, 0x00000049 }, + { 0x0000000007e3d65f, 0x000001dd }, + { 0x0000000000140531, 0x000065ee }, + { 0x00000000000fe73c, 0x00001959 }, + { 0x000000000001d637, 0x0004e5d9 }, + { 0x0000000000003fc9, 0x000713bb }, + { 0x0000000000000165, 0x029abe7d }, + { 0x0000000000000012, 0x6e9f7e37 }, + }, { + { 0x000000197a3a0cf7, 0x00000002 }, + { 0x00000001d9632e5c, 0x00000021 }, + { 0x0000000071c28039, 0x000001cd }, + { 0x000000000120a844, 0x0000b885 }, + { 0x0000000000e54c0a, 0x000074b1 }, + { 0x00000000001a7bb3, 0x00072331 }, + { 0x00000000000397ad, 0x0002c61b }, + { 0x000000000000141e, 0x06ea2e89 }, + { 0x000000000000010a, 0xab002ad7 }, + }, { + { 0x0000017949e37538, 0x00000001 }, + { 0x0000001b62441f37, 0x00000055 }, + { 0x0000000694a3391d, 0x00000085 }, + { 0x0000000010b2a5d2, 0x0000a753 }, + { 0x000000000d4398ff, 0x00001ef9 }, + { 0x0000000001882ec6, 0x0005cbf9 }, + { 0x000000000035333b, 0x0017abdf }, + { 0x00000000000129f1, 0x0ab4520d }, + { 0x0000000000000f6e, 0x8ac0ce9b }, + }, { + { 0x000011f321a74e49, 0x00000006 }, + { 0x0000014d8481d211, 0x0000005b }, + { 0x0000005025cbd92d, 0x000001e3 }, + { 0x00000000cb5e71e3, 0x000043e6 }, + { 0x00000000a18c2ee1, 0x0000c097 }, + { 0x0000000012a88828, 0x00036c97 }, + { 0x000000000287f16f, 0x002c2a25 }, + { 0x00000000000e2cc7, 0x02d581e3 }, + { 0x000000000000bbf4, 0x1ba08c03 }, + }, { + { 0x0000d8db8f72935d, 0x00000005 }, + { 0x00000fbd5aed7a2e, 0x00000002 }, + { 0x000003c84b6ea64a, 0x00000122 }, + { 0x0000000998fa8829, 0x000044b7 }, + { 0x000000079fb80b07, 0x00002e4a }, + { 0x00000000e16b20fa, 0x0002a14a }, + { 0x000000001e940d22, 0x00353b2e }, + { 0x0000000000ab40ac, 0x06fba6ba }, + { 0x000000000008debd, 0x72d98365 }, + }, { + { 0x000cc3045b8fc281, 0x00000000 }, + { 0x0000ed1f48b5c9fc, 0x00000079 }, + { 0x000038fb9c63406a, 0x000000e1 }, + { 0x000000909705b825, 0x00000a62 }, + { 0x00000072db27380d, 0x0000d689 }, + { 0x0000000d43fce827, 0x00082b09 }, + { 0x00000001ccaba11a, 0x0037e8dd }, + { 0x000000000a13f729, 0x0566dffd }, + { 0x000000000085a14b, 0x23d36726 }, + }, { + { 0x00eafeb9c993592b, 0x00000001 }, + { 0x00110e5befa9a991, 0x00000048 }, + { 0x00041947b4a1d36a, 0x000000dc }, + { 0x00000a6679327311, 0x0000c079 }, + { 0x00000842f488162e, 0x00002284 }, + { 0x000000f4459740fc, 0x00084484 }, + { 0x0000002122c47bf9, 0x002ca446 }, + { 0x00000000b9936290, 0x004979c4 }, + { 0x00000000099ca89d, 0x9db446bf }, + }, { + { 0x1b60cece589da1d2, 0x00000001 }, + { 0x01fcb42be1453f5b, 0x0000004f }, + { 0x007a3f2457df0749, 0x0000013f }, + { 0x0001363130e3ec7b, 0x000017aa }, + { 0x0000f66745411d8a, 0x0000b063 }, + { 0x00001c757dfab350, 0x00048863 }, + { 0x000003dc4979c652, 0x00224ea7 }, + { 0x000000159edc3144, 0x06409ab3 }, + { 0x000000011eadfee3, 0xa99c48a8 }, + }, +}; + +static inline bool test_div64_verify(u64 quotient, u32 remainder, int i, int j) +{ + return (quotient == test_div64_results[i][j].quotient && + remainder == test_div64_results[i][j].remainder); +} + +/* + * This needs to be a macro, because we don't want to rely on the compiler + * to do constant propagation, and `do_div' may take a different path for + * constants, so we do want to verify that as well. + */ +#define test_div64_one(dividend, divisor, i, j) ({ \ + bool result = true; \ + u64 quotient; \ + u32 remainder; \ + \ + quotient = dividend; \ + remainder = do_div(quotient, divisor); \ + if (!test_div64_verify(quotient, remainder, i, j)) { \ + pr_err("ERROR: %016llx / %08x => %016llx,%08x\n", \ + dividend, divisor, quotient, remainder); \ + pr_err("ERROR: expected value => %016llx,%08x\n",\ + test_div64_results[i][j].quotient, \ + test_div64_results[i][j].remainder); \ + result = false; \ + } \ + result; \ +}) + +/* + * Run calculation for the same divisor value expressed as a constant + * and as a variable, so as to verify the implementation for both cases + * should they be handled by different code execution paths. + */ +static bool __init test_div64(void) +{ + u64 dividend; + int i, j; + + for (i = 0; i < SIZE_DIV64_DIVIDENDS; i++) { + dividend = test_div64_dividends[i]; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_0, i, 0)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_1, i, 1)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_2, i, 2)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_3, i, 3)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_4, i, 4)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_5, i, 5)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_6, i, 6)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_7, i, 7)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_8, i, 8)) + return false; + for (j = 0; j < SIZE_DIV64_DIVISORS; j++) { + if (!test_div64_one(dividend, test_div64_divisors[j], + i, j)) + return false; + } + } + return true; +} + +static int __init test_div64_init(void) +{ + struct timespec64 ts, ts0, ts1; + int i; + + pr_info("Starting 64bit/32bit division and modulo test\n"); + ktime_get_ts64(&ts0); + + for (i = 0; i < TEST_DIV64_N_ITER; i++) + if (!test_div64()) + break; + + ktime_get_ts64(&ts1); + ts = timespec64_sub(ts1, ts0); + pr_info("Completed 64bit/32bit division and modulo test, " + "%llu.%09lus elapsed\n", ts.tv_sec, ts.tv_nsec); + + return 0; +} + +static void __exit test_div64_exit(void) +{ +} + +module_init(test_div64_init); +module_exit(test_div64_exit); + +MODULE_AUTHOR("Maciej W. Rozycki <macro@orcam.me.uk>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("64bit/32bit division and modulo test module"); |