diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/erasure-code/jerasure/gf-complete/tools | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/erasure-code/jerasure/gf-complete/tools')
11 files changed, 1952 insertions, 0 deletions
diff --git a/src/erasure-code/jerasure/gf-complete/tools/Makefile.am b/src/erasure-code/jerasure/gf-complete/tools/Makefile.am new file mode 100644 index 00000000..4ca9131a --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/Makefile.am @@ -0,0 +1,56 @@ +# GF-Complete 'tools' AM file + +AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include +AM_CFLAGS = -O3 -fPIC + +bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time + +gf_mult_SOURCES = gf_mult.c +#gf_mult_LDFLAGS = -lgf_complete +gf_mult_LDADD = ../src/libgf_complete.la + +gf_div_SOURCES = gf_div.c +#gf_div_LDFLAGS = -lgf_complete +gf_div_LDADD = ../src/libgf_complete.la + +gf_add_SOURCES = gf_add.c +#gf_add_LDFLAGS = -lgf_complete +gf_add_LDADD = ../src/libgf_complete.la + +gf_time_SOURCES = gf_time.c +#gf_time_LDFLAGS = -lgf_complete +gf_time_LDADD = ../src/libgf_complete.la + +gf_methods_SOURCES = gf_methods.c +#gf_methods_LDFLAGS = -lgf_complete +gf_methods_LDADD = ../src/libgf_complete.la + +gf_poly_SOURCES = gf_poly.c +#gf_poly_LDFLAGS = -lgf_complete +gf_poly_LDADD = ../src/libgf_complete.la + +gf_inline_time_SOURCES = gf_inline_time.c +#gf_inline_time_LDFLAGS = -lgf_complete +gf_inline_time_LDADD = ../src/libgf_complete.la + +# gf_unit 8 A -1 -m LOG_ZERO_EXT is excluded until http://lab.jerasure.org/jerasure/gf-complete/issues/13 is resolved +if ENABLE_VALGRIND +VALGRIND = | perl -p -e 's|^|../libtool --mode=execute valgrind --quiet --error-exitcode=1 --tool=memcheck | if(!/gf_unit 8 A -1 -m LOG_ZERO_EXT/)' +endif + +# gf_unit tests as generated by gf_methods +gf_unit_w%.sh: gf_methods + ./$^ $(@:gf_unit_w%.sh=%) -A -U ${VALGRIND} > $@ || rm $@ + +TESTS = gf_unit_w128.sh \ + gf_unit_w64.sh \ + gf_unit_w32.sh \ + gf_unit_w16.sh \ + gf_unit_w8.sh \ + gf_unit_w4.sh + +TEST_EXTENSIONS = .sh +SH_LOG_COMPILER = $(SHELL) +AM_SH_LOG_FLAGS = -e + +CLEANFILES = $(TESTS) diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_add.c b/src/erasure-code/jerasure/gf-complete/tools/gf_add.c new file mode 100644 index 00000000..28cc12c1 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_add.c @@ -0,0 +1,114 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_add.c + * + * Adds two numbers in gf_2^w + */ + +#include <stdio.h> +#include <getopt.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_add a b w - does addition of a and b in GF(2^w)\n"); + fprintf(stderr, " If w has an h on the end, treat a, b and the sum as hexadecimal (no 0x required)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " legal w are: 1-32, 64 and 128\n"); + fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n"); + + if (s != NULL) fprintf(stderr, "%s", s); + exit(1); +} + +int read_128(char *s, uint64_t *v) +{ + int l, t; + char save; + + l = strlen(s); + if (l > 32) return 0; + + if (l > 16) { + if (sscanf(s + (l-16), "%llx", (long long unsigned int *) &(v[1])) == 0) return 0; + save = s[l-16]; + s[l-16] = '\0'; + t = sscanf(s, "%llx", (long long unsigned int *) &(v[0])); + s[l-16] = save; + return t; + } else { + v[0] = 0; + return sscanf(s, "%llx", (long long unsigned int *)&(v[1])); + } + return 1; +} + +void print_128(uint64_t *v) +{ + if (v[0] > 0) { + printf("%llx", (long long unsigned int) v[0]); + printf("%016llx", (long long unsigned int) v[1]); + } else { + printf("%llx", (long long unsigned int) v[1]); + } + printf("\n"); +} + + +int main(int argc, char **argv) +{ + int hex, w; + uint32_t a, b, c, top; + uint64_t a64, b64, c64; + uint64_t a128[2], b128[2], c128[2]; + char *format; + + if (argc != 4) usage(NULL); + if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n"); + + if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w"); + + hex = (strchr(argv[3], 'h') != NULL); + + if (!hex && w == 128) usage(NULL); + + if (w <= 32) { + format = (hex) ? "%x" : "%u"; + if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n"); + + if (w < 32) { + top = (w == 31) ? 0x80000000 : (1 << w); + if (w != 32 && a >= top) usage("a is too large\n"); + if (w != 32 && b >= top) usage("b is too large\n"); + } + + c = a ^ b; + printf(format, c); + printf("\n"); + + } else if (w == 64) { + format = (hex) ? "%llx" : "%llu"; + if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n"); + c64 = a64 ^ b64; + + printf(format, c64); + printf("\n"); + + } else if (w == 128) { + + if (read_128(argv[1], a128) == 0) usage("Bad a\n"); + if (read_128(argv[2], b128) == 0) usage("Bad b\n"); + c128[0] = a128[0] ^ b128[0]; + c128[1] = a128[1] ^ b128[1]; + + print_128(c128); + } + exit(0); +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_div.c b/src/erasure-code/jerasure/gf-complete/tools/gf_div.c new file mode 100644 index 00000000..9797f07d --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_div.c @@ -0,0 +1,68 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_div.c + * + * Multiplies two numbers in gf_2^w + */ + +#include <stdio.h> +#include <getopt.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include "gf_complete.h" +#include "gf_method.h" +#include "gf_general.h" + +void usage(int why) +{ + fprintf(stderr, "usage: gf_div a b w [method] - does division of a and b in GF(2^w)\n"); + if (why == 'W') { + fprintf(stderr, "Bad w.\n"); + fprintf(stderr, "Legal w are: 1 - 32, 64 and 128.\n"); + fprintf(stderr, "Append 'h' to w to treat a, b and the quotient as hexadecimal.\n"); + fprintf(stderr, "w=128 is hex only (i.e. '128' will be an error - do '128h')\n"); + } + if (why == 'A') fprintf(stderr, "Bad a\n"); + if (why == 'B') fprintf(stderr, "Bad b\n"); + if (why == 'M') { + fprintf(stderr, "Bad Method Specification: "); + gf_error(); + } + exit(1); +} + +int main(int argc, char **argv) +{ + int hex, w; + gf_t gf; + gf_general_t a, b, c; + char output[50]; + + if (argc < 4) usage(' '); + + if (sscanf(argv[3], "%d", &w) == 0) usage('W'); + if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage('W'); + + hex = (strchr(argv[3], 'h') != NULL); + if (!hex && w == 128) usage('W'); + + if (argc == 4) { + if (gf_init_easy(&gf, w) == 0) usage('M'); + } else { + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage('M'); + } + + if (!gf_general_s_to_val(&a, w, argv[1], hex)) usage('A'); + if (!gf_general_s_to_val(&b, w, argv[2], hex)) usage('B'); + + gf_general_divide(&gf, &a, &b, &c); + gf_general_val_to_s(&c, w, output, hex); + + printf("%s\n", output); + exit(0); +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_inline_time.c b/src/erasure-code/jerasure/gf-complete/tools/gf_inline_time.c new file mode 100644 index 00000000..f8119da6 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_inline_time.c @@ -0,0 +1,170 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_inline_time.c + * + * Times inline single multiplication when w = 4, 8 or 16 + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> + +#include "gf_complete.h" +#include "gf_rand.h" + +void +timer_start (double *t) +{ + struct timeval tv; + + gettimeofday (&tv, NULL); + *t = (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; +} + +double +timer_split (const double *t) +{ + struct timeval tv; + double cur_t; + + gettimeofday (&tv, NULL); + cur_t = (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; + return (cur_t - *t); +} + +void problem(char *s) +{ + fprintf(stderr, "Timing test failed.\n"); + fprintf(stderr, "%s\n", s); + exit(1); +} + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_inline_time w seed #elts iterations - does timing of single multiplies\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal w are: 4, 8 or 16\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Use -1 for time(0) as a seed.\n"); + fprintf(stderr, "\n"); + if (s != NULL) fprintf(stderr, "%s\n", s); + exit(1); +} + +int main(int argc, char **argv) +{ + int w, j, i, size, iterations; + gf_t gf; + double timer, elapsed, dnum, num; + uint8_t *ra = NULL, *rb = NULL, *mult4, *mult8; + uint16_t *ra16 = NULL, *rb16 = NULL, *log16, *alog16; + time_t t0; + + if (argc != 5) usage(NULL); + if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n"); + if (w != 4 && w != 8 && w != 16) usage("Bad w\n"); + if (sscanf(argv[2], "%ld", &t0) == 0) usage("Bad seed\n"); + if (sscanf(argv[3], "%d", &size) == 0) usage("Bad #elts\n"); + if (sscanf(argv[4], "%d", &iterations) == 0) usage("Bad iterations\n"); + if (t0 == -1) t0 = time(0); + MOA_Seed(t0); + + num = size; + + gf_init_easy(&gf, w); + + printf("Seed: %ld\n", t0); + + if (w == 4 || w == 8) { + ra = (uint8_t *) malloc(size); + rb = (uint8_t *) malloc(size); + + if (ra == NULL || rb == NULL) { perror("malloc"); exit(1); } + } else if (w == 16) { + ra16 = (uint16_t *) malloc(size*2); + rb16 = (uint16_t *) malloc(size*2); + + if (ra16 == NULL || rb16 == NULL) { perror("malloc"); exit(1); } + } + + if (w == 4) { + mult4 = gf_w4_get_mult_table(&gf); + if (mult4 == NULL) { + printf("Couldn't get inline multiplication table.\n"); + exit(1); + } + elapsed = 0; + dnum = 0; + for (i = 0; i < iterations; i++) { + for (j = 0; j < size; j++) { + ra[j] = MOA_Random_W(w, 1); + rb[j] = MOA_Random_W(w, 1); + } + timer_start(&timer); + for (j = 0; j < size; j++) { + ra[j] = GF_W4_INLINE_MULTDIV(mult4, ra[j], rb[j]); + } + dnum += num; + elapsed += timer_split(&timer); + } + printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n", + elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed); + + } else if (w == 8) { + mult8 = gf_w8_get_mult_table(&gf); + if (mult8 == NULL) { + printf("Couldn't get inline multiplication table.\n"); + exit(1); + } + elapsed = 0; + dnum = 0; + for (i = 0; i < iterations; i++) { + for (j = 0; j < size; j++) { + ra[j] = MOA_Random_W(w, 1); + rb[j] = MOA_Random_W(w, 1); + } + timer_start(&timer); + for (j = 0; j < size; j++) { + ra[j] = GF_W8_INLINE_MULTDIV(mult8, ra[j], rb[j]); + } + dnum += num; + elapsed += timer_split(&timer); + } + printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n", + elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed); + } else if (w == 16) { + log16 = gf_w16_get_log_table(&gf); + alog16 = gf_w16_get_mult_alog_table(&gf); + if (log16 == NULL) { + printf("Couldn't get inline multiplication table.\n"); + exit(1); + } + elapsed = 0; + dnum = 0; + for (i = 0; i < iterations; i++) { + for (j = 0; j < size; j++) { + ra16[j] = MOA_Random_W(w, 1); + rb16[j] = MOA_Random_W(w, 1); + } + timer_start(&timer); + for (j = 0; j < size; j++) { + ra16[j] = GF_W16_INLINE_MULT(log16, alog16, ra16[j], rb16[j]); + } + dnum += num; + elapsed += timer_split(&timer); + } + printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n", + elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed); + } + free (ra); + free (rb); + free (ra16); + free (rb16); + return 0; +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_methods.c b/src/erasure-code/jerasure/gf-complete/tools/gf_methods.c new file mode 100644 index 00000000..b016c33c --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_methods.c @@ -0,0 +1,246 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_methods.c + * + * Lists supported methods (incomplete w.r.t. GROUP and COMPOSITE) + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include "gf_complete.h" +#include "gf_method.h" +#include "gf_int.h" + +#define BNMULTS (8) +static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48", + "TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" }; +#define NMULTS (17) +static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", + "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", + "SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" }; + +/* Make sure CAUCHY is last */ + +#define NREGIONS (7) +static char *REGIONS[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SIMD", "NOSIMD", + "ALTMAP", "CAUCHY" }; + +#define BNREGIONS (4) +static char *BREGIONS[BNREGIONS] = { "DOUBLE", "QUAD", "ALTMAP", "CAUCHY" }; + +#define NDIVS (2) +static char *divides[NDIVS] = { "MATRIX", "EUCLID" }; + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_methods w -BADC -LXUMDRB\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " w can be 1-32, 64, 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -B lists basic methods that are useful\n"); + fprintf(stderr, " -A does a nearly exhaustive listing\n"); + fprintf(stderr, " -D adds EUCLID and MATRIX division\n"); + fprintf(stderr, " -C adds CAUCHY when possible\n"); + fprintf(stderr, " Combinations are fine.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -L Simply lists methods\n"); + fprintf(stderr, " -X List methods and functions selected (compile with DEBUG_FUNCTIONS)\n"); + fprintf(stderr, " -U Produces calls to gf_unit\n"); + fprintf(stderr, " -M Produces calls to time_tool.sh for single multiplications\n"); + fprintf(stderr, " -D Produces calls to time_tool.sh for single divisions\n"); + fprintf(stderr, " -R Produces calls to time_tool.sh for region multiplications\n"); + fprintf(stderr, " -B Produces calls to time_tool.sh for the fastest region multiplications\n"); + fprintf(stderr, " Cannot combine L, U, T.\n"); + if (s != NULL) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s\n", s); + } + exit(1); +} + +void print_methods(gf_t *gf) +{ +#ifdef DEBUG_FUNCTIONS + gf_internal_t *h = (gf_internal_t*) gf->scratch; + + printf("multiply = %s\n", h->multiply); + printf("divide = %s\n", h->divide); + printf("inverse = %s\n", h->inverse); + printf("multiply_region = %s\n", h->multiply_region); + printf("extract_word = %s\n", h->extract_word); +#endif +} + +int main(int argc, char *argv[]) +{ + int m, r, d, w, i, sa, j, k, reset, ok; + int nregions; + int nmults; + char **regions; + char **mults; + int exhaustive = 0; + int divide = 0; + int cauchy = 0; + int listing; + char *gf_argv[50], *x; + gf_t gf; + char ls[10]; + char * w_str; + + if (argc != 4) usage(NULL); + w = atoi(argv[1]); + ok = (w >= 1 && w <= 32); + if (w == 64) ok = 1; + if (w == 128) ok = 1; + if (!ok) usage("Bad w"); + + if (argv[2][0] != '-' || argv[3][0] != '-' || strlen(argv[2]) == 1 || strlen(argv[3]) != 2) { + usage(NULL); + } + for (i = 1; argv[2][i] != '\0'; i++) { + switch(argv[2][i]) { + case 'B': exhaustive = 0; break; + case 'A': exhaustive = 1; break; + case 'D': divide = 1; break; + case 'C': cauchy = 1; break; + default: usage("Bad -BADC"); + } + } + + if (strchr("LXUMDRB", argv[3][1]) == NULL) { usage("Bad -LXUMDRB"); } + listing = argv[3][1]; + + if (listing == 'U') { + w_str = "../test/gf_unit %d A -1"; + } else if (listing == 'L' || listing == 'X') { + w_str = "w=%d:"; + } else { + w_str = strdup("sh time_tool.sh X %d"); + x = strchr(w_str, 'X'); + *x = listing; + } + + gf_argv[0] = "-"; + if (create_gf_from_argv(&gf, w, 1, gf_argv, 0) > 0) { + printf(w_str, w); + printf(" - \n"); + gf_free(&gf, 1); + } else if (_gf_errno == GF_E_DEFAULT) { + fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2); + exit(1); + } + + nregions = (exhaustive) ? NREGIONS : BNREGIONS; + if (!cauchy) nregions--; + regions = (exhaustive) ? REGIONS : BREGIONS; + mults = (exhaustive) ? MULTS : BMULTS; + nmults = (exhaustive) ? NMULTS : BNMULTS; + + + for (m = 0; m < nmults; m++) { + sa = 0; + gf_argv[sa++] = "-m"; + if (strcmp(mults[m], "GROUP44") == 0) { + gf_argv[sa++] = "GROUP"; + gf_argv[sa++] = "4"; + gf_argv[sa++] = "4"; + } else if (strcmp(mults[m], "GROUP48") == 0) { + gf_argv[sa++] = "GROUP"; + gf_argv[sa++] = "4"; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT2") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "2"; + } else if (strcmp(mults[m], "SPLIT4") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "4"; + } else if (strcmp(mults[m], "SPLIT8") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT16") == 0) { + gf_argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + gf_argv[sa++] = ls; + gf_argv[sa++] = "16"; + } else if (strcmp(mults[m], "SPLIT88") == 0) { + gf_argv[sa++] = "SPLIT"; + gf_argv[sa++] = "8"; + gf_argv[sa++] = "8"; + } else if (strcmp(mults[m], "COMPOSITE") == 0) { + gf_argv[sa++] = "COMPOSITE"; + gf_argv[sa++] = "2"; + gf_argv[sa++] = "-"; + } else { + gf_argv[sa++] = mults[m]; + } + reset = sa; + + + for (r = 0; r < (1 << nregions); r++) { + sa = reset; + for (k = 0; k < nregions; k++) { + if (r & (1 << k)) { + gf_argv[sa++] = "-r"; + gf_argv[sa++] = regions[k]; + } + } + gf_argv[sa++] = "-"; + + /* printf("Hmmmm. %s", gf_argv[0]); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); */ + + if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) { + printf(w_str, w); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); + if (listing == 'X') + print_methods(&gf); + gf_free(&gf, 1); + } else if (_gf_errno == GF_E_DEFAULT) { + fprintf(stderr, "Unlabeled failed method: w=%d:", w); + for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]); + fprintf(stderr, "\n"); + exit(1); + } + sa--; + if (divide) { + for (d = 0; d < NDIVS; d++) { + gf_argv[sa++] = "-d"; + gf_argv[sa++] = divides[d]; + /* printf("w=%d:", w); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); */ + gf_argv[sa++] = "-"; + if (create_gf_from_argv(&gf, w, sa, gf_argv, 0) > 0) { + printf(w_str, w); + for (j = 0; j < sa; j++) printf(" %s", gf_argv[j]); + printf("\n"); + if (listing == 'X') + print_methods(&gf); + gf_free(&gf, 1); + } else if (_gf_errno == GF_E_DEFAULT) { + fprintf(stderr, "Unlabeled failed method: w=%d:", w); + for (j = 0; j < sa; j++) fprintf(stderr, " %s", gf_argv[j]); + fprintf(stderr, "\n"); + exit(1); + } + sa-=3; + } + } + } + } + return 0; +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_mult.c b/src/erasure-code/jerasure/gf-complete/tools/gf_mult.c new file mode 100644 index 00000000..815bd8b2 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_mult.c @@ -0,0 +1,68 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_mult.c + * + * Multiplies two numbers in gf_2^w + */ + +#include <stdio.h> +#include <getopt.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include "gf_complete.h" +#include "gf_method.h" +#include "gf_general.h" + +void usage(int why) +{ + fprintf(stderr, "usage: gf_mult a b w [method] - does multiplication of a and b in GF(2^w)\n"); + if (why == 'W') { + fprintf(stderr, "Bad w.\n"); + fprintf(stderr, "Legal w are: 1 - 32, 64 and 128.\n"); + fprintf(stderr, "Append 'h' to w to treat a, b and the product as hexadecimal.\n"); + fprintf(stderr, "w=128 is hex only (i.e. '128' will be an error - do '128h')\n"); + } + if (why == 'A') fprintf(stderr, "Bad a\n"); + if (why == 'B') fprintf(stderr, "Bad b\n"); + if (why == 'M') { + fprintf(stderr, "Bad Method Specification: "); + gf_error(); + } + exit(1); +} + +int main(int argc, char **argv) +{ + int hex, w; + gf_t gf; + gf_general_t a, b, c; + char output[50]; + + if (argc < 4) usage(' '); + + if (sscanf(argv[3], "%d", &w) == 0) usage('W'); + if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage('W'); + + hex = (strchr(argv[3], 'h') != NULL); + if (!hex && w == 128) usage('W'); + + if (argc == 4) { + if (gf_init_easy(&gf, w) == 0) usage('M'); + } else { + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage('M'); + } + + if (!gf_general_s_to_val(&a, w, argv[1], hex)) usage('A'); + if (!gf_general_s_to_val(&b, w, argv[2], hex)) usage('B'); + + gf_general_multiply(&gf, &a, &b, &c); + gf_general_val_to_s(&c, w, output, hex); + + printf("%s\n", output); + exit(0); +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_poly.c b/src/erasure-code/jerasure/gf-complete/tools/gf_poly.c new file mode 100644 index 00000000..b3faf254 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_poly.c @@ -0,0 +1,275 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_poly.c - program to help find irreducible polynomials in composite fields, + * using the Ben-Or algorithm. + * + * (This one was written by Jim) + * + * Please see the following paper for a description of the Ben-Or algorithm: + * + * author S. Gao and D. Panario + * title Tests and Constructions of Irreducible Polynomials over Finite Fields + * booktitle Foundations of Computational Mathematics + * year 1997 + * publisher Springer Verlag + * pages 346-361 + * + * The basic technique is this. You have a polynomial f(x) whose coefficients are + * in a base field GF(2^w). The polynomial is of degree n. You need to do the + * following for all i from 1 to n/2: + * + * Construct x^(2^w)^i modulo f. That will be a polynomial of maximum degree n-1 + * with coefficients in GF(2^w). You construct that polynomial by starting with x + * and doubling it w times, each time taking the result modulo f. Then you + * multiply that by itself i times, again each time taking the result modulo f. + * + * When you're done, you need to "subtract" x -- since addition = subtraction = + * XOR, that means XOR x. + * + * Now, find the GCD of that last polynomial and f, using Euclid's algorithm. If + * the GCD is not one, then f is reducible. If it is not reducible for each of + * those i, then it is irreducible. + * + * In this code, I am using a gf_general_t to represent elements of GF(2^w). This + * is so that I can use base fields that are GF(2^64) or GF(2^128). + * + * I have two main procedures. The first is x_to_q_to_i_minus_x, which calculates + * x^(2^w)^i - x, putting the result into a gf_general_t * called retval. + * + * The second is gcd_one, which takes a polynomial of degree n and a second one + * of degree n-1, and uses Euclid's algorithm to decide if their GCD == 1. + * + * These can be made faster (e.g. calculate x^(2^w) once and store it). + */ + +#include "gf_complete.h" +#include "gf_method.h" +#include "gf_general.h" +#include "gf_int.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +char *BM = "Bad Method: "; + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_poly w(base-field) method power:coef [ power:coef .. ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " use - for the default method.\n"); + fprintf(stderr, " use 0x in front of the coefficient if it's in hex\n"); + fprintf(stderr, " \n"); + fprintf(stderr, " For example, to test whether x^2 + 2x + 1 is irreducible\n"); + fprintf(stderr, " in GF(2^16), the call is:\n"); + fprintf(stderr, " \n"); + fprintf(stderr, " gf_poly 16 - 2:1 1:2 0:1\n"); + fprintf(stderr, " \n"); + fprintf(stderr, " See the user's manual for more information.\n"); + if (s != NULL) { + fprintf(stderr, "\n"); + if (s == BM) { + fprintf(stderr, "%s", s); + gf_error(); + } else { + fprintf(stderr, "%s\n", s); + } + } + exit(1); +} + +int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod) +{ + gf_general_t *a, *b, zero, factor, p; + int i, j, da, db; + + gf_general_set_zero(&zero, w); + + a = (gf_general_t *) malloc(sizeof(gf_general_t) * n+1); + b = (gf_general_t *) malloc(sizeof(gf_general_t) * n); + for (i = 0; i <= n; i++) gf_general_add(gf, &zero, poly+i, a+i); + for (i = 0; i < n; i++) gf_general_add(gf, &zero, prod+i, b+i); + + da = n; + while (1) { + for (db = n-1; db >= 0 && gf_general_is_zero(b+db, w); db--) ; + if (db < 0) return 0; + if (db == 0) return 1; + for (j = da; j >= db; j--) { + if (!gf_general_is_zero(a+j, w)) { + gf_general_divide(gf, a+j, b+db, &factor); + for (i = 0; i <= db; i++) { + gf_general_multiply(gf, b+i, &factor, &p); + gf_general_add(gf, &p, a+(i+j-db), a+(i+j-db)); + } + } + } + for (i = 0; i < n; i++) { + gf_general_add(gf, a+i, &zero, &p); + gf_general_add(gf, b+i, &zero, a+i); + gf_general_add(gf, &p, &zero, b+i); + } + } + +} + +void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, int i, gf_general_t *retval) +{ + gf_general_t x; + gf_general_t *x_to_q; + gf_general_t *product; + gf_general_t p, zero, factor; + int j, k, lq; + + gf_general_set_zero(&zero, w); + product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2); + x_to_q = (gf_general_t *) malloc(sizeof(gf_general_t) * n); + for (j = 0; j < n; j++) gf_general_set_zero(x_to_q+j, w); + gf_general_set_one(x_to_q+1, w); + + for (lq = 0; lq < logq; lq++) { + for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w); + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + gf_general_multiply(gf, x_to_q+j, x_to_q+k, &p); + gf_general_add(gf, product+(j+k), &p, product+(j+k)); + } + } + for (j = n*2-1; j >= n; j--) { + if (!gf_general_is_zero(product+j, w)) { + gf_general_add(gf, product+j, &zero, &factor); + for (k = 0; k <= n; k++) { + gf_general_multiply(gf, poly+k, &factor, &p); + gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k)); + } + } + } + for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, x_to_q+j); + } + for (j = 0; j < n; j++) gf_general_set_zero(retval+j, w); + gf_general_set_one(retval, w); + + while (i > 0) { + for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w); + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + gf_general_multiply(gf, x_to_q+j, retval+k, &p); + gf_general_add(gf, product+(j+k), &p, product+(j+k)); + } + } + for (j = n*2-1; j >= n; j--) { + if (!gf_general_is_zero(product+j, w)) { + gf_general_add(gf, product+j, &zero, &factor); + for (k = 0; k <= n; k++) { + gf_general_multiply(gf, poly+k, &factor, &p); + gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k)); + } + } + } + for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, retval+j); + i--; + } + + gf_general_set_one(&x, w); + gf_general_add(gf, &x, retval+1, retval+1); + + free(product); + free(x_to_q); +} + +int main(int argc, char **argv) +{ + int w, i, power, n, ap, success; + gf_t gf; + gf_general_t *poly, *prod; + char *string, *ptr; + char buf[100]; + + if (argc < 4) usage(NULL); + + if (sscanf(argv[1], "%d", &w) != 1 || w <= 0) usage("Bad w."); + ap = create_gf_from_argv(&gf, w, argc, argv, 2); + + if (ap == 0) usage(BM); + + if (ap == argc) usage("No powers/coefficients given."); + + n = -1; + for (i = ap; i < argc; i++) { + if (strchr(argv[i], ':') == NULL || sscanf(argv[i], "%d:", &power) != 1) { + string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100))); + sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]); + usage(string); + } + if (power < 0) { + usage("Can't have negative powers\n"); + } else { + n = power; + } + } + // in case the for-loop header fails + assert (n >= 0); + + poly = (gf_general_t *) malloc(sizeof(gf_general_t)*(n+1)); + for (i = 0; i <= n; i++) gf_general_set_zero(poly+i, w); + prod = (gf_general_t *) malloc(sizeof(gf_general_t)*n); + + for (i = ap; i < argc; i++) { + sscanf(argv[i], "%d:", &power); + ptr = strchr(argv[i], ':'); + ptr++; + if (strncmp(ptr, "0x", 2) == 0) { + success = gf_general_s_to_val(poly+power, w, ptr+2, 1); + } else { + success = gf_general_s_to_val(poly+power, w, ptr, 0); + } + if (success == 0) { + string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100))); + sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]); + usage(string); + } + } + + printf("Poly:"); + for (power = n; power >= 0; power--) { + if (!gf_general_is_zero(poly+power, w)) { + printf("%s", (power == n) ? " " : " + "); + if (!gf_general_is_one(poly+power, w)) { + gf_general_val_to_s(poly+power, w, buf, 1); + if (n > 0) { + printf("(0x%s)", buf); + } else { + printf("0x%s", buf); + } + } + if (power == 0) { + if (gf_general_is_one(poly+power, w)) printf("1"); + } else if (power == 1) { + printf("x"); + } else { + printf("x^%d", power); + } + } + } + printf("\n"); + + if (!gf_general_is_one(poly+n, w)) { + printf("\n"); + printf("Can't do Ben-Or, because the polynomial is not monic.\n"); + exit(0); + } + + for (i = 1; i <= n/2; i++) { + x_to_q_to_i_minus_x(&gf, w, n, poly, w, i, prod); + if (!gcd_one(&gf, w, n, poly, prod)) { + printf("Reducible.\n"); + exit(0); + } + } + + printf("Irreducible.\n"); + exit(0); +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/gf_time.c b/src/erasure-code/jerasure/gf-complete/tools/gf_time.c new file mode 100644 index 00000000..7402ab5c --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/gf_time.c @@ -0,0 +1,232 @@ +/* + * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic + * James S. Plank, Ethan L. Miller, Kevin M. Greenan, + * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. + * + * gf_time.c + * + * Performs timing for gf arithmetic + */ + +#include "config.h" + +#ifdef HAVE_POSIX_MEMALIGN +#ifndef _XOPEN_SOURCE +#define _XOPEN_SOURCE 600 +#endif +#endif + +#include <stdio.h> +#include <getopt.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <sys/time.h> + +#include "gf_complete.h" +#include "gf_method.h" +#include "gf_rand.h" +#include "gf_general.h" + +void +timer_start (double *t) +{ + struct timeval tv; + + gettimeofday (&tv, NULL); + *t = (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; +} + +double +timer_split (const double *t) +{ + struct timeval tv; + double cur_t; + + gettimeofday (&tv, NULL); + cur_t = (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; + return (cur_t - *t); +} + +void problem(char *s) +{ + fprintf(stderr, "Timing test failed.\n"); + fprintf(stderr, "%s\n", s); + exit(1); +} + +char *BM = "Bad Method: "; + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_time w tests seed size(bytes) iterations [method [params]] - does timing\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "does unit testing in GF(2^w)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Tests may be any combination of:\n"); + fprintf(stderr, " A: All\n"); + fprintf(stderr, " S: All Single Operations\n"); + fprintf(stderr, " R: All Region Operations\n"); + fprintf(stderr, " M: Single: Multiplications\n"); + fprintf(stderr, " D: Single: Divisions\n"); + fprintf(stderr, " I: Single: Inverses\n"); + fprintf(stderr, " G: Region: Buffer-Constant Multiplication\n"); + fprintf(stderr, " 0: Region: Doing nothing, and bzero()\n"); + fprintf(stderr, " 1: Region: Memcpy() and XOR\n"); + fprintf(stderr, " 2: Region: Multiplying by two\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Use -1 for time(0) as a seed.\n"); + fprintf(stderr, "\n"); + if (s == BM) { + fprintf(stderr, "%s", BM); + gf_error(); + } else if (s != NULL) { + fprintf(stderr, "%s\n", s); + } + exit(1); +} + +int main(int argc, char **argv) +{ + int w, it, i, size, iterations, xor; + char tests[100]; + char test; + char *single_tests = "MDI"; + char *region_tests = "G012"; + char *tstrings[256]; + void *tmethods[256]; + gf_t gf; + double timer, elapsed, ds, di, dnum; + int num; + time_t t0; + uint8_t *ra, *rb; + gf_general_t a; +#ifndef HAVE_POSIX_MEMALIGN + uint8_t *malloc_ra, *malloc_rb; +#endif + + + if (argc < 6) usage(NULL); + + if (sscanf(argv[1], "%d", &w) == 0){ + usage("Bad w[-pp]\n"); + } + + + if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n"); + if (sscanf(argv[4], "%d", &size) == 0) usage("Bad size\n"); + if (sscanf(argv[5], "%d", &iterations) == 0) usage("Bad iterations\n"); + if (t0 == -1) t0 = time(0); + MOA_Seed(t0); + + ds = size; + di = iterations; + + if ((w > 32 && w != 64 && w != 128) || w < 0) usage("Bad w"); + if ((size * 8) % w != 0) usage ("Bad size -- must be a multiple of w*8\n"); + + if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage(BM); + + strcpy(tests, ""); + for (i = 0; argv[2][i] != '\0'; i++) { + switch(argv[2][i]) { + case 'A': strcat(tests, single_tests); + strcat(tests, region_tests); + break; + case 'S': strcat(tests, single_tests); break; + case 'R': strcat(tests, region_tests); break; + case 'G': strcat(tests, "G"); break; + case '0': strcat(tests, "0"); break; + case '1': strcat(tests, "1"); break; + case '2': strcat(tests, "2"); break; + case 'M': strcat(tests, "M"); break; + case 'D': strcat(tests, "D"); break; + case 'I': strcat(tests, "I"); break; + default: usage("Bad tests"); + } + } + + tstrings['M'] = "Multiply"; + tstrings['D'] = "Divide"; + tstrings['I'] = "Inverse"; + tstrings['G'] = "Region-Random"; + tstrings['0'] = "Region-By-Zero"; + tstrings['1'] = "Region-By-One"; + tstrings['2'] = "Region-By-Two"; + + tmethods['M'] = (void *) gf.multiply.w32; + tmethods['D'] = (void *) gf.divide.w32; + tmethods['I'] = (void *) gf.inverse.w32; + tmethods['G'] = (void *) gf.multiply_region.w32; + tmethods['0'] = (void *) gf.multiply_region.w32; + tmethods['1'] = (void *) gf.multiply_region.w32; + tmethods['2'] = (void *) gf.multiply_region.w32; + + printf("Seed: %ld\n", t0); + +#ifdef HAVE_POSIX_MEMALIGN + if (posix_memalign((void **) &ra, 16, size)) + ra = NULL; + if (posix_memalign((void **) &rb, 16, size)) + rb = NULL; +#else + malloc_ra = (uint8_t *) malloc(size + 15); + malloc_rb = (uint8_t *) malloc(size + 15); + ra = (uint8_t *) (((uintptr_t) malloc_ra + 15) & ~((uintptr_t) 0xf)); + rb = (uint8_t *) (((uintptr_t) malloc_rb + 15) & ~((uintptr_t) 0xf)); +#endif + + if (ra == NULL || rb == NULL) { perror("malloc"); exit(1); } + + for (i = 0; i < 3; i++) { + test = single_tests[i]; + if (strchr(tests, test) != NULL) { + if (tmethods[(int)test] == NULL) { + printf("No %s method.\n", tstrings[(int)test]); + } else { + elapsed = 0; + dnum = 0; + for (it = 0; it < iterations; it++) { + gf_general_set_up_single_timing_test(w, ra, rb, size); + timer_start(&timer); + num = gf_general_do_single_timing_test(&gf, ra, rb, size, test); + dnum += num; + elapsed += timer_split(&timer); + } + printf("%14s: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n", + tstrings[(int)test], elapsed, + dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed); + } + } + } + + for (i = 0; i < 4; i++) { + test = region_tests[i]; + if (strchr(tests, test) != NULL) { + if (tmethods[(int)test] == NULL) { + printf("No %s method.\n", tstrings[(int)test]); + } else { + if (test == '0') gf_general_set_zero(&a, w); + if (test == '1') gf_general_set_one(&a, w); + if (test == '2') gf_general_set_two(&a, w); + + for (xor = 0; xor < 2; xor++) { + elapsed = 0; + for (it = 0; it < iterations; it++) { + if (test == 'G') gf_general_set_random(&a, w, 1); + gf_general_set_up_single_timing_test(8, ra, rb, size); + timer_start(&timer); + gf_general_do_region_multiply(&gf, &a, ra, rb, size, xor); + elapsed += timer_split(&timer); + } + printf("%14s: XOR: %d %10.6lf s MB: %10.3lf %10.3lf MB/s\n", + tstrings[(int)test], xor, elapsed, + ds*di/1024.0/1024.0, ds*di/1024.0/1024.0/elapsed); + } + } + } + } + return 0; +} diff --git a/src/erasure-code/jerasure/gf-complete/tools/test_simd.sh b/src/erasure-code/jerasure/gf-complete/tools/test_simd.sh new file mode 100755 index 00000000..e514e4f6 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/test_simd.sh @@ -0,0 +1,367 @@ +#!/bin/bash -e + +# this scripts has a number of tests for SIMD. It can be invoked +# on the host or on a QEMU machine. + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +host_cpu=`uname -p` +results=${script_dir}/test_simd.results +nprocs=$(grep -c ^processor /proc/cpuinfo) + +# runs unit tests and save the results +test_unit(){ + { ./configure && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } + make -j$nprocs check || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + cat tools/test-suite.log >> ${results} || true +} + +# build with DEBUG_FUNCTIONS and save all methods selected +# to a results file +test_functions() { + failed=0 + + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + done + + return ${failed} +} + +# build with DEBUG_CPU_FUNCTIONS and print out CPU detection +test_detection() { + failed=0 + + { ./configure --enable-debug-cpu && make clean && make; } || { echo "Compile FAILED" >> ${results}; return 1; } + { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); } + + return ${failed} +} + +compile_arm() { + failed=0 + + echo -n "Compiling with NO SIMD support..." >> ${results} + { ./configure --disable-neon && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with FULL SIMD support..." >> ${results} + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + return ${failed} +} + +compile_intel() { + failed=0 + + echo -n "Compiling with NO SIMD support..." >> ${results} + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=no + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3,SSE4_1 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with SSE2,SSE3,SSSE3,SSE4_2 only..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=no + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + echo -n "Compiling with FULL SIMD support..." >> ${results} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=yes + { ./configure && make clean && make && echo "SUCCESS" >> ${results}; } || { echo "FAIL" >> ${results}; ((++failed)); } + + return ${failed} +} + +# test that we can compile the source code with different +# SIMD options. We assume that we are running on processor +# full SIMD support +test_compile() { + case $host_cpu in + aarch64*|arm*) compile_arm ;; + i[[3456]]86*|x86_64*|amd64*) compile_intel ;; + esac +} + +# disable through build flags +runtime_arm_flags() { + failed=0 + + echo "====NO SIMD support..." >> ${1} + { ./configure --disable-neon --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." >> ${1} + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +# build once with FULL SIMD and disable at runtime through environment +runtime_arm_env() { + failed=0 + + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } + + echo "====NO SIMD support..." >> ${1} + export GF_COMPLETE_DISABLE_NEON=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." >> ${1} + unset GF_COMPLETE_DISABLE_NEON + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +runtime_intel_flags() { + failed=0 + + echo "====NO SIMD support..." >> ${1} + { ./configure --disable-sse --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=no + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=no + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_1 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=yes + export ax_cv_have_sse42_ext=no + export ax_cv_have_pclmuldq_ext=no + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_2 support..." >> ${1} + export ax_cv_have_sse_ext=no + export ax_cv_have_sse2_ext=yes + export ax_cv_have_sse3_ext=yes + export ax_cv_have_ssse3_ext=yes + export ax_cv_have_sse41_ext=no + export ax_cv_have_sse42_ext=yes + export ax_cv_have_pclmuldq_ext=no + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." >> ${1} + { ./configure --enable-debug-func && make clean && make; } || { echo "FAIL" >> ${1}; ((++failed)); } + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +runtime_intel_env() { + failed=0 + + # compile a build with full SIMD support + { ./configure --enable-debug-func && make clean && make; } || { echo "Compile FAILED" >> ${1}; return 1; } + + echo "====NO SIMD support..." >> ${1} + export GF_COMPLETE_DISABLE_SSE2=1 + export GF_COMPLETE_DISABLE_SSE3=1 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + export GF_COMPLETE_DISABLE_SSE3=1 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + export GF_COMPLETE_DISABLE_SSSE3=1 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + export GF_COMPLETE_DISABLE_SSE4=1 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_1 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====SSE2,SSE3,SSSE3,SSE4_2 support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + export GF_COMPLETE_DISABLE_SSE4_PCLMUL=1 + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + echo "====FULL SIMD support..." >> ${1} + unset GF_COMPLETE_DISABLE_SSE2 + unset GF_COMPLETE_DISABLE_SSE3 + unset GF_COMPLETE_DISABLE_SSSE3 + unset GF_COMPLETE_DISABLE_SSE4 + unset GF_COMPLETE_DISABLE_SSE4_PCLMUL + for i in 128 64 32 16 8 4; do + { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } + done + + return ${failed} +} + +test_runtime() { + rm -f ${results}.left + rm -f ${results}.right + + case $host_cpu in + aarch64*|arm*) + runtime_arm_flags ${results}.left + runtime_arm_env ${results}.right + ;; + i[[3456]]86*|x86_64*|amd64*) + runtime_intel_flags ${results}.left + runtime_intel_env ${results}.right + ;; + esac + + echo "======LEFT======" > ${results} + cat ${results}.left >> ${results} + echo "======RIGHT======" >> ${results} + cat ${results}.right >> ${results} + echo "======RESULT======" >> ${results} + if diff "${results}.left" "${results}.right"; then + echo SUCCESS >> ${results} + return 0 + else + echo SUCCESS >> ${results} + return 1 + fi +} + +cd ${script_dir}/.. +rm -f ${results} + +test_$1 +exit $? diff --git a/src/erasure-code/jerasure/gf-complete/tools/test_simd_qemu.sh b/src/erasure-code/jerasure/gf-complete/tools/test_simd_qemu.sh new file mode 100755 index 00000000..5771874f --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/test_simd_qemu.sh @@ -0,0 +1,258 @@ +#!/bin/bash -e + +# This script will use QEMU to test gf-complete especially SIMD support +# on different architectures and cpus. It will boot a qemu machine +# and run an Ubuntu cloud image. All testing will happen inside the +# QEMU machine. + +# The following packages are required: +# qemu-system-aarch64 +# qemu-system-arm +# qemu-system-x86_64 +# genisoimage + + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +qemu_dir="${script_dir}/.qemu" +ssh_port=2222 +ssh_pubkey_file="${qemu_dir}/qemu.pub" +ssh_key_file="${qemu_dir}/qemu" + +mkdir -p "${qemu_dir}" + +cleanup() { + if [[ -n "$(jobs -p)" ]]; then + echo killing qemu processes "$(jobs -p)" + kill $(jobs -p) + fi +} + +trap cleanup EXIT + +start_qemu() { + arch=$1 + cpu=$2 + + image_version="xenial" + image_url_base="http://cloud-images.ubuntu.com/${image_version}/current" + + case $arch in + i[[3456]]86*|x86_64*|amd64*) + image_kernel="${image_version}-server-cloudimg-amd64-vmlinuz-generic" + image_initrd="${image_version}-server-cloudimg-amd64-initrd-generic" + image_disk="${image_version}-server-cloudimg-amd64-disk1.img" + ;; + aarch64*) + image_kernel="${image_version}-server-cloudimg-arm64-vmlinuz-generic" + image_initrd="${image_version}-server-cloudimg-arm64-initrd-generic" + image_disk="${image_version}-server-cloudimg-arm64-disk1.img" + ;; + arm*) + image_kernel="${image_version}-server-cloudimg-armhf-vmlinuz-lpae" + image_initrd="${image_version}-server-cloudimg-armhf-initrd-generic-lpae" + image_disk="${image_version}-server-cloudimg-armhf-disk1.img" + ;; + *) die "Unsupported arch" ;; + esac + + [[ -f ${qemu_dir}/${image_kernel} ]] || wget -O ${qemu_dir}/${image_kernel} ${image_url_base}/unpacked/${image_kernel} + [[ -f ${qemu_dir}/${image_initrd} ]] || wget -O ${qemu_dir}/${image_initrd} ${image_url_base}/unpacked/${image_initrd} + [[ -f ${qemu_dir}/${image_disk} ]] || wget -O ${qemu_dir}/${image_disk} ${image_url_base}/${image_disk} + + #create a delta disk to keep the original image clean + delta_disk="${qemu_dir}/disk.img" + rm -f ${delta_disk} + qemu-img create -q -f qcow2 -b "${qemu_dir}/${image_disk}" ${delta_disk} + + # generate an ssh keys + [[ -f ${ssh_pubkey_file} ]] || ssh-keygen -q -N "" -f ${ssh_key_file} + + # create a config disk to set the SSH keys + cat > "${qemu_dir}/meta-data" <<EOF +instance-id: qemu +local-hostname: qemu +EOF + cat > "${qemu_dir}/user-data" <<EOF +#cloud-config +hostname: qemu +manage_etc_hosts: true +users: + - name: qemu + ssh-authorized-keys: + - $(cat "${ssh_pubkey_file}") + sudo: ['ALL=(ALL) NOPASSWD:ALL'] + groups: sudo + shell: /bin/bash +EOF + genisoimage -quiet -output "${qemu_dir}/cloud.iso" -volid cidata -joliet -rock "${qemu_dir}/user-data" "${qemu_dir}/meta-data" + + common_args=( \ + -name "qemu" \ + -m 1024 \ + -nodefaults \ + -nographic \ + -kernel ${qemu_dir}/${image_kernel} \ + -initrd ${qemu_dir}/${image_initrd} \ + -cdrom ${qemu_dir}/cloud.iso \ + -serial file:${qemu_dir}/console.log + ) + + case $arch in + i[[3456]]86*|x86_64*|amd64*) + qemu-system-x86_64 \ + "${common_args[@]}" \ + -machine accel=kvm -cpu $cpu \ + -append "console=ttyS0 root=/dev/sda1" \ + -hda "${delta_disk}" \ + -net nic,vlan=0,model=virtio \ + -net user,vlan=0,hostfwd=tcp::"${ssh_port}"-:22,hostname="${vm_name}" \ + & + ;; + aarch64*|arm*) + qemu-system-$arch \ + "${common_args[@]}" \ + -machine virt -cpu $cpu -machine type=virt -smp 1 \ + -drive if=none,file="${delta_disk}",id=hd0 \ + -device virtio-blk-device,drive=hd0 \ + -append "console=ttyAMA0 root=/dev/vda1" \ + -netdev user,id=eth0,hostfwd=tcp::"${ssh_port}"-:22,hostname="${vm_name}" \ + -device virtio-net-device,netdev=eth0 \ + & + ;; + *) die "Unsupported arch" ;; + esac + + wait_for_ssh +} + +stop_qemu() { + run_ssh "sudo shutdown now" || true + wait $(jobs -p) +} + +shared_args=( + -i ${ssh_key_file} + -F /dev/null + -o BatchMode=yes + -o UserKnownHostsFile=/dev/null + -o StrictHostKeyChecking=no + -o IdentitiesOnly=yes +) + +ssh_args=( + ${shared_args[*]} + -p ${ssh_port} +) + +wait_for_ssh() { + retries=0 + retry_count=50 + + echo "waiting for machine to come up." + echo "tail -F ${qemu_dir}/console.log for progress." + + while true; do + set +e + ssh -q ${ssh_args[*]} -o ConnectTimeout=1 qemu@localhost "echo done" + error=$? + set -e + if [[ $error == 0 ]]; then + return 0 + fi + + if [[ ${retries} == ${retry_count} ]]; then + echo "timeout" + return 1 + fi + + echo -n "." + ((++retries)) + sleep 10 + done +} + +run_ssh() { + ssh -q ${ssh_args[*]} qemu@localhost "$@" +} + +run_scp() { + scp -q ${shared_args[*]} -P ${ssh_port} "$@" +} + +rsync_args=( + --exclude '.qemu' + --exclude '.git' +) + +run_rsync() { + rsync -avz -e "ssh ${ssh_args[*]}" ${rsync_args[*]} "$@" +} + +init_machine() { + run_ssh "sudo apt-get -y install --no-install-recommends make gcc autoconf libtool automake" +} + +init_machine_and_copy_source() { + init_machine + run_ssh "rm -fr ~/gf-complete; mkdir -p ~/gf-complete" + run_rsync ${script_dir}/.. qemu@localhost:gf-complete + run_ssh "cd ~/gf-complete && ./autogen.sh" +} + +run_test() { + arch=$1; shift + cpu=$1; shift + test=$1; shift + + run_ssh "~/gf-complete/tools/test_simd.sh ${test}" + run_scp qemu@localhost:gf-complete/tools/test_simd.results ${script_dir}/test_simd_${test}_${arch}_${cpu}.results +} + +# this test run the unit tests on the machine using "make check" +run_test_simd_basic() { + arch=$1; shift + cpu=$1; shift + + failed=0 + + echo "=====starting qemu machine $arch $cpu" + start_qemu $arch $cpu + init_machine_and_copy_source + echo "=====running compile test" + { run_test $arch $cpu "compile" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running unit test" + { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running functions test" + { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running detection test" + { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + echo "=====running runtime test" + { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + stop_qemu + + return ${failed} +} + +run_all_tests() { + failed=0 + + echo ============================ + echo =====running x86_64 tests + # NOTE: Broadwell has all the supported SIMD instructions + { run_test_simd_basic "x86_64" "Broadwell" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + + echo ============================ + echo =====running aarch64 tests + # NOTE: cortex-a57 has ASIMD support + { run_test_simd_basic "aarch64" "cortex-a57" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + + echo ============================ + echo =====running arm tests + # NOTE: cortex-a15 has NEON support + { run_test_simd_basic "arm" "cortex-a15" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } + + return ${failed} +} + +run_all_tests +exit $? diff --git a/src/erasure-code/jerasure/gf-complete/tools/time_tool.sh b/src/erasure-code/jerasure/gf-complete/tools/time_tool.sh new file mode 100644 index 00000000..7b165e17 --- /dev/null +++ b/src/erasure-code/jerasure/gf-complete/tools/time_tool.sh @@ -0,0 +1,98 @@ +# time_tool.sh - Shell script to test various timings. +# This is a rough tester -- its job is to work quickly rather than precisely. +# (Jim Plank) + +#!/bin/sh + +if [ $# -lt 3 ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + exit 1 +fi + +op=$1 +w=$2 + +shift ; shift + +method="$*" + +if [ $op != M -a $op != D -a $op != R -a $op != B ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + echo 'You have to specify a test: ' >&2 + echo ' M=Multiplication' >&2 + echo ' D=Division' >&2 + echo ' R=Regions' >&2 + echo ' B=Best-Region' >&2 + exit 1 +fi + +# First, use a 16K buffer to test the performance of single multiplies. + +fac=`echo $w | awk '{ n = $1; while (n != 0 && n%2==0) n /= 2; print n }'` +if [ $fac -eq 0 ]; then + echo 'usage sh time_tool.sh M|D|R|B w method' >&2 + echo 'Bad w' >&2 + exit 1 +fi + +bsize=16384 +bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'` + +if [ `./gf_time $w M -1 $bsize 1 $method 2>&1 | wc | awk '{ print $1 }'` -gt 2 ]; then + echo 'usage sh time_tool.sh w method' >&2 + echo "Bad method" + exit 1 +fi + +if [ $op = M -o $op = D ]; then + iter=1 + c1=`./gf_time $w $op -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $4*100 }'` + s=`echo $c1 | awk '{ print $8 }'` + bs=$s + + while [ $t -lt 1 ]; do + bs=$s + iter=`echo $iter | awk '{ print $1*2 }'` + c1=`./gf_time $w $op -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $4*100 }'` + s=`echo $c1 | awk '{ print $8 }'` + done + + echo $op $bs | awk '{ printf "%s speed (MB/s): %8.2lf W-Method: ", $1, $2 }' + echo $w $method + exit 0 +fi + +bsize=16384 +bsize=`echo $bsize $fac | awk '{ print $1 * $2 }'` + +best=0 +while [ $bsize -le 4194304 ]; do + iter=1 + c1=`./gf_time $w G -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $6*500 }'` + s=`echo $c1 | awk '{ print $10 }'` + bs=$s + + while [ $t -lt 1 ]; do + bs=$s + iter=`echo $iter | awk '{ print $1*2 }'` + c1=`./gf_time $w G -1 $bsize $iter $method` + t=`echo $c1 | awk '{ printf "%d\n", $6*500 }'` + s=`echo $c1 | awk '{ print $10 }'` + done + if [ $bsize -lt 1048576 ]; then + str=`echo $bsize | awk '{ printf "%3dK\n", $1/1024 }'` + else + str=`echo $bsize | awk '{ printf "%3dM\n", $1/1024/1024 }'` + fi + if [ $op = R ]; then + echo $str $bs | awk '{ printf "Region Buffer-Size: %4s (MB/s): %8.2lf W-Method: ", $1, $2 }' + echo $w $method + fi + best=`echo $best $bs | awk '{ print ($1 > $2) ? $1 : $2 }'` + bsize=`echo $bsize | awk '{ print $1 * 2 }'` +done +echo $best | awk '{ printf "Region Best (MB/s): %8.2lf W-Method: ", $1 }' +echo $w $method |