diff options
Diffstat (limited to '')
-rw-r--r-- | src/third-party/base64/lib/lib_openmp.c | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/src/third-party/base64/lib/lib_openmp.c b/src/third-party/base64/lib/lib_openmp.c new file mode 100644 index 0000000..6b87c52 --- /dev/null +++ b/src/third-party/base64/lib/lib_openmp.c @@ -0,0 +1,149 @@ +// This code makes some assumptions on the implementation of +// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode(). +// Basically these assumptions boil down to that when breaking the src into +// parts, out parts can be written without side effects. +// This is met when: +// 1) base64_stream_encode() and base64_stream_decode() don't use globals; +// 2) the shared variables src and out are not read or written outside of the +// bounds of their parts, i.e. when base64_stream_encode() reads a multiple +// of 3 bytes, it must write no more then a multiple of 4 bytes, not even +// temporarily; +// 3) the state flag can be discarded after base64_stream_encode() and +// base64_stream_decode() on the parts. + +static inline void +base64_encode_openmp + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) +{ + size_t s; + size_t t; + size_t sum = 0, len, last_len; + struct base64_state state, initial_state; + int num_threads, i; + + // Request a number of threads but not necessarily get them: + #pragma omp parallel + { + // Get the number of threads used from one thread only, + // as num_threads is a shared var: + #pragma omp single + { + num_threads = omp_get_num_threads(); + + // Split the input string into num_threads parts, each + // part a multiple of 3 bytes. The remaining bytes will + // be done later: + len = srclen / (num_threads * 3); + len *= 3; + last_len = srclen - num_threads * len; + + // Init the stream reader: + base64_stream_encode_init(&state, flags); + initial_state = state; + } + + // Single has an implicit barrier for all threads to wait here + // for the above to complete: + #pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1) + for (i = 0; i < num_threads; i++) + { + // Feed each part of the string to the stream reader: + base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s); + sum += s; + } + } + + // As encoding should never fail and we encode an exact multiple + // of 3 bytes, we can discard state: + state = initial_state; + + // Encode the remaining bytes: + base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s); + + // Finalize the stream by writing trailer if any: + base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t); + + // Final output length is stream length plus tail: + sum += s + t; + *outlen = sum; +} + +static inline int +base64_decode_openmp + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) +{ + int num_threads, result = 0, i; + size_t sum = 0, len, last_len, s; + struct base64_state state, initial_state; + + // Request a number of threads but not necessarily get them: + #pragma omp parallel + { + // Get the number of threads used from one thread only, + // as num_threads is a shared var: + #pragma omp single + { + num_threads = omp_get_num_threads(); + + // Split the input string into num_threads parts, each + // part a multiple of 4 bytes. The remaining bytes will + // be done later: + len = srclen / (num_threads * 4); + len *= 4; + last_len = srclen - num_threads * len; + + // Init the stream reader: + base64_stream_decode_init(&state, flags); + + initial_state = state; + } + + // Single has an implicit barrier to wait here for the above to + // complete: + #pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1) + for (i = 0; i < num_threads; i++) + { + int this_result; + + // Feed each part of the string to the stream reader: + this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s); + sum += s; + result += this_result; + } + } + + // If `result' equals `-num_threads', then all threads returned -1, + // indicating that the requested codec is not available: + if (result == -num_threads) { + return -1; + } + + // If `result' does not equal `num_threads', then at least one of the + // threads hit a decode error: + if (result != num_threads) { + return 0; + } + + // So far so good, now decode whatever remains in the buffer. Reuse the + // initial state, since we are at a 4-byte boundary: + state = initial_state; + result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s); + sum += s; + *outlen = sum; + + // If when decoding a whole block, we're still waiting for input then fail: + if (result && (state.bytes == 0)) { + return result; + } + return 0; +} |