summary refs log tree commit diff stats
path: root/src/basenc.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
commit 758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree 48111ece75cf4f98316848b37a7e26356e00669e /src/basenc.c
parent Initial commit. (diff)
download coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz
coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip
Adding upstream version 9.1. upstream/9.1 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/basenc.c')
-rw-r--r-- src/basenc.c 1248
1 files changed, 1248 insertions, 0 deletions
diff --git a/src/basenc.c b/src/basenc.c
new file mode 100644
index 0000000..04857d5
--- /dev/null
+++ b/src/basenc.c
@@ -0,0 +1,1248 @@
+/* Base64, base32, and similar encoding/decoding strings or files.
+ Copyright (C) 2004-2022 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Simon Josefsson <simon@josefsson.org>. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+
+#include "system.h"
+#include "c-ctype.h"
+#include "die.h"
+#include "error.h"
+#include "fadvise.h"
+#include "idx.h"
+#include "quote.h"
+#include "xstrtol.h"
+#include "xdectoint.h"
+#include "xbinary-io.h"
+
+/* BASE_TYPE is a compile-time selector for which program this file builds:
+ 32 and 64 give the single-encoding programs, 42 gives "basenc".
+ AUTHORS is handed to case_GETOPT_VERSION_CHAR in main. */
+#if BASE_TYPE == 42
+# define AUTHORS \
+ proper_name ("Simon Josefsson"), \
+ proper_name ("Assaf Gordon")
+#else
+# define AUTHORS proper_name ("Simon Josefsson")
+#endif
+
+/* Include the codec header(s) needed by this build and set PROGRAM_NAME.
+ Any other BASE_TYPE value is rejected at compile time. */
+#if BASE_TYPE == 32
+# include "base32.h"
+# define PROGRAM_NAME "base32"
+#elif BASE_TYPE == 64
+# include "base64.h"
+# define PROGRAM_NAME "base64"
+#elif BASE_TYPE == 42
+# include "base32.h"
+# include "base64.h"
+# include <assert.h>
+# define PROGRAM_NAME "basenc"
+#else
+# error missing/invalid BASE_TYPE definition
+#endif
+
+
+
+#if BASE_TYPE == 42
+/* getopt_long return values for the long-only encoding-selection options
+ of basenc. Numbering starts above CHAR_MAX so that these values cannot
+ collide with any single-character option. */
+enum
+{
+ BASE64_OPTION = CHAR_MAX + 1,
+ BASE64URL_OPTION,
+ BASE32_OPTION,
+ BASE32HEX_OPTION,
+ BASE16_OPTION,
+ BASE2MSBF_OPTION,
+ BASE2LSBF_OPTION,
+ Z85_OPTION
+};
+#endif
+
+/* Long option table passed to getopt_long in main. The encoding-selection
+ entries exist only in the basenc build (BASE_TYPE == 42). The table is
+ terminated by an all-zero entry. */
+static struct option const long_options[] =
+{
+ {"decode", no_argument, 0, 'd'},
+ {"wrap", required_argument, 0, 'w'},
+ {"ignore-garbage", no_argument, 0, 'i'},
+#if BASE_TYPE == 42
+ {"base64", no_argument, 0, BASE64_OPTION},
+ {"base64url", no_argument, 0, BASE64URL_OPTION},
+ {"base32", no_argument, 0, BASE32_OPTION},
+ {"base32hex", no_argument, 0, BASE32HEX_OPTION},
+ {"base16", no_argument, 0, BASE16_OPTION},
+ {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
+ {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
+ {"z85", no_argument, 0, Z85_OPTION},
+#endif
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {NULL, 0, NULL, 0}
+};
+
+/* Print usage information and exit with STATUS.
+ When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
+ otherwise the full help text is written to standard output. The text
+ differs between the basenc build (BASE_TYPE == 42) and the base32/base64
+ builds. This function does not return. */
+void
+usage (int status)
+{
+ if (status != EXIT_SUCCESS)
+ emit_try_help ();
+ else
+ {
+ printf (_("\
+Usage: %s [OPTION]... [FILE]\n\
+"), program_name);
+
+ /* One-line summary; the single-encoding builds print their base number. */
+#if BASE_TYPE == 42
+ fputs (_("\
+basenc encode or decode FILE, or standard input, to standard output.\n\
+"), stdout);
+#else
+ printf (_("\
+Base%d encode or decode FILE, or standard input, to standard output.\n\
+"), BASE_TYPE);
+#endif
+
+ emit_stdin_note ();
+ emit_mandatory_arg_note ();
+ /* Encoding-selection options, basenc only. Each option is a separate
+ fputs call with its own translatable string. */
+#if BASE_TYPE == 42
+ fputs (_("\
+ --base64 same as 'base64' program (RFC4648 section 4)\n\
+"), stdout);
+ fputs (_("\
+ --base64url file- and url-safe base64 (RFC4648 section 5)\n\
+"), stdout);
+ fputs (_("\
+ --base32 same as 'base32' program (RFC4648 section 6)\n\
+"), stdout);
+ fputs (_("\
+ --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\
+"), stdout);
+ fputs (_("\
+ --base16 hex encoding (RFC4648 section 8)\n\
+"), stdout);
+ fputs (_("\
+ --base2msbf bit string with most significant bit (msb) first\n\
+"), stdout);
+ fputs (_("\
+ --base2lsbf bit string with least significant bit (lsb) first\n\
+"), stdout);
+#endif
+ /* Options common to all three programs. */
+ fputs (_("\
+ -d, --decode decode data\n\
+ -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\
+ -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\
+ Use 0 to disable line wrapping\n\
+"), stdout);
+#if BASE_TYPE == 42
+ fputs (_("\
+ --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
+ when encoding, input length must be a multiple of 4;\n\
+ when decoding, input length must be a multiple of 5\n\
+"), stdout);
+#endif
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
+ /* Closing notes; the single-encoding builds also name their alphabet. */
+#if BASE_TYPE == 42
+ fputs (_("\
+\n\
+When decoding, the input may contain newlines in addition to the bytes of\n\
+the formal alphabet. Use --ignore-garbage to attempt to recover\n\
+from any other non-alphabet bytes in the encoded stream.\n\
+"), stdout);
+#else
+ printf (_("\
+\n\
+The data are encoded as described for the %s alphabet in RFC 4648.\n\
+When decoding, the input may contain newlines in addition to the bytes of\n\
+the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\
+from any other non-alphabet bytes in the encoded stream.\n"),
+ PROGRAM_NAME, PROGRAM_NAME);
+#endif
+ emit_ancillary_info (PROGRAM_NAME);
+ }
+
+ exit (status);
+}
+
+/* Number of raw input bytes that do_encode reads and encodes per
+ iteration. */
+#define ENC_BLOCKSIZE (1024 * 3 * 10)
+
+/* In the single-encoding builds the generic base_* names used by
+ do_encode and do_decode are bound directly to the base32 or base64
+ routines via macros. */
+#if BASE_TYPE == 32
+# define BASE_LENGTH BASE32_LENGTH
+/* Note that increasing this may decrease performance if --ignore-garbage
+ is used, because of the memmove operation below. */
+# define DEC_BLOCKSIZE (1024 * 5)
+
+/* Ensure that BLOCKSIZE is a multiple of 5 and 8. */
+verify (ENC_BLOCKSIZE % 40 == 0); /* So padding chars only on last block. */
+verify (DEC_BLOCKSIZE % 40 == 0); /* So complete encoded blocks are used. */
+
+# define base_encode base32_encode
+# define base_decode_context base32_decode_context
+# define base_decode_ctx_init base32_decode_ctx_init
+# define base_decode_ctx base32_decode_ctx
+# define isbase isbase32
+#elif BASE_TYPE == 64
+# define BASE_LENGTH BASE64_LENGTH
+/* Note that increasing this may decrease performance if --ignore-garbage
+ is used, because of the memmove operation below. */
+# define DEC_BLOCKSIZE (1024 * 3)
+
+/* Ensure that BLOCKSIZE is a multiple of 3 and 4. */
+verify (ENC_BLOCKSIZE % 12 == 0); /* So padding chars only on last block. */
+verify (DEC_BLOCKSIZE % 12 == 0); /* So complete encoded blocks are used. */
+
+# define base_encode base64_encode
+# define base_decode_context base64_decode_context
+# define base_decode_ctx_init base64_decode_ctx_init
+# define base_decode_ctx base64_decode_ctx
+# define isbase isbase64
+#elif BASE_TYPE == 42
+
+
+/* In basenc the encoding is chosen at run time, so BASE_LENGTH expands to
+ a call through the base_length function pointer, which main assigns
+ according to the selected encoding option. */
+# define BASE_LENGTH base_length
+
+/* Note that increasing this may decrease performance if --ignore-garbage
+ is used, because of the memmove operation below. */
+# define DEC_BLOCKSIZE (4200)
+verify (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32 */
+verify (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64 */
+
+/* Run-time codec bindings, all assigned in main:
+ base_length - encoded length for a given number of input bytes;
+ isbase - whether a byte belongs to the selected alphabet;
+ base_encode - encode INLEN bytes of IN into OUT. */
+static int (*base_length) (int i);
+static bool (*isbase) (char ch);
+static void (*base_encode) (char const *restrict in, idx_t inlen,
+ char *restrict out, idx_t outlen);
+
+/* Decoder state for base16: a hex digit waiting for its partner. */
+struct base16_decode_context
+{
+ char nibble; /* value of the pending high nibble */
+ bool have_nibble; /* true while NIBBLE holds an unpaired digit */
+};
+
+/* Decoder state for Z85: the base-85 digit values collected so far for
+ the current 5-character group. */
+struct z85_decode_context
+{
+ int i; /* number of entries of OCTETS currently filled */
+ unsigned char octets[5]; /* decoded digit values, first digit at [0] */
+};
+
+/* Decoder state for the base2 variants: the byte being assembled from
+ individual '0'/'1' characters. The bit position is tracked in the
+ enclosing base_decode_context's I member. */
+struct base2_decode_context
+{
+ unsigned char octet;
+};
+
+/* Generic decoder state for basenc, wrapping the state of whichever
+ codec was selected. do_decode inspects I after the last input block
+ and skips the final flush call when it is zero. */
+struct base_decode_context
+{
+ int i; /* will be updated manually */
+ /* Codec-specific state; only the member of the selected codec is used. */
+ union {
+ struct base64_decode_context base64;
+ struct base32_decode_context base32;
+ struct base16_decode_context base16;
+ struct base2_decode_context base2;
+ struct z85_decode_context z85;
+ } ctx;
+ /* Scratch buffer allocated by init_inbuf and grown by prepare_inbuf;
+ BUFSIZE is its current allocated size in bytes. */
+ char *inbuf;
+ idx_t bufsize;
+};
+/* Run-time decoder bindings, assigned in main. base_decode_ctx_init
+ prepares CTX before the first block. base_decode_ctx decodes INLEN
+ bytes of IN into OUT, stores the number of bytes produced in *OUTLEN,
+ and returns false on invalid input; do_decode calls it with INLEN == 0
+ to request a final flush. */
+static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
+static bool (*base_decode_ctx) (struct base_decode_context *ctx,
+ char const *restrict in, idx_t inlen,
+ char *restrict out, idx_t *outlen);
+#endif
+
+
+
+
+#if BASE_TYPE == 42
+
+/* Function form of BASE64_LENGTH, so that the length computation can be
+   stored in the base_length function pointer.  */
+static int
+base64_length_wrapper (int len)
+{
+  int encoded_len = BASE64_LENGTH (len);
+  return encoded_len;
+}
+
+/* Initialize the base64 member of the generic decoder context CTX.  */
+static void
+base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  struct base64_decode_context *b64 = &ctx->ctx.base64;
+  base64_decode_ctx_init (b64);
+}
+
+/* Decode INLEN bytes of base64 text at IN into OUT through the base64
+   member of CTX, storing the produced length in *OUTLEN.  The codec's
+   own counter is mirrored into CTX->i afterwards.  Return the result
+   of base64_decode_ctx.  */
+static bool
+base64_decode_ctx_wrapper (struct base_decode_context *ctx,
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
+{
+  struct base64_decode_context *b64 = &ctx->ctx.base64;
+  bool ok = base64_decode_ctx (b64, in, inlen, out, outlen);
+
+  ctx->i = b64->i;
+  return ok;
+}
+
+/* Allocate the scratch buffer of CTX with an initial capacity of
+   DEC_BLOCKSIZE bytes and record that capacity.  */
+static void
+init_inbuf (struct base_decode_context *ctx)
+{
+  idx_t initial_size = DEC_BLOCKSIZE;
+
+  ctx->bufsize = initial_size;
+  ctx->inbuf = xcharalloc (initial_size);
+}
+
+/* Make sure the scratch buffer of CTX can hold INLEN bytes.  When it
+   is too small it is reallocated to twice INLEN; existing capacity is
+   otherwise left untouched.  */
+static void
+prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
+{
+  if (inlen <= ctx->bufsize)
+    return;
+
+  ctx->bufsize = inlen * 2;
+  ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char));
+}
+
+
+/* Encode INLEN bytes of IN as base64url into OUT (OUTLEN bytes):
+   encode as plain base64, then replace every '+' with '-' and every
+   '/' with '_' in the output.  */
+static void
+base64url_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  base64_encode (in, inlen, out, outlen);
+
+  /* translate 62nd and 63rd characters */
+  for (char *p = out; outlen != 0; outlen--, p++)
+    switch (*p)
+      {
+      case '+':
+        *p = '-';
+        break;
+
+      case '/':
+        *p = '_';
+        break;
+
+      default:
+        break;
+      }
+}
+
+/* Return true if CH belongs to the base64url alphabet: '-' and '_'
+   are accepted, '+' and '/' are not, and every other byte is judged
+   by isbase64.  */
+static bool
+isbase64url (char ch)
+{
+  if (ch == '-' || ch == '_')
+    return true;
+  if (ch == '+' || ch == '/')
+    return false;
+  return isbase64 (ch);
+}
+
+/* Initialize CTX for base64url decoding: set up the base64 codec state
+   and allocate the scratch buffer used for alphabet translation.  */
+static void
+base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  struct base64_decode_context *b64 = &ctx->ctx.base64;
+
+  base64_decode_ctx_init (b64);
+  init_inbuf (ctx);
+}
+
+
+/* Decode INLEN bytes of base64url text at IN into OUT, storing the
+   produced length in *OUTLEN.  The input is copied to the scratch
+   buffer of CTX, '-' and '_' are mapped back to '+' and '/', and the
+   result is handed to the base64 decoder.  Input that contains '+' or
+   '/' is rejected: *OUTLEN is set to 0 and false is returned.  */
+static bool
+base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
+{
+  prepare_inbuf (ctx, inlen);
+  memcpy (ctx->inbuf, in, inlen);
+
+  /* translate 62nd and 63rd characters */
+  char *q = ctx->inbuf;
+  for (idx_t remaining = inlen; remaining != 0; remaining--, q++)
+    switch (*q)
+      {
+      case '+':
+      case '/':
+        /* reject base64 input */
+        *outlen = 0;
+        return false;
+
+      case '-':
+        *q = '+';
+        break;
+
+      case '_':
+        *q = '/';
+        break;
+
+      default:
+        break;
+      }
+
+  struct base64_decode_context *b64 = &ctx->ctx.base64;
+  bool ok = base64_decode_ctx (b64, ctx->inbuf, inlen, out, outlen);
+  ctx->i = b64->i;
+
+  return ok;
+}
+
+
+
+/* Function form of BASE32_LENGTH, so that the length computation can be
+   stored in the base_length function pointer.  */
+static int
+base32_length_wrapper (int len)
+{
+  int encoded_len = BASE32_LENGTH (len);
+  return encoded_len;
+}
+
+/* Initialize the base32 member of the generic decoder context CTX.  */
+static void
+base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  struct base32_decode_context *b32 = &ctx->ctx.base32;
+  base32_decode_ctx_init (b32);
+}
+
+/* Decode INLEN bytes of base32 text at IN into OUT through the base32
+   member of CTX, storing the produced length in *OUTLEN.  The codec's
+   own counter is mirrored into CTX->i afterwards.  Return the result
+   of base32_decode_ctx.  */
+static bool
+base32_decode_ctx_wrapper (struct base_decode_context *ctx,
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
+{
+  struct base32_decode_context *b32 = &ctx->ctx.base32;
+  bool ok = base32_decode_ctx (b32, in, inlen, out, outlen);
+
+  ctx->i = b32->i;
+  return ok;
+}
+
+/* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
+ to
+ 0123456789ABCDEFGHIJKLMNOPQRSTUV
+
+ Translation table from the standard base32 alphabet to the base32hex
+ alphabet. base32hex_encode indexes it with (c - 0x32), so entry 0
+ corresponds to '2' (0x32) and the last listed entry to 'Z' (0x5a).
+ The nine entries for 0x38 .. 0x40 map each byte to itself; that range
+ includes '=' (0x3d). */
+static const char base32_norm_to_hex[32 + 9] = {
+/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */
+ 'Q', 'R', 'S', 'T', 'U', 'V',
+
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+
+/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */
+ '0', '1', '2', '3', '4', '5', '6', '7',
+
+/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+
+/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */
+ 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+
+/*0x59, 0x5a, */
+ 'O', 'P',
+};
+
+/* 0123456789ABCDEFGHIJKLMNOPQRSTUV
+ to
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
+
+ Translation table from the base32hex alphabet to the standard base32
+ alphabet. base32hex_decode_ctx_wrapper indexes it with (c - 0x30),
+ and only for bytes accepted by isbase32hex, so the identity entries
+ for 0x3a .. 0x40 are placeholders that keep the indexing contiguous. */
+static const char base32_hex_to_norm[32 + 9] = {
+ /* from: 0x30 .. 0x39 ('0' to '9') */
+ /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+
+ 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+
+ /* from: 0x41 .. 0x4A ('A' to 'J') */
+ /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+
+ /* from: 0x4B .. 0x54 ('K' to 'T') */
+ /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',
+
+ /* from: 0x55 .. 0x56 ('U' to 'V') */
+ /* to:*/ '6', '7'
+};
+
+
+/* Return true if CH is in the base32hex alphabet, i.e. a decimal digit
+   or an upper-case letter from 'A' through 'V'.  */
+inline static bool
+isbase32hex (char ch)
+{
+  bool is_digit = '0' <= ch && ch <= '9';
+  bool is_letter = 'A' <= ch && ch <= 'V';
+
+  return is_digit || is_letter;
+}
+
+
+/* Encode INLEN bytes of IN as base32hex into OUT (OUTLEN bytes):
+   encode as standard base32, then translate each output byte through
+   base32_norm_to_hex.  */
+static void
+base32hex_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  base32_encode (in, inlen, out, outlen);
+
+  char *p = out;
+  while (outlen--)
+    {
+      char c = *p;
+      /* The table only covers bytes 0x32 through 0x5a.  */
+      assert (0x32 <= c && c <= 0x5a); /* LCOV_EXCL_LINE */
+      *p++ = base32_norm_to_hex[c - 0x32];
+    }
+}
+
+
+/* Initialize CTX for base32hex decoding: set up the base32 codec state
+   and allocate the scratch buffer used for alphabet translation.  */
+static void
+base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  struct base32_decode_context *b32 = &ctx->ctx.base32;
+
+  base32_decode_ctx_init (b32);
+  init_inbuf (ctx);
+}
+
+
+/* Decode INLEN bytes of base32hex text at IN into OUT, storing the
+   produced length in *OUTLEN.  Each input byte accepted by isbase32hex
+   is translated to the standard base32 alphabet in the scratch buffer
+   of CTX; all other bytes are copied unchanged.  The translated buffer
+   is then handed to the base32 decoder, whose result is returned.  */
+static bool
+base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
+{
+  prepare_inbuf (ctx, inlen);
+
+  char *dst = ctx->inbuf;
+  for (idx_t remaining = inlen; remaining != 0; remaining--)
+    {
+      char c = *in++;
+      if (isbase32hex (c))
+        c = base32_hex_to_norm[(int) c - 0x30];
+      *dst++ = c;
+    }
+
+  struct base32_decode_context *b32 = &ctx->ctx.base32;
+  bool ok = base32_decode_ctx (b32, ctx->inbuf, inlen, out, outlen);
+  ctx->i = b32->i;
+
+  return ok;
+}
+
+
+/* Return true if CH is an upper-case hexadecimal digit
+   ('0'-'9' or 'A'-'F').  */
+static bool
+isbase16 (char ch)
+{
+  bool is_digit = '0' <= ch && ch <= '9';
+  bool is_hex_letter = 'A' <= ch && ch <= 'F';
+
+  return is_digit || is_hex_letter;
+}
+
+/* Return the number of base16 characters needed to encode LEN bytes:
+   two characters per byte.  */
+static int
+base16_length (int len)
+{
+  int encoded_len = 2 * len;
+  return encoded_len;
+}
+
+/* The base16 digits, indexed by nibble value.  */
+static const char base16[16] = "0123456789ABCDEF";
+
+/* Encode INLEN bytes of IN as upper-case hexadecimal into OUT, writing
+   the high nibble of each byte first.  OUTLEN is not consulted; the
+   function writes two characters per input byte.  */
+static void
+base16_encode (char const *restrict in, idx_t inlen,
+               char *restrict out, idx_t outlen)
+{
+  for (; inlen != 0; inlen--, in++, out += 2)
+    {
+      unsigned char byte = *in;
+      out[0] = base16[byte >> 4];
+      out[1] = base16[byte & 0x0F];
+    }
+}
+
+
+/* Initialize CTX for base16 decoding: no nibble is pending.  CTX->i is
+   set to 1, which keeps do_decode from skipping the final flush call
+   (it only skips the flush when CTX->i is 0).  The scratch buffer is
+   allocated as well.  */
+static void
+base16_decode_ctx_init (struct base_decode_context *ctx)
+{
+  ctx->i = 1;
+  ctx->ctx.base16.have_nibble = false;
+  init_inbuf (ctx);
+}
+
+
+/* Decode INLEN bytes of base16 text at IN into OUT, storing the number
+   of bytes produced in *OUTLEN.  Newlines are skipped.  A digit that
+   has no partner yet is remembered in CTX until the next call.
+
+   INLEN == 0 is a flush request: it succeeds only if no high nibble is
+   left waiting for its low nibble.
+
+   Return false on a non-hex byte or on a dangling nibble at flush
+   time, true otherwise.  */
+static bool
+base16_decode_ctx (struct base_decode_context *ctx,
+                   char const *restrict in, idx_t inlen,
+                   char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+  struct base16_decode_context *b16 = &ctx->ctx.base16;
+
+  *outlen = 0;
+
+  if (inlen == 0)
+    return !b16->have_nibble;
+
+  for (; inlen != 0; inlen--, in++)
+    {
+      if (ignore_lines && *in == '\n')
+        continue;
+
+      int nib = *in;
+      if ('0' <= nib && nib <= '9')
+        nib -= '0';
+      else if ('A' <= nib && nib <= 'F')
+        nib -= 'A' - 10;
+      else
+        return false;  /* garbage - return false */
+
+      if (!b16->have_nibble)
+        {
+          /* Store higher nibble until next one arrives */
+          b16->nibble = nib;
+          b16->have_nibble = true;
+        }
+      else
+        {
+          /* have both nibbles, write octet */
+          *out++ = (b16->nibble << 4) + nib;
+          ++(*outlen);
+          b16->have_nibble = false;
+        }
+    }
+
+  return true;
+}
+
+
+
+
+/* Return the number of Z85 characters produced for LEN input bytes:
+   five characters per four bytes.  Z85 does not allow padding, so the
+   quotient is not rounded up.  */
+static int
+z85_length (int len)
+{
+  return (len * 5) / 4;
+}
+
+/* Return true if CH belongs to the Z85 alphabet: an ASCII letter or
+   digit, or one of the 23 punctuation characters listed below.
+
+   strchr treats the terminating NUL of the searched string as part of
+   it, so a NUL byte would otherwise be reported as a member of the
+   alphabet.  With --ignore-garbage that would leave NUL bytes in the
+   input, and the decoder would then reject them as invalid.  NUL is
+   therefore excluded explicitly.  */
+static bool
+isz85 (char ch)
+{
+  return (c_isalnum (ch)
+          || (ch != '\0'
+              && strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != NULL));
+}
+
+/* The Z85 alphabet, indexed by base-85 digit value.  */
+static char const z85_encoding[85] =
+  "0123456789"
+  "abcdefghijklmnopqrstuvwxyz"
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+  ".-:+=^!/*?&<>()[]{}@%$#";
+
+/* Encode INLEN bytes of IN as Z85 into OUT (OUTLEN bytes).  Every group
+   of four input bytes is read as a big-endian 32-bit number and written
+   as five base-85 digits, most significant digit first.  Output
+   positions at or beyond OUTLEN are not stored.
+
+   If INLEN is not a multiple of 4, all complete groups are encoded and
+   then the program exits with a diagnostic, since encoding has no way
+   to report an error to the caller.  */
+static void
+z85_encode (char const *restrict in, idx_t inlen,
+            char *restrict out, idx_t outlen)
+{
+  idx_t outidx = 0;
+
+  for (; 4 <= inlen; inlen -= 4, in += 4, out += 5, outidx += 5)
+    {
+      unsigned char b0 = in[0];
+      unsigned char b1 = in[1];
+      unsigned char b2 = in[2];
+      unsigned char b3 = in[3];
+
+      int_fast64_t val = b0;
+      val = (val << 24) + (b1 << 16) + (b2 << 8) + b3;
+
+      /* Produce the digits from least to most significant.  */
+      for (int j = 4; 0 <= j; j--)
+        {
+          int digit = val % 85;
+          val /= 85;
+
+          /* NOTE: if there is padding (which is trimmed by z85
+             before outputting the result), the output buffer 'out'
+             might not include enough allocated bytes for the padding,
+             so don't store them. */
+          if (outidx + j < outlen)
+            out[j] = z85_encoding[digit];
+        }
+    }
+
+  /* currently, there's no way to return an error in encoding. */
+  if (inlen != 0)
+    die (EXIT_FAILURE, 0,
+         _("invalid input (length must be multiple of 4 characters)"));
+}
+
+/* Initialize CTX for Z85 decoding: no digits collected yet.  CTX->i is
+   set to 1, which keeps do_decode from skipping the final flush call
+   when the very first block is also the last one.  The scratch buffer
+   is allocated as well.  */
+static void
+z85_decode_ctx_init (struct base_decode_context *ctx)
+{
+  ctx->i = 1;
+  ctx->ctx.z85.i = 0;
+  init_inbuf (ctx);
+}
+
+
+/* Value contributed by the four least significant base-85 digits of the
+ current group (octets[1] .. octets[4]). */
+# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
+ (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) + \
+ ((ctx)->ctx.z85.octets[2] * 85 * 85) + \
+ ((ctx)->ctx.z85.octets[3] * 85) + \
+ ((ctx)->ctx.z85.octets[4]))
+
+
+/* Value contributed by the most significant digit (octets[0]), i.e. the
+ digit times 85**4. The operand is cast to int_fast64_t before the
+ multiplication. */
+# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
+ ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
+
+/*
+ Z85 digit values and the characters that encode them:
+
+ 0 - 9: 0 1 2 3 4 5 6 7 8 9
+ 10 - 19: a b c d e f g h i j
+ 20 - 29: k l m n o p q r s t
+ 30 - 39: u v w x y z A B C D
+ 40 - 49: E F G H I J K L M N
+ 50 - 59: O P Q R S T U V W X
+ 60 - 69: Y Z . - : + = ^ ! / #dummy comment to workaround syntax-check
+ 70 - 79: * ? & < > ( ) [ ] {
+ 80 - 84: } @ % $ #
+
+ The table below is the inverse mapping. z85_decode_ctx indexes it
+ with (c - 33) for bytes 33 ('!') through 125 ('}'); an entry of -1
+ marks a byte that is not part of the alphabet.
+*/
+static signed char const z85_decoding[93] = {
+ 68, -1, 84, 83, 82, 72, -1, /* ! " # $ % & ' */
+ 75, 76, 70, 65, -1, 63, 62, 69, /* ( ) * + , - . / */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* '0' to '9' */
+ 64, -1, 73, 66, 74, 71, 81, /* : ; < = > ? @ */
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, /* 'A' to 'J' */
+ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, /* 'K' to 'T' */
+ 56, 57, 58, 59, 60, 61, /* 'U' to 'Z' */
+ 77, -1, 78, 67, -1, -1, /* [ \ ] ^ _ ` */
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 'a' to 'j' */
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, /* 'k' to 't' */
+ 30, 31, 32, 33, 34, 35, /* 'u' to 'z' */
+ 79, -1, 80 /* { | } */
+};
+
+/* Decode INLEN bytes of Z85 text at IN into OUT, storing the number of
+ bytes produced in *OUTLEN. Newlines are skipped. Digits are collected
+ in CTX five at a time; each complete group yields four output bytes,
+ most significant byte first. An incomplete group is kept in CTX for
+ the next call.
+
+ INLEN == 0 is a flush request: it fails if an incomplete group is
+ pending.
+
+ Return false on a byte outside the alphabet, on a group whose value
+ does not fit in 32 bits, or on a failed flush; true otherwise. */
+static bool
+z85_decode_ctx (struct base_decode_context *ctx,
+ char const *restrict in, idx_t inlen,
+ char *restrict out, idx_t *outlen)
+{
+ bool ignore_lines = true; /* for now, always ignore them */
+
+ *outlen = 0;
+
+ /* inlen==0 is request to flush output.
+ if there are dangling values - we are missing entries,
+ so return false - indicating an invalid input. */
+ if (inlen == 0)
+ {
+ if (ctx->ctx.z85.i > 0)
+ {
+ /* Z85 variant does not allow padding - input must
+ be a multiple of 5 - so return error. */
+ return false;
+ }
+ return true;
+ }
+
+ while (inlen--)
+ {
+ if (ignore_lines && *in == '\n')
+ {
+ ++in;
+ continue;
+ }
+
+ /* z85 decoding */
+ unsigned char c = *in;
+
+ /* Only bytes 33 .. 125 have an entry in z85_decoding. */
+ if (c >= 33 && c <= 125)
+ {
+ signed char ch = z85_decoding[c - 33];
+ if (ch < 0)
+ return false; /* garbage - return false */
+ c = ch;
+ }
+ else
+ return false; /* garbage - return false */
+
+ ++in;
+
+ /* C now holds the digit value (0 .. 84), not the input character. */
+ ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
+ if (ctx->ctx.z85.i == 5)
+ {
+ /* decode the lowest 4 octets, then check for overflows. */
+ int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
+
+ /* The Z85 spec and the reference implementation say nothing
+ about overflows. To be on the safe side, reject them. */
+
+ val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
+ /* Any bit set above bit 31 means the group exceeds 32 bits. */
+ if ((val >> 24) & ~0xFF)
+ return false;
+
+ *out++ = val >> 24;
+ *out++ = (val >> 16) & 0xFF;
+ *out++ = (val >> 8) & 0xFF;
+ *out++ = val & 0xFF;
+
+ *outlen += 4;
+
+ ctx->ctx.z85.i = 0;
+ }
+ }
+ /* Expose the number of pending digits so that do_decode can decide
+ whether a final flush call is needed. */
+ ctx->i = ctx->ctx.z85.i;
+ return true;
+}
+
+
+/* Return true if CH is one of the two base2 digits, '0' or '1'.  */
+inline static bool
+isbase2 (char ch)
+{
+  switch (ch)
+    {
+    case '0':
+    case '1':
+      return true;
+
+    default:
+      return false;
+    }
+}
+
+/* Return the number of base2 characters needed to encode LEN bytes:
+   eight characters per byte.  */
+static int
+base2_length (int len)
+{
+  int encoded_len = 8 * len;
+  return encoded_len;
+}
+
+
+/* Encode INLEN bytes of IN into OUT as '0'/'1' characters, eight per
+   byte, most significant bit first.  */
+inline static void
+base2msbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  for (; inlen != 0; inlen--, in++)
+    {
+      unsigned char c = *in;
+
+      /* Walk a single-bit mask from bit 7 down to bit 0.  */
+      for (int mask = 0x80; mask != 0; mask >>= 1)
+        *out++ = (c & mask) ? '1' : '0';
+
+      outlen -= 8;
+    }
+}
+
+/* Encode INLEN bytes of IN into OUT as '0'/'1' characters, eight per
+   byte, least significant bit first.  */
+inline static void
+base2lsbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  for (; inlen != 0; inlen--, in++)
+    {
+      unsigned char c = *in;
+
+      /* Walk a single-bit mask from bit 0 up to bit 7.  */
+      for (int mask = 0x01; mask <= 0x80; mask <<= 1)
+        *out++ = (c & mask) ? '1' : '0';
+
+      outlen -= 8;
+    }
+}
+
+
+/* Initialize CTX for base2 decoding (either bit order): the byte being
+   assembled is cleared and no bits are pending.  The scratch buffer is
+   allocated as well.  */
+static void
+base2_decode_ctx_init (struct base_decode_context *ctx)
+{
+  ctx->i = 0;
+  ctx->ctx.base2.octet = 0;
+  init_inbuf (ctx);
+}
+
+
+/* Decode INLEN '0'/'1' characters at IN into OUT, least significant
+   bit first, storing the number of bytes produced in *OUTLEN.
+   Newlines are skipped.  CTX->i counts the bits collected so far for
+   the current byte and survives across calls.
+
+   INLEN == 0 is a flush request: it succeeds only if no partial byte
+   is pending.
+
+   Return false on a byte that is neither '0' nor '1', or on a failed
+   flush; true otherwise.  */
+static bool
+base2lsbf_decode_ctx (struct base_decode_context *ctx,
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  if (inlen == 0)
+    return ctx->i == 0;
+
+  for (; inlen != 0; inlen--, in++)
+    {
+      char c = *in;
+
+      if (ignore_lines && c == '\n')
+        continue;
+
+      if (!isbase2 (c))
+        return false;
+
+      bool bit = (c == '1');
+      ctx->ctx.base2.octet |= bit << ctx->i;
+      ctx->i++;
+
+      if (ctx->i != 8)
+        continue;
+
+      /* A full byte has been assembled; emit it and start over.  */
+      *out++ = ctx->ctx.base2.octet;
+      ++*outlen;
+      ctx->ctx.base2.octet = 0;
+      ctx->i = 0;
+    }
+
+  return true;
+}
+
+/* Decode INLEN '0'/'1' characters at IN into OUT, most significant bit
+   first, storing the number of bytes produced in *OUTLEN.  Newlines
+   are skipped.  CTX->i is the position of the bit stored most
+   recently; it counts down from 7 and is 0 whenever no partial byte is
+   pending.
+
+   INLEN == 0 is a flush request: it succeeds only if no partial byte
+   is pending.
+
+   Return false on a byte that is neither '0' nor '1', or on a failed
+   flush; true otherwise.  */
+static bool
+base2msbf_decode_ctx (struct base_decode_context *ctx,
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  if (inlen == 0)
+    return ctx->i == 0;
+
+  for (; inlen != 0; inlen--, in++)
+    {
+      char c = *in;
+
+      if (ignore_lines && c == '\n')
+        continue;
+
+      if (!isbase2 (c))
+        return false;
+
+      bool bit = (c == '1');
+
+      /* Position 0 means a new byte starts, so begin at bit 7.  */
+      ctx->i = (ctx->i == 0 ? 8 : ctx->i) - 1;
+      ctx->ctx.base2.octet |= bit << ctx->i;
+
+      if (ctx->i != 0)
+        continue;
+
+      /* Bit 0 was just stored: the byte is complete.  */
+      *out++ = ctx->ctx.base2.octet;
+      ++*outlen;
+      ctx->ctx.base2.octet = 0;
+    }
+
+  return true;
+}
+
+#endif /* BASE_TYPE == 42, i.e., "basenc"*/
+
+
+
+/* Write LEN bytes from BUFFER to OUT, breaking lines so that no output
+   line holds more than WRAP_COLUMN characters.  *CURRENT_COLUMN is the
+   number of characters already on the current output line; it is
+   updated so that consecutive calls continue the same line.  A
+   WRAP_COLUMN of 0 disables wrapping and writes BUFFER as is.
+
+   All output, data as well as inserted newlines, goes to OUT.
+   Exit with a diagnostic on any write failure.  */
+static void
+wrap_write (char const *buffer, idx_t len,
+            idx_t wrap_column, idx_t *current_column, FILE *out)
+{
+  if (wrap_column == 0)
+    {
+      /* Simple write. */
+      if (fwrite (buffer, 1, len, out) < len)
+        die (EXIT_FAILURE, errno, _("write error"));
+    }
+  else
+    for (idx_t written = 0; written < len; )
+      {
+        /* Room left on the current line, capped by what remains.  */
+        idx_t to_write = MIN (wrap_column - *current_column, len - written);
+
+        if (to_write == 0)
+          {
+            /* The line is full: end it and start a new one.  */
+            if (fputc ('\n', out) == EOF)
+              die (EXIT_FAILURE, errno, _("write error"));
+            *current_column = 0;
+          }
+        else
+          {
+            if (fwrite (buffer + written, 1, to_write, out) < to_write)
+              die (EXIT_FAILURE, errno, _("write error"));
+            *current_column += to_write;
+            written += to_write;
+          }
+      }
+}
+
+/* Close the input stream IN and terminate the program.  Exit
+   successfully if the close succeeds.  Otherwise exit with a
+   diagnostic that names standard input when INFILE is "-", or INFILE
+   itself in any other case.  */
+static _Noreturn void
+finish_and_exit (FILE *in, char const *infile)
+{
+  if (fclose (in) == 0)
+    exit (EXIT_SUCCESS);
+
+  if (STREQ (infile, "-"))
+    die (EXIT_FAILURE, errno, _("closing standard input"));
+  else
+    die (EXIT_FAILURE, errno, "%s", quotef (infile));
+
+  exit (EXIT_SUCCESS);
+}
+
+static _Noreturn void
+do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
+{
+ idx_t current_column = 0;
+ char *inbuf, *outbuf;
+ idx_t sum;
+
+ inbuf = xmalloc (ENC_BLOCKSIZE);
+ outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
+
+ do
+ {
+ idx_t n;
+
+ sum = 0;
+ do
+ {
+ n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
+ sum += n;
+ }
+ while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
+
+ if (sum > 0)
+ {
+ /* Process input one block at a time. Note that ENC_BLOCKSIZE
+ is sized so that no pad chars will appear in output. */
+ base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
+
+ wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
+ &current_column, out);
+ }
+ }
+ while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
+
+ /* When wrapping, terminate last line. */
+ if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
+ die (EXIT_FAILURE, errno, _("write error"));
+
+ if (ferror (in))
+ die (EXIT_FAILURE, errno, _("read error"));
+
+ finish_and_exit (in, infile);
+}
+
+/* Read encoded data from IN until end of file, decode it with the
+ selected decoder and write the result to OUT. When IGNORE_GARBAGE is
+ true, input bytes that are neither accepted by isbase nor equal to '='
+ are removed before decoding. INFILE is the input name, used for
+ diagnostics when closing the input. This function does not return:
+ it finishes through finish_and_exit or exits with a diagnostic on a
+ read error, a write error or invalid input. */
+static _Noreturn void
+do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
+{
+ char *inbuf, *outbuf;
+ idx_t sum;
+ struct base_decode_context ctx;
+
+ inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
+ outbuf = xmalloc (DEC_BLOCKSIZE);
+
+#if BASE_TYPE == 42
+ ctx.inbuf = NULL;
+#endif
+ base_decode_ctx_init (&ctx);
+
+ do
+ {
+ bool ok;
+
+ /* Fill INBUF with up to BASE_LENGTH (DEC_BLOCKSIZE) bytes. Garbage
+ removal shrinks N, so the loop reads again until the buffer is full
+ or end of file is reached. */
+ sum = 0;
+ do
+ {
+ idx_t n = fread (inbuf + sum,
+ 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
+
+ if (ignore_garbage)
+ {
+ /* Delete each rejected byte in place by shifting the rest of the
+ newly read data down by one. */
+ for (idx_t i = 0; n > 0 && i < n;)
+ {
+ if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
+ i++;
+ else
+ memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
+ }
+ }
+
+ sum += n;
+
+ if (ferror (in))
+ die (EXIT_FAILURE, errno, _("read error"));
+ }
+ while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
+
+ /* The following "loop" is usually iterated just once.
+ However, when it processes the final input buffer, we want
+ to iterate it one additional time, but with an indicator
+ telling it to flush what is in CTX. */
+ for (int k = 0; k < 1 + !!feof (in); k++)
+ {
+ /* Skip the flush pass when the decoder reports nothing pending. */
+ if (k == 1 && ctx.i == 0)
+ break;
+ idx_t n = DEC_BLOCKSIZE;
+ ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
+
+ /* Write whatever was decoded before reporting invalid input. */
+ if (fwrite (outbuf, 1, n, out) < n)
+ die (EXIT_FAILURE, errno, _("write error"));
+
+ if (!ok)
+ die (EXIT_FAILURE, 0, _("invalid input"));
+ }
+ }
+ while (!feof (in));
+
+ finish_and_exit (in, infile);
+}
+
+/* Program entry point. Parse the command-line options, bind the codec
+ function pointers in the basenc build, open the input (the FILE
+ operand, or standard input when it is absent or "-") and hand over to
+ do_decode or do_encode, neither of which returns. */
+int
+main (int argc, char **argv)
+{
+ int opt;
+ FILE *input_fh;
+ char const *infile;
+
+ /* True if --decode has been given and we should decode data. */
+ bool decode = false;
+ /* True if we should ignore non-base-alphabetic characters. */
+ bool ignore_garbage = false;
+ /* Wrap encoded data around the 76th column, by default. */
+ idx_t wrap_column = 76;
+
+#if BASE_TYPE == 42
+ /* Selected encoding option; 0 means none was given. */
+ int base_type = 0;
+#endif
+
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ atexit (close_stdout);
+
+ while ((opt = getopt_long (argc, argv, "diw:", long_options, NULL)) != -1)
+ switch (opt)
+ {
+ case 'd':
+ decode = true;
+ break;
+
+ case 'w':
+ {
+ /* A negative or unparsable value is an error. A value that
+ overflows, or exceeds IDX_MAX, is treated as 0, which disables
+ wrapping. */
+ intmax_t w;
+ strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
+ if (LONGINT_OVERFLOW < s_err || w < 0)
+ die (EXIT_FAILURE, 0, "%s: %s",
+ _("invalid wrap size"), quote (optarg));
+ wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
+ }
+ break;
+
+ case 'i':
+ ignore_garbage = true;
+ break;
+
+#if BASE_TYPE == 42
+ /* If several encoding options are given, the last one is kept. */
+ case BASE64_OPTION:
+ case BASE64URL_OPTION:
+ case BASE32_OPTION:
+ case BASE32HEX_OPTION:
+ case BASE16_OPTION:
+ case BASE2MSBF_OPTION:
+ case BASE2LSBF_OPTION:
+ case Z85_OPTION:
+ base_type = opt;
+ break;
+#endif
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ break;
+ }
+
+#if BASE_TYPE == 42
+ /* Bind the generic codec function pointers to the selected encoding.
+ Not selecting any encoding is a usage error. */
+ switch (base_type)
+ {
+ case BASE64_OPTION:
+ base_length = base64_length_wrapper;
+ isbase = isbase64;
+ base_encode = base64_encode;
+ base_decode_ctx_init = base64_decode_ctx_init_wrapper;
+ base_decode_ctx = base64_decode_ctx_wrapper;
+ break;
+
+ case BASE64URL_OPTION:
+ base_length = base64_length_wrapper;
+ isbase = isbase64url;
+ base_encode = base64url_encode;
+ base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
+ base_decode_ctx = base64url_decode_ctx_wrapper;
+ break;
+
+ case BASE32_OPTION:
+ base_length = base32_length_wrapper;
+ isbase = isbase32;
+ base_encode = base32_encode;
+ base_decode_ctx_init = base32_decode_ctx_init_wrapper;
+ base_decode_ctx = base32_decode_ctx_wrapper;
+ break;
+
+ case BASE32HEX_OPTION:
+ base_length = base32_length_wrapper;
+ isbase = isbase32hex;
+ base_encode = base32hex_encode;
+ base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
+ base_decode_ctx = base32hex_decode_ctx_wrapper;
+ break;
+
+ case BASE16_OPTION:
+ base_length = base16_length;
+ isbase = isbase16;
+ base_encode = base16_encode;
+ base_decode_ctx_init = base16_decode_ctx_init;
+ base_decode_ctx = base16_decode_ctx;
+ break;
+
+ case BASE2MSBF_OPTION:
+ base_length = base2_length;
+ isbase = isbase2;
+ base_encode = base2msbf_encode;
+ base_decode_ctx_init = base2_decode_ctx_init;
+ base_decode_ctx = base2msbf_decode_ctx;
+ break;
+
+ case BASE2LSBF_OPTION:
+ base_length = base2_length;
+ isbase = isbase2;
+ base_encode = base2lsbf_encode;
+ base_decode_ctx_init = base2_decode_ctx_init;
+ base_decode_ctx = base2lsbf_decode_ctx;
+ break;
+
+ case Z85_OPTION:
+ base_length = z85_length;
+ isbase = isz85;
+ base_encode = z85_encode;
+ base_decode_ctx_init = z85_decode_ctx_init;
+ base_decode_ctx = z85_decode_ctx;
+ break;
+
+ default:
+ error (0, 0, _("missing encoding type"));
+ usage (EXIT_FAILURE);
+ }
+#endif
+
+ /* At most one FILE operand is accepted. */
+ if (argc - optind > 1)
+ {
+ error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
+ usage (EXIT_FAILURE);
+ }
+
+ if (optind < argc)
+ infile = argv[optind];
+ else
+ infile = "-";
+
+ /* "-" selects standard input, switched to binary mode; any other name
+ is opened as a binary file. */
+ if (STREQ (infile, "-"))
+ {
+ xset_binary_mode (STDIN_FILENO, O_BINARY);
+ input_fh = stdin;
+ }
+ else
+ {
+ input_fh = fopen (infile, "rb");
+ if (input_fh == NULL)
+ die (EXIT_FAILURE, errno, "%s", quotef (infile));
+ }
+
+ fadvise (input_fh, FADVISE_SEQUENTIAL);
+
+ /* Both do_decode and do_encode are declared _Noreturn, so control
+ never reaches the end of main. */
+ if (decode)
+ do_decode (input_fh, infile, stdout, ignore_garbage);
+ else
+ do_encode (input_fh, infile, stdout, wrap_column);
+}