Adding upstream version 9.1.upstream/9.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
commit: 758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree: 48111ece75cf4f98316848b37a7e26356e00669e /src/basenc.c
parent: Initial commit. (diff)
download: coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz
coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip
1 files changed, 1248 insertions, 0 deletions
diff --git a/src/basenc.c b/src/basenc.c
new file mode 100644
index 0000000..04857d5
--- /dev/null
+++ b/src/basenc.c
@@ -0,0 +1,1248 @@
+/* Base64, base32, and similar encoding/decoding strings or files.
+   Copyright (C) 2004-2022 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Simon Josefsson <simon@josefsson.org>.  */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+
+#include "system.h"
+#include "c-ctype.h"
+#include "die.h"
+#include "error.h"
+#include "fadvise.h"
+#include "idx.h"
+#include "quote.h"
+#include "xstrtol.h"
+#include "xdectoint.h"
+#include "xbinary-io.h"
+
+#if BASE_TYPE == 42
+# define AUTHORS \
+  proper_name ("Simon Josefsson"), \
+  proper_name ("Assaf Gordon")
+#else
+# define AUTHORS proper_name ("Simon Josefsson")
+#endif
+
+#if BASE_TYPE == 32
+# include "base32.h"
+# define PROGRAM_NAME "base32"
+#elif BASE_TYPE == 64
+# include "base64.h"
+# define PROGRAM_NAME "base64"
+#elif BASE_TYPE == 42
+# include "base32.h"
+# include "base64.h"
+# include <assert.h>
+# define PROGRAM_NAME "basenc"
+#else
+# error missing/invalid BASE_TYPE definition
+#endif
+
+
+
+#if BASE_TYPE == 42
+enum
+{
+  BASE64_OPTION = CHAR_MAX + 1,
+  BASE64URL_OPTION,
+  BASE32_OPTION,
+  BASE32HEX_OPTION,
+  BASE16_OPTION,
+  BASE2MSBF_OPTION,
+  BASE2LSBF_OPTION,
+  Z85_OPTION
+};
+#endif
+
+static struct option const long_options[] =
+{
+  {"decode", no_argument, 0, 'd'},
+  {"wrap", required_argument, 0, 'w'},
+  {"ignore-garbage", no_argument, 0, 'i'},
+#if BASE_TYPE == 42
+  {"base64",    no_argument, 0, BASE64_OPTION},
+  {"base64url", no_argument, 0, BASE64URL_OPTION},
+  {"base32",    no_argument, 0, BASE32_OPTION},
+  {"base32hex", no_argument, 0, BASE32HEX_OPTION},
+  {"base16",    no_argument, 0, BASE16_OPTION},
+  {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
+  {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
+  {"z85",       no_argument, 0, Z85_OPTION},
+#endif
+  {GETOPT_HELP_OPTION_DECL},
+  {GETOPT_VERSION_OPTION_DECL},
+  {NULL, 0, NULL, 0}
+};
+
+void
+usage (int status)
+{
+  if (status != EXIT_SUCCESS)
+    emit_try_help ();
+  else
+    {
+      printf (_("\
+Usage: %s [OPTION]... [FILE]\n\
+"), program_name);
+
+#if BASE_TYPE == 42
+      fputs (_("\
+basenc encode or decode FILE, or standard input, to standard output.\n\
+"), stdout);
+#else
+      printf (_("\
+Base%d encode or decode FILE, or standard input, to standard output.\n\
+"), BASE_TYPE);
+#endif
+
+      emit_stdin_note ();
+      emit_mandatory_arg_note ();
+#if BASE_TYPE == 42
+      fputs (_("\
+      --base64          same as 'base64' program (RFC4648 section 4)\n\
+"), stdout);
+      fputs (_("\
+      --base64url       file- and url-safe base64 (RFC4648 section 5)\n\
+"), stdout);
+      fputs (_("\
+      --base32          same as 'base32' program (RFC4648 section 6)\n\
+"), stdout);
+      fputs (_("\
+      --base32hex       extended hex alphabet base32 (RFC4648 section 7)\n\
+"), stdout);
+      fputs (_("\
+      --base16          hex encoding (RFC4648 section 8)\n\
+"), stdout);
+      fputs (_("\
+      --base2msbf       bit string with most significant bit (msb) first\n\
+"), stdout);
+      fputs (_("\
+      --base2lsbf       bit string with least significant bit (lsb) first\n\
+"), stdout);
+#endif
+      fputs (_("\
+  -d, --decode          decode data\n\
+  -i, --ignore-garbage  when decoding, ignore non-alphabet characters\n\
+  -w, --wrap=COLS       wrap encoded lines after COLS character (default 76).\n\
+                          Use 0 to disable line wrapping\n\
+"), stdout);
+#if BASE_TYPE == 42
+      fputs (_("\
+      --z85             ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
+                        when encoding, input length must be a multiple of 4;\n\
+                        when decoding, input length must be a multiple of 5\n\
+"), stdout);
+#endif
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+#if BASE_TYPE == 42
+      fputs (_("\
+\n\
+When decoding, the input may contain newlines in addition to the bytes of\n\
+the formal alphabet.  Use --ignore-garbage to attempt to recover\n\
+from any other non-alphabet bytes in the encoded stream.\n\
+"), stdout);
+#else
+      printf (_("\
+\n\
+The data are encoded as described for the %s alphabet in RFC 4648.\n\
+When decoding, the input may contain newlines in addition to the bytes of\n\
+the formal %s alphabet.  Use --ignore-garbage to attempt to recover\n\
+from any other non-alphabet bytes in the encoded stream.\n"),
+              PROGRAM_NAME, PROGRAM_NAME);
+#endif
+      emit_ancillary_info (PROGRAM_NAME);
+    }
+
+  exit (status);
+}
+
+#define ENC_BLOCKSIZE (1024 * 3 * 10)
+
+#if BASE_TYPE == 32
+# define BASE_LENGTH BASE32_LENGTH
+/* Note that increasing this may decrease performance if --ignore-garbage
+   is used, because of the memmove operation below.  */
+# define DEC_BLOCKSIZE (1024 * 5)
+
+/* Ensure that BLOCKSIZE is a multiple of 5 and 8.  */
+verify (ENC_BLOCKSIZE % 40 == 0);  /* So padding chars only on last block.  */
+verify (DEC_BLOCKSIZE % 40 == 0);  /* So complete encoded blocks are used.  */
+
+# define base_encode base32_encode
+# define base_decode_context base32_decode_context
+# define base_decode_ctx_init base32_decode_ctx_init
+# define base_decode_ctx base32_decode_ctx
+# define isbase isbase32
+#elif BASE_TYPE == 64
+# define BASE_LENGTH BASE64_LENGTH
+/* Note that increasing this may decrease performance if --ignore-garbage
+   is used, because of the memmove operation below.  */
+# define DEC_BLOCKSIZE (1024 * 3)
+
+/* Ensure that BLOCKSIZE is a multiple of 3 and 4.  */
+verify (ENC_BLOCKSIZE % 12 == 0);  /* So padding chars only on last block.  */
+verify (DEC_BLOCKSIZE % 12 == 0);  /* So complete encoded blocks are used.  */
+
+# define base_encode base64_encode
+# define base_decode_context base64_decode_context
+# define base_decode_ctx_init base64_decode_ctx_init
+# define base_decode_ctx base64_decode_ctx
+# define isbase isbase64
+#elif BASE_TYPE == 42
+
+
+# define BASE_LENGTH base_length
+
+/* Note that increasing this may decrease performance if --ignore-garbage
+   is used, because of the memmove operation below.  */
+# define DEC_BLOCKSIZE (4200)
+verify (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32 */
+verify (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64 */
+
+static int (*base_length) (int i);
+static bool (*isbase) (char ch);
+static void (*base_encode) (char const *restrict in, idx_t inlen,
+                            char *restrict out, idx_t outlen);
+
+struct base16_decode_context
+{
+  char nibble;
+  bool have_nibble;
+};
+
+struct z85_decode_context
+{
+  int i;
+  unsigned char octets[5];
+};
+
+struct base2_decode_context
+{
+  unsigned char octet;
+};
+
+struct base_decode_context
+{
+  int i; /* will be updated manually */
+  union {
+    struct base64_decode_context base64;
+    struct base32_decode_context base32;
+    struct base16_decode_context base16;
+    struct base2_decode_context base2;
+    struct z85_decode_context z85;
+  } ctx;
+  char *inbuf;
+  idx_t bufsize;
+};
+static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
+static bool (*base_decode_ctx) (struct base_decode_context *ctx,
+                                char const *restrict in, idx_t inlen,
+                                char *restrict out, idx_t *outlen);
+#endif
+
+
+
+
+#if BASE_TYPE == 42
+
+static int
+base64_length_wrapper (int len)
+{
+  return BASE64_LENGTH (len);
+}
+
+static void
+base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  base64_decode_ctx_init (&ctx->ctx.base64);
+}
+
+static bool
+base64_decode_ctx_wrapper (struct base_decode_context *ctx,
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
+{
+  bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
+  ctx->i = ctx->ctx.base64.i;
+  return b;
+}
+
+static void
+init_inbuf (struct base_decode_context *ctx)
+{
+  ctx->bufsize = DEC_BLOCKSIZE;
+  ctx->inbuf = xcharalloc (ctx->bufsize);
+}
+
+static void
+prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
+{
+  if (ctx->bufsize < inlen)
+    {
+      ctx->bufsize = inlen * 2;
+      ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char));
+    }
+}
+
+
+static void
+base64url_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  base64_encode (in, inlen, out, outlen);
+  /* translate 62nd and 63rd characters */
+  char *p = out;
+  while (outlen--)
+    {
+      if (*p == '+')
+        *p = '-';
+      else if (*p == '/')
+        *p = '_';
+      ++p;
+    }
+}
+
+static bool
+isbase64url (char ch)
+{
+  return (ch == '-' || ch == '_'
+          || (ch != '+' && ch != '/' && isbase64 (ch)));
+}
+
+static void
+base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  base64_decode_ctx_init (&ctx->ctx.base64);
+  init_inbuf (ctx);
+}
+
+
+static bool
+base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
+{
+  prepare_inbuf (ctx, inlen);
+  memcpy (ctx->inbuf, in, inlen);
+
+  /* translate 62nd and 63rd characters */
+  idx_t i = inlen;
+  char *p = ctx->inbuf;
+  while (i--)
+    {
+      if (*p == '+' || *p == '/')
+        {
+          *outlen = 0;
+          return false; /* reject base64 input */
+        }
+      else if (*p == '-')
+        *p = '+';
+      else if (*p == '_')
+        *p = '/';
+      ++p;
+    }
+
+  bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen,
+                              out, outlen);
+  ctx->i = ctx->ctx.base64.i;
+
+  return b;
+}
+
+
+
+static int
+base32_length_wrapper (int len)
+{
+  return BASE32_LENGTH (len);
+}
+
+static void
+base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  base32_decode_ctx_init (&ctx->ctx.base32);
+}
+
+static bool
+base32_decode_ctx_wrapper (struct base_decode_context *ctx,
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
+{
+  bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
+  ctx->i = ctx->ctx.base32.i;
+  return b;
+}
+
+/* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
+     to
+   0123456789ABCDEFGHIJKLMNOPQRSTUV */
+static const char base32_norm_to_hex[32 + 9] = {
+/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */
+  'Q',  'R',  'S',  'T',  'U',  'V',
+
+  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+
+/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */
+  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
+
+/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */
+  '8',  '9',  'A',  'B',  'C',  'D',  'E',  'F',
+
+/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */
+  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',
+
+/*0x59, 0x5a, */
+  'O',  'P',
+};
+
+/* 0123456789ABCDEFGHIJKLMNOPQRSTUV
+     to
+   ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 */
+static const char base32_hex_to_norm[32 + 9] = {
+  /* from: 0x30 .. 0x39 ('0' to '9') */
+  /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+
+  0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+
+  /* from: 0x41 .. 0x4A ('A' to 'J') */
+  /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+
+  /* from: 0x4B .. 0x54 ('K' to 'T') */
+  /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',
+
+  /* from: 0x55 .. 0x56 ('U' to 'V') */
+  /* to:*/ '6', '7'
+};
+
+
+inline static bool
+isbase32hex (char ch)
+{
+  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'V');
+}
+
+
+static void
+base32hex_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  base32_encode (in, inlen, out, outlen);
+
+  for (char *p = out; outlen--; p++)
+    {
+      assert (0x32 <= *p && *p <= 0x5a);          /* LCOV_EXCL_LINE */
+      *p = base32_norm_to_hex[*p - 0x32];
+    }
+}
+
+
+static void
+base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
+{
+  base32_decode_ctx_init (&ctx->ctx.base32);
+  init_inbuf (ctx);
+}
+
+
+static bool
+base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
+{
+  prepare_inbuf (ctx, inlen);
+
+  idx_t i = inlen;
+  char *p = ctx->inbuf;
+  while (i--)
+    {
+      if (isbase32hex (*in))
+        *p = base32_hex_to_norm[ (int)*in - 0x30];
+      else
+        *p = *in;
+      ++p;
+      ++in;
+    }
+
+  bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen,
+                              out, outlen);
+  ctx->i = ctx->ctx.base32.i;
+
+  return b;
+}
+
+
+static bool
+isbase16 (char ch)
+{
+  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'F');
+}
+
+static int
+base16_length (int len)
+{
+  return len * 2;
+}
+
+static const char base16[16] = "0123456789ABCDEF";
+
+static void
+base16_encode (char const *restrict in, idx_t inlen,
+               char *restrict out, idx_t outlen)
+{
+  while (inlen--)
+    {
+      unsigned char c = *in;
+      *out++ = base16[c >> 4];
+      *out++ = base16[c & 0x0F];
+      ++in;
+    }
+}
+
+
+static void
+base16_decode_ctx_init (struct base_decode_context *ctx)
+{
+  init_inbuf (ctx);
+  ctx->ctx.base16.have_nibble = false;
+  ctx->i = 1;
+}
+
+
+static bool
+base16_decode_ctx (struct base_decode_context *ctx,
+                   char const *restrict in, idx_t inlen,
+                   char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  /* inlen==0 is request to flush output.
+     if there is a dangling high nibble - we are missing the low nibble,
+     so return false - indicating an invalid input.  */
+  if (inlen == 0)
+    return !ctx->ctx.base16.have_nibble;
+
+  while (inlen--)
+    {
+      if (ignore_lines && *in == '\n')
+        {
+          ++in;
+          continue;
+        }
+
+      int nib = *in++;
+      if ('0' <= nib && nib <= '9')
+        nib -= '0';
+      else if ('A' <= nib && nib <= 'F')
+        nib -= 'A' - 10;
+      else
+        return false; /* garbage - return false */
+
+      if (ctx->ctx.base16.have_nibble)
+        {
+          /* have both nibbles, write octet */
+          *out++ = (ctx->ctx.base16.nibble << 4) + nib;
+          ++(*outlen);
+        }
+      else
+        {
+          /* Store higher nibble until next one arrives */
+          ctx->ctx.base16.nibble = nib;
+        }
+      ctx->ctx.base16.have_nibble = !ctx->ctx.base16.have_nibble;
+    }
+  return true;
+}
+
+
+
+
+static int
+z85_length (int len)
+{
+  /* Z85 does not allow padding, so no need to round to highest integer.  */
+  int outlen = (len * 5) / 4;
+  return outlen;
+}
+
+static bool
+isz85 (char ch)
+{
+  return c_isalnum (ch) || (strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != NULL);
+}
+
+static char const z85_encoding[85] =
+  "0123456789"
+  "abcdefghijklmnopqrstuvwxyz"
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+  ".-:+=^!/*?&<>()[]{}@%$#";
+
+static void
+z85_encode (char const *restrict in, idx_t inlen,
+            char *restrict out, idx_t outlen)
+{
+  int i = 0;
+  unsigned char quad[4];
+  idx_t outidx = 0;
+
+  while (true)
+    {
+      if (inlen == 0)
+        {
+          /* no more input, exactly on 4 octet boundary. */
+          if (i == 0)
+            return;
+
+          /* currently, there's no way to return an error in encoding.  */
+          die (EXIT_FAILURE, 0,
+               _("invalid input (length must be multiple of 4 characters)"));
+        }
+      else
+        {
+          quad[i++] = *in++;
+          --inlen;
+        }
+
+      /* Got a quad, encode it */
+      if (i == 4)
+        {
+          int_fast64_t val = quad[0];
+          val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
+
+          for (int j = 4; j >= 0; --j)
+            {
+              int c = val % 85;
+              val /= 85;
+
+              /* NOTE: if there is padding (which is trimmed by z85
+                 before outputting the result), the output buffer 'out'
+                 might not include enough allocated bytes for the padding,
+                 so don't store them. */
+              if (outidx + j < outlen)
+                out[j] = z85_encoding[c];
+            }
+          out += 5;
+          outidx += 5;
+          i = 0;
+        }
+    }
+}
+
+static void
+z85_decode_ctx_init (struct base_decode_context *ctx)
+{
+  init_inbuf (ctx);
+  ctx->ctx.z85.i = 0;
+  ctx->i = 1;
+}
+
+
+# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
+  (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) +      \
+   ((ctx)->ctx.z85.octets[2] * 85 * 85) +	    \
+   ((ctx)->ctx.z85.octets[3] * 85) +		    \
+   ((ctx)->ctx.z85.octets[4]))
+
+
+# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
+  ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
+
+/*
+ 0 -  9:  0 1 2 3 4 5 6 7 8 9
+ 10 - 19:  a b c d e f g h i j
+ 20 - 29:  k l m n o p q r s t
+ 30 - 39:  u v w x y z A B C D
+ 40 - 49:  E F G H I J K L M N
+ 50 - 59:  O P Q R S T U V W X
+ 60 - 69:  Y Z . - : + = ^ ! /   #dummy comment to workaround syntax-check
+ 70 - 79:  * ? & < > ( ) [ ] {
+ 80 - 84:  } @ % $ #
+*/
+static signed char const z85_decoding[93] = {
+  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
+  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
+  0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
+  64, -1,  73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
+  36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
+  46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
+  56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
+  77,  -1, 78,  67,  -1,  -1,                  /* [ \ ] ^ _ ` */
+  10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
+  20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
+  30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
+  79, -1,  80                                  /* { | } */
+};
+
+static bool
+z85_decode_ctx (struct base_decode_context *ctx,
+                char const *restrict in, idx_t inlen,
+                char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  /* inlen==0 is request to flush output.
+     if there are dangling values - we are missing entries,
+     so return false - indicating an invalid input.  */
+  if (inlen == 0)
+    {
+      if (ctx->ctx.z85.i > 0)
+        {
+          /* Z85 variant does not allow padding - input must
+             be a multiple of 5 - so return error.  */
+          return false;
+        }
+      return true;
+    }
+
+  while (inlen--)
+    {
+      if (ignore_lines && *in == '\n')
+        {
+          ++in;
+          continue;
+        }
+
+      /* z85 decoding */
+      unsigned char c = *in;
+
+      if (c >= 33 && c <= 125)
+        {
+          signed char ch = z85_decoding[c - 33];
+          if (ch < 0)
+            return false; /* garbage - return false */
+          c = ch;
+        }
+      else
+        return false; /* garbage - return false */
+
+      ++in;
+
+      ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
+      if (ctx->ctx.z85.i == 5)
+        {
+          /* decode the lowest 4 octets, then check for overflows.  */
+          int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
+
+          /* The Z85 spec and the reference implementation say nothing
+             about overflows. To be on the safe side, reject them.  */
+
+          val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
+          if ((val >> 24) & ~0xFF)
+            return false;
+
+          *out++ = val >> 24;
+          *out++ = (val >> 16) & 0xFF;
+          *out++ = (val >> 8) & 0xFF;
+          *out++ = val & 0xFF;
+
+          *outlen += 4;
+
+          ctx->ctx.z85.i = 0;
+        }
+    }
+  ctx->i = ctx->ctx.z85.i;
+  return true;
+}
+
+
+inline static bool
+isbase2 (char ch)
+{
+  return ch == '0' || ch == '1';
+}
+
+static int
+base2_length (int len)
+{
+  return len * 8;
+}
+
+
+inline static void
+base2msbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  while (inlen--)
+    {
+      unsigned char c = *in;
+      for (int i = 0; i < 8; i++)
+        {
+          *out++ = c & 0x80 ? '1' : '0';
+          c <<= 1;
+        }
+      outlen -= 8;
+      ++in;
+    }
+}
+
+inline static void
+base2lsbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
+{
+  while (inlen--)
+    {
+      unsigned char c = *in;
+      for (int i = 0; i < 8; i++)
+        {
+          *out++ = c & 0x01 ? '1' : '0';
+          c >>= 1;
+        }
+      outlen -= 8;
+      ++in;
+    }
+}
+
+
+static void
+base2_decode_ctx_init (struct base_decode_context *ctx)
+{
+  init_inbuf (ctx);
+  ctx->ctx.base2.octet = 0;
+  ctx->i = 0;
+}
+
+
+static bool
+base2lsbf_decode_ctx (struct base_decode_context *ctx,
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  /* inlen==0 is request to flush output.
+     if there is a dangling bit - we are missing some bits,
+     so return false - indicating an invalid input.  */
+  if (inlen == 0)
+    return ctx->i == 0;
+
+  while (inlen--)
+    {
+      if (ignore_lines && *in == '\n')
+        {
+          ++in;
+          continue;
+        }
+
+      if (!isbase2 (*in))
+        return false;
+
+      bool bit = (*in == '1');
+      ctx->ctx.base2.octet |= bit << ctx->i;
+      ++ctx->i;
+
+      if (ctx->i == 8)
+        {
+          *out++ = ctx->ctx.base2.octet;
+          ctx->ctx.base2.octet = 0;
+          ++*outlen;
+          ctx->i = 0;
+        }
+
+      ++in;
+    }
+
+  return true;
+}
+
+static bool
+base2msbf_decode_ctx (struct base_decode_context *ctx,
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
+{
+  bool ignore_lines = true;  /* for now, always ignore them */
+
+  *outlen = 0;
+
+  /* inlen==0 is request to flush output.
+     if there is a dangling bit - we are missing some bits,
+     so return false - indicating an invalid input.  */
+  if (inlen == 0)
+    return ctx->i == 0;
+
+  while (inlen--)
+    {
+      if (ignore_lines && *in == '\n')
+        {
+          ++in;
+          continue;
+        }
+
+      if (!isbase2 (*in))
+        return false;
+
+      bool bit = (*in == '1');
+      if (ctx->i == 0)
+        ctx->i = 8;
+      --ctx->i;
+      ctx->ctx.base2.octet |= bit << ctx->i;
+
+      if (ctx->i == 0)
+        {
+          *out++ = ctx->ctx.base2.octet;
+          ctx->ctx.base2.octet = 0;
+          ++*outlen;
+          ctx->i = 0;
+        }
+
+      ++in;
+    }
+
+  return true;
+}
+
+#endif /* BASE_TYPE == 42, i.e., "basenc"*/
+
+
+
+static void
+wrap_write (char const *buffer, idx_t len,
+            idx_t wrap_column, idx_t *current_column, FILE *out)
+{
+  if (wrap_column == 0)
+    {
+      /* Simple write. */
+      if (fwrite (buffer, 1, len, stdout) < len)
+        die (EXIT_FAILURE, errno, _("write error"));
+    }
+  else
+    for (idx_t written = 0; written < len; )
+      {
+        idx_t to_write = MIN (wrap_column - *current_column, len - written);
+
+        if (to_write == 0)
+          {
+            if (fputc ('\n', out) == EOF)
+              die (EXIT_FAILURE, errno, _("write error"));
+            *current_column = 0;
+          }
+        else
+          {
+            if (fwrite (buffer + written, 1, to_write, stdout) < to_write)
+              die (EXIT_FAILURE, errno, _("write error"));
+            *current_column += to_write;
+            written += to_write;
+          }
+      }
+}
+
+static _Noreturn void
+finish_and_exit (FILE *in, char const *infile)
+{
+  if (fclose (in) != 0)
+    {
+      if (STREQ (infile, "-"))
+        die (EXIT_FAILURE, errno, _("closing standard input"));
+      else
+        die (EXIT_FAILURE, errno, "%s", quotef (infile));
+    }
+
+  exit (EXIT_SUCCESS);
+}
+
+static _Noreturn void
+do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
+{
+  idx_t current_column = 0;
+  char *inbuf, *outbuf;
+  idx_t sum;
+
+  inbuf = xmalloc (ENC_BLOCKSIZE);
+  outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
+
+  do
+    {
+      idx_t n;
+
+      sum = 0;
+      do
+        {
+          n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
+          sum += n;
+        }
+      while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
+
+      if (sum > 0)
+        {
+          /* Process input one block at a time.  Note that ENC_BLOCKSIZE
+             is sized so that no pad chars will appear in output. */
+          base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
+
+          wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
+                      &current_column, out);
+        }
+    }
+  while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
+
+  /* When wrapping, terminate last line. */
+  if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
+    die (EXIT_FAILURE, errno, _("write error"));
+
+  if (ferror (in))
+    die (EXIT_FAILURE, errno, _("read error"));
+
+  finish_and_exit (in, infile);
+}
+
+static _Noreturn void
+do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
+{
+  char *inbuf, *outbuf;
+  idx_t sum;
+  struct base_decode_context ctx;
+
+  inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
+  outbuf = xmalloc (DEC_BLOCKSIZE);
+
+#if BASE_TYPE == 42
+  ctx.inbuf = NULL;
+#endif
+  base_decode_ctx_init (&ctx);
+
+  do
+    {
+      bool ok;
+
+      sum = 0;
+      do
+        {
+          idx_t n = fread (inbuf + sum,
+                           1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
+
+          if (ignore_garbage)
+            {
+              for (idx_t i = 0; n > 0 && i < n;)
+                {
+                  if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
+                    i++;
+                  else
+                    memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
+                }
+            }
+
+          sum += n;
+
+          if (ferror (in))
+            die (EXIT_FAILURE, errno, _("read error"));
+        }
+      while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
+
+      /* The following "loop" is usually iterated just once.
+         However, when it processes the final input buffer, we want
+         to iterate it one additional time, but with an indicator
+         telling it to flush what is in CTX.  */
+      for (int k = 0; k < 1 + !!feof (in); k++)
+        {
+          if (k == 1 && ctx.i == 0)
+            break;
+          idx_t n = DEC_BLOCKSIZE;
+          ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
+
+          if (fwrite (outbuf, 1, n, out) < n)
+            die (EXIT_FAILURE, errno, _("write error"));
+
+          if (!ok)
+            die (EXIT_FAILURE, 0, _("invalid input"));
+        }
+    }
+  while (!feof (in));
+
+  finish_and_exit (in, infile);
+}
+
+int
+main (int argc, char **argv)
+{
+  int opt;
+  FILE *input_fh;
+  char const *infile;
+
+  /* True if --decode has been given and we should decode data. */
+  bool decode = false;
+  /* True if we should ignore non-base-alphabetic characters. */
+  bool ignore_garbage = false;
+  /* Wrap encoded data around the 76th column, by default. */
+  idx_t wrap_column = 76;
+
+#if BASE_TYPE == 42
+  int base_type = 0;
+#endif
+
+  initialize_main (&argc, &argv);
+  set_program_name (argv[0]);
+  setlocale (LC_ALL, "");
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  atexit (close_stdout);
+
+  while ((opt = getopt_long (argc, argv, "diw:", long_options, NULL)) != -1)
+    switch (opt)
+      {
+      case 'd':
+        decode = true;
+        break;
+
+      case 'w':
+        {
+          intmax_t w;
+          strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
+          if (LONGINT_OVERFLOW < s_err || w < 0)
+            die (EXIT_FAILURE, 0, "%s: %s",
+                 _("invalid wrap size"), quote (optarg));
+          wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
+        }
+        break;
+
+      case 'i':
+        ignore_garbage = true;
+        break;
+
+#if BASE_TYPE == 42
+      case BASE64_OPTION:
+      case BASE64URL_OPTION:
+      case BASE32_OPTION:
+      case BASE32HEX_OPTION:
+      case BASE16_OPTION:
+      case BASE2MSBF_OPTION:
+      case BASE2LSBF_OPTION:
+      case Z85_OPTION:
+        base_type = opt;
+        break;
+#endif
+
+      case_GETOPT_HELP_CHAR;
+
+      case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+      default:
+        usage (EXIT_FAILURE);
+        break;
+      }
+
+#if BASE_TYPE == 42
+  switch (base_type)
+    {
+    case BASE64_OPTION:
+      base_length = base64_length_wrapper;
+      isbase = isbase64;
+      base_encode = base64_encode;
+      base_decode_ctx_init = base64_decode_ctx_init_wrapper;
+      base_decode_ctx = base64_decode_ctx_wrapper;
+      break;
+
+    case BASE64URL_OPTION:
+      base_length = base64_length_wrapper;
+      isbase = isbase64url;
+      base_encode = base64url_encode;
+      base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
+      base_decode_ctx = base64url_decode_ctx_wrapper;
+      break;
+
+    case BASE32_OPTION:
+      base_length = base32_length_wrapper;
+      isbase = isbase32;
+      base_encode = base32_encode;
+      base_decode_ctx_init = base32_decode_ctx_init_wrapper;
+      base_decode_ctx = base32_decode_ctx_wrapper;
+      break;
+
+    case BASE32HEX_OPTION:
+      base_length = base32_length_wrapper;
+      isbase = isbase32hex;
+      base_encode = base32hex_encode;
+      base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
+      base_decode_ctx = base32hex_decode_ctx_wrapper;
+      break;
+
+    case BASE16_OPTION:
+      base_length = base16_length;
+      isbase = isbase16;
+      base_encode = base16_encode;
+      base_decode_ctx_init = base16_decode_ctx_init;
+      base_decode_ctx = base16_decode_ctx;
+      break;
+
+    case BASE2MSBF_OPTION:
+      base_length = base2_length;
+      isbase = isbase2;
+      base_encode = base2msbf_encode;
+      base_decode_ctx_init = base2_decode_ctx_init;
+      base_decode_ctx = base2msbf_decode_ctx;
+      break;
+
+    case BASE2LSBF_OPTION:
+      base_length = base2_length;
+      isbase = isbase2;
+      base_encode = base2lsbf_encode;
+      base_decode_ctx_init = base2_decode_ctx_init;
+      base_decode_ctx = base2lsbf_decode_ctx;
+      break;
+
+    case Z85_OPTION:
+      base_length = z85_length;
+      isbase = isz85;
+      base_encode = z85_encode;
+      base_decode_ctx_init = z85_decode_ctx_init;
+      base_decode_ctx = z85_decode_ctx;
+      break;
+
+    default:
+      error (0, 0, _("missing encoding type"));
+      usage (EXIT_FAILURE);
+    }
+#endif
+
+  if (argc - optind > 1)
+    {
+      error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
+      usage (EXIT_FAILURE);
+    }
+
+  if (optind < argc)
+    infile = argv[optind];
+  else
+    infile = "-";
+
+  if (STREQ (infile, "-"))
+    {
+      xset_binary_mode (STDIN_FILENO, O_BINARY);
+      input_fh = stdin;
+    }
+  else
+    {
+      input_fh = fopen (infile, "rb");
+      if (input_fh == NULL)
+        die (EXIT_FAILURE, errno, "%s", quotef (infile));
+    }
+
+  fadvise (input_fh, FADVISE_SEQUENTIAL);
+
+  if (decode)
+    do_decode (input_fh, infile, stdout, ignore_garbage);
+  else
+    do_encode (input_fh, infile, stdout, wrap_column);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 16:11:47 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 16:11:47 +0000
commit	758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree	48111ece75cf4f98316848b37a7e26356e00669e /src/basenc.c
parent	Initial commit. (diff)
download	coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip