From: =?utf-8?b?0L3QsNCx?= Date: Wed, 15 Mar 2023 16:16:31 +0100 Subject: write: correctly handle wide characters MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Do this by replacing fputc_careful() (notice that the description said it's locale-aware ‒ it very much is /not/), with a fputs_careful() which does the same thing, but if it were to output a byte in the \123 format, first it checks whether this byte starts a valid multibyte character. If it does, and that character is printable, write it verbatim. This means that echo 'foo åäö ąęćźżń bar' | write nabijaczleweli pts/4 instead of foo \303\245\303\244\303\266 \304\205\304\231\304\207\305\272\305\274\305\204 bar yields foo åäö ąęćźżń bar or, more realistically, from a message I got earlier today, Filip powiedzia\305\202 \305\274e zap\305\202aci jutro becomes Filip powiedział że zapłaci jutro Invalid/non-printable sequences get processed as before. Line reading in write must become getline() to avoid dealing with partial characters: for example on input consisting solely of ąęćźżń, where every {1} is an instance, the output would be {42}ąęć\305\272żń{84}ąęćź\305\274ń{84}ąęćźż\305\204{39} with just fixed-512 fgets() Bug-Debian: https://bugs.debian.org/826596 --- include/carefulputc.h | 60 +++++++++++++++++++++++++++++++++++++-------------- login-utils/last.c | 4 +--- term-utils/write.c | 25 ++++++--------------- 3 files changed, 52 insertions(+), 37 deletions(-) diff --git a/include/carefulputc.h b/include/carefulputc.h index 8860b12..740add6 100644 --- a/include/carefulputc.h +++ b/include/carefulputc.h @@ -1,31 +1,59 @@ #ifndef UTIL_LINUX_CAREFULPUTC_H #define UTIL_LINUX_CAREFULPUTC_H -/* - * A putc() for use in write and wall (that sometimes are sgid tty). - * It avoids control characters in our locale, and also ASCII control - * characters. Note that the locale of the recipient is unknown. -*/ #include #include #include +#ifdef HAVE_WIDECHAR +#include +#endif +#include #include "cctype.h" -static inline int fputc_careful(int c, FILE *fp, const char fail) +/* + * A puts() for use in write and wall (that sometimes are sgid tty). + * It avoids control and invalid characters. + * The locale of the recipient is nominally unknown, + * but it's a solid bet that the encoding is compatible with the author's. + */ +static inline int fputs_careful(const char * s, FILE *fp, const char ctrl, bool cr_lf) { - int ret; + int ret = 0; - if (isprint(c) || c == '\a' || c == '\t' || c == '\r' || c == '\n') - ret = putc(c, fp); - else if (!c_isascii(c)) - ret = fprintf(fp, "\\%3o", (unsigned char)c); - else { - ret = putc(fail, fp); - if (ret != EOF) - ret = putc(c ^ 0x40, fp); + for (size_t slen = strlen(s); *s; ++s, --slen) { + if (*s == '\n') + ret = fputs(cr_lf ? "\r\n" : "\n", fp); + else if (isprint(*s) || *s == '\a' || *s == '\t' || *s == '\r') + ret = putc(*s, fp); + else if (!c_isascii(*s)) { +#ifdef HAVE_WIDECHAR + wchar_t w; + size_t clen = mbtowc(&w, s, slen); + switch(clen) { + case (size_t)-2: // incomplete + case (size_t)-1: // EILSEQ + mbtowc(NULL, NULL, 0); + nonprint: + ret = fprintf(fp, "\\%3hho", *s); + break; + default: + if(!iswprint(w)) + goto nonprint; + ret = fwrite(s, 1, clen, fp); + s += clen - 1; + slen -= clen - 1; + break; + } +#else + ret = fprintf(fp, "\\%3hho", *s); +#endif + } else + ret = fputs((char[]){ ctrl, *s ^ 0x40, '\0' }, fp); + if (ret < 0) + return EOF; } - return (ret < 0) ? EOF : 0; + return 0; } static inline void fputs_quoted_case(const char *data, FILE *out, int dir) diff --git a/login-utils/last.c b/login-utils/last.c index 8462927..8d08641 100644 --- a/login-utils/last.c +++ b/login-utils/last.c @@ -404,7 +404,6 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t char final[512]; char utline[sizeof(p->ut_line) + 1]; char domain[256]; - char *s; int mins, hours, days; int r, len; struct last_timefmt *fmt; @@ -560,8 +559,7 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t /* * Print out "final" string safely. */ - for (s = final; *s; s++) - fputc_careful(*s, stdout, '*'); + fputs_careful(final, stdout, '*', false); if (len < 0 || (size_t)len >= sizeof(final)) putchar('\n'); diff --git a/term-utils/write.c b/term-utils/write.c index ee31580..01c3f35 100644 --- a/term-utils/write.c +++ b/term-utils/write.c @@ -223,21 +223,6 @@ static void signal_handler(int signo) signal_received = signo; } -/* - * write_line - like fputs(), but makes control characters visible and - * turns \n into \r\n. - */ -static void write_line(char *s) -{ - while (*s) { - const int c = *s++; - - if ((c == '\n' && fputc_careful('\r', stdout, '^') == EOF) - || fputc_careful(c, stdout, '^') == EOF) - err(EXIT_FAILURE, _("carefulputc failed")); - } -} - /* * do_write - actually make the connection */ @@ -247,7 +232,8 @@ static void do_write(const struct write_control *ctl) struct passwd *pwd; time_t now; struct tm *tm; - char *host, line[512]; + char *host, *line = NULL; + size_t linelen = 0; struct sigaction sigact; /* Determine our login name(s) before the we reopen() stdout */ @@ -286,11 +272,14 @@ static void do_write(const struct write_control *ctl) free(host); printf("\r\n"); - while (fgets(line, sizeof(line), stdin) != NULL) { + while (getline(&line, &linelen, stdin) >= 0) { if (signal_received) break; - write_line(line); + + if (fputs_careful(line, stdout, '^', true) == EOF) + err(EXIT_FAILURE, _("carefulputc failed")); } + free(line); printf("EOF\r\n"); }