summaryrefslogtreecommitdiffstats
path: root/src/util/printable.c
blob: 6c148fd0041a626056f0af2e26fb1a6019724c68 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/*++
/* NAME
/*	printable 3
/* SUMMARY
/*	mask non-printable characters
/* SYNOPSIS
/*	#include <stringops.h>
/*
/*	int	util_utf8_enable;
/*
/*	char	*printable(buffer, replacement)
/*	char	*buffer;
/*	int	replacement;
/*
/*	char	*printable_except(buffer, replacement, except)
/*	char	*buffer;
/*	int	replacement;
/*	const char *except;
/* DESCRIPTION
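/*	printable() replaces non-printable characters
/*	in its input with the given replacement.
/*
/*	printable_except() does the same, but leaves unchanged any
/*	ASCII characters that are listed in \fIexcept\fR.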
/*
/*	util_utf8_enable controls whether UTF8 is considered printable.
/*	With util_utf8_enable equal to zero, non-ASCII text is replaced.
/*
/*	Arguments:
/* .IP buffer
/*	The null-terminated input string.
/* .IP replacement
/*	Replacement value for characters in \fIbuffer\fR that do not
/*	pass the ASCII isprint(3) test or that are not valid UTF8.
/* .IP except
/*	Null-terminated sequence of non-replaced ASCII characters.
/* LICENSE
/* .ad
/* .fi
/*	The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*
/*	Wietse Venema
/*	Google, Inc.
/*	111 8th Avenue
/*	New York, NY 10011, USA
/*--*/

/* System library. */

#include "sys_defs.h"
#include <ctype.h>
#include <string.h>

/* Utility library. */

#include "stringops.h"

/*
 * Non-zero: printable_except() leaves unchanged the text that it recognizes
 * as UTF8 multi-byte sequences. Zero (the default): every non-ASCII byte is
 * replaced.
 */
int util_utf8_enable = 0;

/* printable - function form of printable(), kept for binary compatibility */

#undef printable

char   *printable(char *, int);

char   *printable(char *string, int replacement)
{
    const char *no_exceptions = 0;

    /*
     * Same as printable_except() with an empty exception list: only
     * printable text survives, everything else becomes the replacement.
     * The string is modified in place and returned to the caller.
     */
    return (printable_except(string, replacement, no_exceptions));
}

/* printable_utf8_len - length of well-formed UTF8 multi-byte sequence, or 0 */

static int printable_utf8_len(const unsigned char *cp)
{
    int     lead = cp[0];
    int     lo = 0x80;
    int     hi = 0xBF;
    int     len;
    int     i;

    /*
     * Determine the sequence length from the leader byte. For some leader
     * bytes the first continuation byte has a narrower range than 80..BF;
     * this is what excludes over-long encodings, UTF-16 surrogates, and
     * code points above U+10FFFF (RFC 3629 section 4). Leader bytes C0, C1
     * and F5..FF never appear in well-formed UTF8.
     */
    if (lead >= 0xC2 && lead <= 0xDF) {
	len = 2;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
	len = 3;
	if (lead == 0xE0)
	    lo = 0xA0;				/* over-long encoding */
	else if (lead == 0xED)
	    hi = 0x9F;				/* UTF-16 surrogate */
    } else if (lead >= 0xF0 && lead <= 0xF4) {
	len = 4;
	if (lead == 0xF0)
	    lo = 0x90;				/* over-long encoding */
	else if (lead == 0xF4)
	    hi = 0x8F;				/* above U+10FFFF */
    } else {
	return (0);
    }

    /*
     * Examine the continuation bytes strictly left to right. The string
     * terminator fails the range test, so we stop before reading past the
     * end of a truncated sequence.
     */
    if (cp[1] < lo || cp[1] > hi)
	return (0);
    for (i = 2; i < len; i++)
	if (cp[i] < 0x80 || cp[i] > 0xBF)
	    return (0);
    return (len);
}

/* printable_except -  pass through printable or other preserved characters */

char   *printable_except(char *string, int replacement, const char *except)
{
    unsigned char *cp;
    int     ch;
    int     len;

    /*
     * The input is modified in place, and the input pointer is returned.
     * 
     * ASCII text passes when it is printable or when it is listed in except
     * (which may be a null pointer). With util_utf8_enable non-zero,
     * well-formed UTF8 multi-byte sequences pass as a whole.
     * 
     * In case of a non-UTF8 sequence (bad leader byte, bad or missing
     * continuation byte, over-long encoding, out-of-range code point),
     * replace the first byte and try to resynchronize at the next byte.
     */
    cp = (unsigned char *) string;
    while ((ch = *cp) != 0) {
	if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
	    /* ok */
	    cp++;
	} else if (util_utf8_enable && (len = printable_utf8_len(cp)) > 0) {
	    /* UTF8; skip all the bytes in the character. */
	    cp += len;
	} else {
	    /* Not acceptable ASCII and not well-formed UTF8. */
	    *cp++ = replacement;
	}
    }
    return (string);
}