summary | refs | log | tree | commit | diff | stats
path: root/src/include/utils/ascii.h
blob: 7df024dad398d2218c1160c879a07db7f4fcf1f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*-----------------------------------------------------------------------
 * ascii.h
 *
 *	 Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group
 *
 * src/include/utils/ascii.h
 *
 *-----------------------------------------------------------------------
 */

#ifndef _ASCII_H_
#define _ASCII_H_

#include "port/simd.h"

/*
 * Declared here, defined elsewhere (the body is not part of this header).
 * NOTE(review): judging by the name and signature alone, this presumably
 * copies src into the destsiz-byte buffer dest in an ASCII-safe way --
 * confirm the exact semantics against the definition.
 */
extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);

/*
 * Check that a buffer consists solely of non-zero, 7-bit ASCII bytes.
 *
 * The input is consumed one Vector8-sized chunk at a time, so len must be
 * an exact multiple of the chunk size (8 or 16 bytes).  The result is true
 * only when no byte is zero and no byte has its high bit set; otherwise it
 * is false.
 */
static inline bool
is_valid_ascii(const unsigned char *s, int len)
{
	const unsigned char *const s_end = s + len;
	Vector8		chunk;
	Vector8		bits_seen = vector8_broadcast(0);
#ifdef USE_NO_SIMD
	Vector8		nonzero_flags = vector8_broadcast(0x80);
#endif

	Assert(len % sizeof(chunk) == 0);

	for (; s < s_end; s += sizeof(chunk))
	{
		vector8_load(&chunk, s);

		/* Step 1: remember whether this chunk contains a zero byte. */
#ifdef USE_NO_SIMD

		/*
		 * Adding 0x7f to a byte turns its high bit on for every value except
		 * zero.  AND-ing the sum into the flag accumulator therefore clears
		 * the flag of any lane that held a zero byte, and the flag stays
		 * cleared for the rest of the loop.
		 *
		 * Carries between neighboring bytes are not a concern: with all
		 * input high bits clear, a byte sum is at most 0x7f + 0x7f = 0xfe.
		 * Should some input byte have its high bit set, a carry may corrupt
		 * the flags, but that input is rejected by the high-bit test anyway.
		 */
		nonzero_flags &= (chunk + vector8_broadcast(0x7F));
#else

		/*
		 * Lanes that compare equal to zero are OR-ed into the bit
		 * accumulator, so a zero byte ends up being reported through the
		 * same high-bit test as a non-ASCII byte.
		 */
		bits_seen = vector8_or(bits_seen,
							   vector8_eq(chunk, vector8_broadcast(0)));
#endif

		/* Step 2: fold every bit set in this chunk into the accumulator. */
		bits_seen = vector8_or(bits_seen, chunk);
	}

	/* Any high bit in the accumulator means the input is not valid. */
	if (vector8_is_highbit_set(bits_seen))
		return false;

#ifdef USE_NO_SIMD
	/* Valid only if none of the per-byte "nonzero" flags was cleared. */
	return nonzero_flags == vector8_broadcast(0x80);
#else
	return true;
#endif
}

#endif							/* _ASCII_H_ */