summaryrefslogtreecommitdiffstats
path: root/src/common/utf8.h
blob: 83efe6fd6dad43c81e417208102c39f15e75dba6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2011 New Dream Network
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#ifndef CEPH_COMMON_UTF8_H
#define CEPH_COMMON_UTF8_H

/* Size, in bytes, that encode_utf8() assumes its output buffer has. */
#define MAX_UTF8_SZ 6
/* Sentinel returned by decode_utf8() when decoding fails. */
#define INVALID_UTF8_CHAR 0xfffffffful

#ifdef __cplusplus
extern "C" {
#endif

/* Checks whether a buffer holds valid UTF-8.
 *
 * buf: the buffer to check.
 * len: the length of 'buf'.
 *
 * Returns 0 if the buffer is valid UTF-8. Otherwise returns one plus the
 * offset of the first invalid byte; the offset is biased by one so that an
 * invalid byte at offset 0 cannot be mistaken for success.
 */
int check_utf8(const char *buf, int len);

/* Checks whether a null-terminated string is valid UTF-8.
 *
 * buf: the null-terminated string to check.
 *
 * Returns 0 if the string is valid UTF-8. Otherwise returns one plus the
 * offset of the first invalid byte; the offset is biased by one so that an
 * invalid byte at offset 0 cannot be mistaken for success.
 */
int check_utf8_cstr(const char *buf);

/* Returns true (nonzero) if 'ch' is a control character.
 * Newline is counted as a control character; the NUL character ('\0')
 * is not.
 */
int is_control_character(int ch);

/* Checks whether a buffer contains control characters.
 *
 * buf: the buffer to scan.
 * len: the length of 'buf'.
 *
 * NOTE(review): the return convention is not documented in this header.
 * Presumably 0 means no control character was found, mirroring
 * check_utf8() -- confirm against the implementation.
 */
int check_for_control_characters(const char *buf, int len);

/* Checks whether a null-terminated string contains control characters.
 *
 * buf: the null-terminated string to scan.
 *
 * NOTE(review): the return convention is not documented in this header.
 * Presumably 0 means no control character was found, mirroring
 * check_utf8_cstr() -- confirm against the implementation.
 */
int check_for_control_characters_cstr(const char *buf);

/* Encodes a 31-bit UTF-8 code point into 'buf'.
 *
 * u:   the code point to encode.
 * buf: the output buffer; assumed to be of size MAX_UTF8_SZ.
 *
 * Returns the number of bytes in the encoded value, or -1 on failure.
 */
int encode_utf8(unsigned long u, unsigned char *buf);

/*
 * Decodes one UTF-8 character from an array of bytes.
 *
 * buf:    the bytes to decode.
 * nbytes: presumably the number of bytes in 'buf' that make up the encoded
 *         character -- TODO confirm against the implementation.
 *
 * Returns the character code, or INVALID_UTF8_CHAR upon error.
 */
unsigned long decode_utf8(unsigned char *buf, int nbytes);

#ifdef __cplusplus
}
#endif

#endif