summaryrefslogtreecommitdiffstats
path: root/src/include/unicode.h
blob: 670864b081d767fa696bf53a895be6ac48307d3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// -*- C++ -*-
/* Copyright (C) 2002-2020 Free Software Foundation, Inc.
     Written by Werner Lemberg <wl@gnu.org>

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or
(at your option) any later version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>. */

// Convert a groff glyph name to a string containing an underscore-separated
// list of Unicode code points, each written as hexadecimal digits without
// any prefix.  A glyph name may map to a single code point or to a sequence
// of several.  For example,
//
//   '-'   ->  '2010'
//   ',c'  ->  '00E7'
//   'fl'  ->  '0066_006C'
//
// The argument is the glyph name to look up.
//
// Return the code point list, or NULL if there is no equivalent.
//
// NOTE(review): the ownership and lifetime of the returned string are not
// stated in this header -- confirm against the implementation before
// storing or freeing the result.
const char *glyph_name_to_unicode(const char *);

// Convert a string containing an underscore-separated list of Unicode code
// points (hexadecimal digits, no prefix) to a groff glyph name.  For
// example,
//
//   '2010'       ->  'hy'
//   '0066_006C'  ->  'fl'
//
// This is not a strict inverse of glyph_name_to_unicode(): that function
// maps the glyph name '-' to '2010', whereas '2010' maps back to 'hy'
// here.
//
// The argument is the code point list to look up.
//
// Return the glyph name, or NULL if there is no equivalent.
//
// NOTE(review): the ownership and lifetime of the returned string are not
// stated in this header -- confirm against the implementation before
// storing or freeing the result.
const char *unicode_to_glyph_name(const char *);

// Convert a string naming a precomposed Unicode character (given as its
// code point in hexadecimal digits, as in the examples below) to a string
// containing an underscore-separated list of Unicode code points that
// represents its canonical decomposition.  Characters that have a
// compatibility equivalent are also replaced by that equivalent.  For
// example,
//
//   '1F3A' -> '0399_0313_0300'
//   'FA6A' -> '983B'
//
// The argument is the code point to decompose.
//
// Return the resulting code point list, or NULL if there is no equivalent.
//
// NOTE(review): the ownership and lifetime of the returned string are not
// stated in this header -- confirm against the implementation before
// storing or freeing the result.
const char *decompose_unicode(const char *);

// Test whether the given string denotes a Unicode character.  It must
// be of the form 'uNNNN', obeying the following rules.
//
//   - 'NNNN' must consist of at least 4 hexadecimal digits in upper case.
//   - If there are more than 4 hexadecimal digits, the leading one must not
//     be zero.
//   - 'NNNN' must denote a valid Unicode code point (U+0000..U+10FFFF,
//     excluding surrogate code points).
//
// Return a pointer to 'NNNN' (skipping the leading 'u' character) in case
// of success, NULL otherwise.
//
// NOTE(review): the returned pointer presumably points into the argument
// string rather than to a copy -- confirm against the implementation.
const char *check_unicode_name(const char *);

// end of unicode.h