summaryrefslogtreecommitdiffstats
path: root/src/libs/libgroff/unicode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libs/libgroff/unicode.cpp')
-rw-r--r--src/libs/libgroff/unicode.cpp65
1 files changed, 65 insertions, 0 deletions
diff --git a/src/libs/libgroff/unicode.cpp b/src/libs/libgroff/unicode.cpp
new file mode 100644
index 0000000..29e80c7
--- /dev/null
+++ b/src/libs/libgroff/unicode.cpp
@@ -0,0 +1,65 @@
+// -*- C++ -*-
+/* Copyright (C) 2002-2020 Free Software Foundation, Inc.
+ Written by Werner Lemberg <wl@gnu.org>
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "lib.h"
+#include "cset.h"
+#include "stringclass.h"
+
+#include "unicode.h"
+
+const char *check_unicode_name(const char *u)
+{
+ if (*u != 'u')
+ return 0;
+ const char *p = ++u;
+ for (;;) {
+ int val = 0;
+ const char *start = p;
+ for (;;) {
+ // only uppercase hex digits allowed
+ if (!csxdigit(*p))
+ return 0;
+ if (csdigit(*p))
+ val = val*0x10 + (*p-'0');
+ else if (csupper(*p))
+ val = val*0x10 + (*p-'A'+10);
+ else
+ return 0;
+ // biggest Unicode value is U+10FFFF
+ if (val > 0x10FFFF)
+ return 0;
+ p++;
+ if (*p == '\0' || *p == '_')
+ break;
+ }
+ // surrogates not allowed
+ if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
+ return 0;
+ if (val > 0xFFFF) {
+ if (*start == '0') // no leading zeros allowed if > 0xFFFF
+ return 0;
+ }
+ else if (p - start != 4) // otherwise, check for exactly 4 hex digits
+ return 0;
+ if (*p == '\0')
+ break;
+ p++;
+ }
+ return u;
+}