summaryrefslogtreecommitdiffstats
path: root/src/utf8proc/bench/icu.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/utf8proc/bench/icu.c
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/utf8proc/bench/icu.c')
-rw-r--r--src/utf8proc/bench/icu.c61
1 files changed, 61 insertions, 0 deletions
diff --git a/src/utf8proc/bench/icu.c b/src/utf8proc/bench/icu.c
new file mode 100644
index 000000000..3ac351415
--- /dev/null
+++ b/src/utf8proc/bench/icu.c
@@ -0,0 +1,61 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* ICU4C */
+#include <unicode/utypes.h>
+#include <unicode/ustring.h>
+#include <unicode/ucnv.h>
+#include <unicode/unorm2.h>
+
+#include "util.h"
+
+/*
+ * Benchmark ICU's NFKC normalization.
+ *
+ * Every command-line argument is treated as the name of a UTF-8 file.  Each
+ * file is read with readfile(), converted to UTF-16, normalized NUM_REPS
+ * times, and the mean time of one pass is printed as "<file>: <time>".
+ *
+ * Returns EXIT_SUCCESS when all files were processed, EXIT_FAILURE on an
+ * argument starting with '-', an unreadable or oversized file, an
+ * allocation failure, or any ICU error.
+ */
+int main(int argc, char **argv)
+{
+    enum {
+        NUM_REPS = 100,   /* normalization passes timed per file */
+        DEST_FACTOR = 10  /* destination capacity relative to the source */
+    };
+    int status = EXIT_FAILURE;
+    uint8_t *src = NULL;
+    UChar *usrc = NULL;
+    UChar *udest = NULL;
+
+    /* ICU functions return immediately when *err already signals a failure
+       on entry, so the error code has to start out as U_ZERO_ERROR. */
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *uc = ucnv_open("UTF8", &err);
+    if (U_FAILURE(err)) {
+        fprintf(stderr, "ucnv_open failed: %s\n", u_errorName(err));
+        return EXIT_FAILURE;
+    }
+
+    const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err);
+    if (U_FAILURE(err)) {
+        fprintf(stderr, "unorm2_getNFKCInstance failed: %s\n", u_errorName(err));
+        goto done;
+    }
+
+    for (int i = 1; i < argc; ++i) {
+        if (argv[i][0] == '-') {
+            fprintf(stderr, "unrecognized option: %s\n", argv[i]);
+            goto done;
+        }
+
+        size_t len;
+        src = readfile(argv[i], &len);
+        if (!src) {
+            fprintf(stderr, "error reading %s\n", argv[i]);
+            goto done;
+        }
+
+        /* ICU takes lengths and capacities as int32_t; reject inputs whose
+           derived buffer sizes below would not fit. */
+        if (len > (size_t) INT32_MAX / (4 * DEST_FACTOR)) {
+            fprintf(stderr, "%s is too large\n", argv[i]);
+            goto done;
+        }
+
+        /* convert UTF8 data to ICU's UTF16; the extra unit leaves room for
+           the terminating NUL and keeps the allocation non-empty even for
+           an empty file */
+        int32_t ucap = (int32_t) (2 * len + 1);
+        usrc = malloc((size_t) ucap * sizeof *usrc);
+        if (!usrc) {
+            fprintf(stderr, "out of memory\n");
+            goto done;
+        }
+        /* use the returned length rather than u_strlen(), which would stop
+           at the first U+0000 in the data */
+        int32_t ulen = ucnv_toUChars(uc, usrc, ucap, (const char *) src,
+                                     (int32_t) len, &err);
+        if (U_FAILURE(err)) {
+            fprintf(stderr, "error converting %s: %s\n", argv[i], u_errorName(err));
+            goto done;
+        }
+
+        /* ICU's normalization API requires you to know the size of the
+           destination buffer in advance, or alternatively to repeatedly
+           try normalizing and double the buffer size until it succeeds.
+           Here, a huge destination buffer is allocated to avoid the
+           issue; if it is still too small the call below reports an
+           error and the benchmark aborts. */
+        int32_t dcap = DEST_FACTOR * ulen + 1;
+        udest = malloc((size_t) dcap * sizeof *udest);
+        if (!udest) {
+            fprintf(stderr, "out of memory\n");
+            goto done;
+        }
+
+        mytime start = gettime();
+        for (int rep = 0; rep < NUM_REPS; ++rep) {
+            unorm2_normalize(NFKC, usrc, ulen, udest, dcap, &err);
+            if (U_FAILURE(err)) {
+                fprintf(stderr, "error normalizing %s: %s\n", argv[i], u_errorName(err));
+                goto done;
+            }
+        }
+        printf("%s: %g\n", argv[i], elapsed(gettime(), start) / NUM_REPS);
+
+        /* reset to NULL so the shared cleanup below never frees twice */
+        free(udest);
+        udest = NULL;
+        free(usrc);
+        usrc = NULL;
+        free(src);
+        src = NULL;
+    }
+
+    status = EXIT_SUCCESS;
+
+done:
+    /* single exit path: free(NULL) is a no-op, so this is safe from any
+       point after the converter was opened */
+    free(udest);
+    free(usrc);
+    free(src);
+    ucnv_close(uc);
+    return status;
+}