path: root/fluent-bit/lib/onigmo/testconvu.rb
diff options
Diffstat (limited to 'fluent-bit/lib/onigmo/testconvu.rb')
1 files changed, 414 insertions, 0 deletions
diff --git a/fluent-bit/lib/onigmo/testconvu.rb b/fluent-bit/lib/onigmo/testconvu.rb
new file mode 100755
index 00000000..384731e0
--- /dev/null
+++ b/fluent-bit/lib/onigmo/testconvu.rb
@@ -0,0 +1,414 @@
+# testconvu.rb
+# Copyright (C) 2004-2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp)
+WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0])  # truthy when the first argument starts with "-win" (case-insensitive)
+ARGV.shift if WINDOWS  # drop that flag so ARGV[0] becomes the next argument
+def eucjp_char_pos(s, byte_pos)  # Returns how many EUC-JP characters of s begin before byte offset byte_pos.
+ pos = 0  # characters counted so far
+ i = 0  # byte offset of the character being examined
+ while (i < byte_pos)
+ x = s[i].ord  # lead byte value; conv_str passes s after force_encoding('ASCII-8BIT'), so s[i] is one byte
+ if ((x >= 0xa1 && x <= 0xfe) || x == 0x8e)
+ i += 2  # lead byte 0xa1..0xfe or 0x8e: stepped over as a two-byte character
+ elsif (x == 0x8f)
+ i += 3  # lead byte 0x8f: stepped over as a three-byte character
+ else
+ i += 1  # any other byte counts as a one-byte character
+ end
+ pos += 1
+ end
+ return pos
+def utf16_byte_pos(endian, s, char_pos)  # Returns the byte offset in s reached after stepping over char_pos UTF-16 characters.
+ i = 0  # current byte offset into s
+ while (char_pos > 0)
+ x = (endian == BIG_ENDIAN ? s[i] : s[i+1]).ord  # inspects the first byte of the pair for BIG_ENDIAN, the second byte otherwise
+ if (x >= 0xd8 && x <= 0xdb)
+ i += 4  # byte in 0xd8..0xdb: presumably a high surrogate, so the character occupies four bytes
+ else
+ i += 2  # otherwise the character occupies two bytes
+ end
+ char_pos -= 1
+ end
+ return i
+def s_escape(s)  # Returns a new string in which bytes below 0x20, at or above 0x7f, 0x22 and 0x5c are written as octal escapes.
+ q = ''  # output accumulator
+ s.each_byte { |b|
+ if (b < 0x20 || b >= 0x7f || b == 0x22 || b == 0x5c)
+ q << sprintf("\\%03o", b)  # a backslash followed by the byte value as three octal digits
+ else
+ q << b.chr  # every other byte is copied unchanged
+ end
+ }
+ q
+def conv_to_utf16(endian, s)  # Transcodes s from EUC-JP to UTF-16BE or UTF-16LE (chosen by endian), appends two NUL bytes and returns the result passed through s_escape.
+ begin
+ if (endian == BIG_ENDIAN)
+ q = s.encode('UTF-16BE', 'EUC-JP')
+ else
+ q = s.encode('UTF-16LE', 'EUC-JP')
+ end
+ q.force_encoding('ASCII-8BIT')  # retag the converted bytes as binary; no transcoding happens here
+ rescue Encoding::InvalidByteSequenceError
+ q = 'Invalid character'  # placeholder text used in place of the converted string; it is not transcoded
+ rescue Encoding::UndefinedConversionError
+ STDERR.printf("Encoding::UndefinedConversionError: [%s]\n", s)
+ return ''  # after reporting on stderr, give the caller an empty string
+ end
+ q << "\000\000"  # two NUL bytes, presumably the UTF-16 string terminator expected by the generated C code
+ s_escape(q)
+def conv_reg(endian, s)  # Rewrites the byte escapes in regex source s for UTF-16 and returns the source converted by conv_to_utf16.
+ s = s.gsub(/\\([0-7]{2,3})\\([0-7]{2,3})/) {
+ $1.to_i(8).chr + $2.to_i(8).chr  # two adjacent octal escapes become the two bytes they denote
+ }
+ s = s.gsub(/\\x([0-9A-Fa-f]{2})\\x([0-9A-Fa-f]{2})/) {
+ $1.to_i(16).chr + $2.to_i(16).chr  # likewise for two adjacent hexadecimal escapes
+ }
+ if (endian == BIG_ENDIAN)  # remaining single escapes are widened with a zero-byte escape placed in front
+ s = s.gsub(/(\\[0-7]{2,3})/) { "\\000" + $1 }
+ s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { "\\x00" + $1 }
+ else  # for the other byte order the zero-byte escape is placed behind
+ s = s.gsub(/(\\[0-7]{2,3})/) { $1 + "\\000" }
+ s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { $1 + "\\x00" }
+ end
+ s = s.gsub(/\\/, '\\\\') #' NOTE(review): the replacement is two backslash characters, which gsub reads as one literal backslash, so this line looks like a no-op - confirm before changing it
+ if (WINDOWS)
+ s = s.gsub(/\?\?/, '?\\?') # escape ANSI trigraph
+ end
+ conv_to_utf16(endian, s)  # the return value is the escaped UTF-16 text
+def conv_str(endian, s, from, to)  # Converts quoted literal s to escaped UTF-16 text and remaps from/to from EUC-JP byte offsets to UTF-16 byte offsets; returns [text, from, to].
+ if (s[0] == ?')
+ s = s[1..-2]  # single-quoted literal: drop the first and last character (the quotes)
+ q = s.gsub(/\\/, '\\\\') #'
+ else
+ q = s[1..-2]  # otherwise drop the surrounding quotes and expand the escapes below
+ q.gsub!(/\\n/, "\x0a")  # named escapes are replaced by the control byte shown in each call
+ q.gsub!(/\\t/, "\x09")
+ q.gsub!(/\\v/, "\x0b")
+ q.gsub!(/\\r/, "\x0d")
+ q.gsub!(/\\f/, "\x0c")
+ q.gsub!(/\\a/, "\x07")
+ q.gsub!(/\\e/, "\x1b")
+ q.gsub!(/\\([0-7]{2,3})/) { $1.to_i(8).chr }  # octal escape to the byte it denotes
+ q.gsub!(/\\x([0-9A-Fa-f]{2})/) { $1.to_i(16).chr }  # hexadecimal escape to the byte it denotes
+ end
+ q.force_encoding('ASCII-8BIT')  # tag q as binary so that indexing in eucjp_char_pos is per byte
+ from = from.to_i
+ to = to.to_i
+ eucjp_from = eucjp_char_pos(q, from)  # byte offsets in the EUC-JP text become character counts
+ eucjp_to = eucjp_char_pos(q, to)
+ s = conv_to_utf16(endian, q)
+ from = utf16_byte_pos(endian, s, eucjp_from)  # character counts become UTF-16 byte offsets; NOTE(review): s is the escaped text returned by conv_to_utf16, not raw UTF-16 bytes - confirm this is intended
+ to = utf16_byte_pos(endian, s, eucjp_to)
+ return s, from, to
+ * This program was generated by testconv.rb.
+ */
+#ifdef POSIX_TEST
+#include "onigmoposix.h"
+#include "onigmo.h"
+static int nsucc = 0;
+static int nfail = 0;
+static int nerror = 0;
+static FILE* err_file;
+#ifndef POSIX_TEST
+static OnigRegion* region;
+static OnigEncoding ENC;
+#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p)
+static void uconv(char* from, char* to, int len)
+ int i;
+ unsigned char c;
+ char *q;
+ q = to;
+ for (i = 0; i < len; i += 2) {
+ c = (unsigned char )from[i];
+ if (c == 0) {
+ c = (unsigned char )from[i+1];
+ if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) {
+ sprintf(q, "\\\\%03o", c);
+ q += 4;
+ }
+ else {
+ sprintf(q, "%c", c);
+ q++;
+ }
+ }
+ else {
+ sprintf(q, "\\\\%03o", c);
+ q += 4;
+ c = (unsigned char )from[i+1];
+ sprintf(q, "\\\\%03o", c);
+ q += 4;
+ }
+ }
+ *q = 0;
+/*
+ * Compile pattern, run it against str and count the outcome in
+ * nsucc, nfail or nerror.  from and to are the expected start and end
+ * offsets of group mem; a non-zero 'not' means no match is expected.
+ * cpat and cstr hold printable copies of both strings for the report.
+ * Once the pattern has compiled, the regex is released on every exit
+ * path, including the error returns after a failed search.
+ */
+static void xx(char* pattern, char* str, int from, int to, int mem, int not)
+ int r;
+ char cpat[4000], cstr[4000];
+#ifdef POSIX_TEST
+ regex_t reg;
+ char buf[200];
+ regmatch_t pmatch[20];
+ uconv(pattern, cpat, ulen(pattern));
+ uconv(str, cstr, ulen(str));
+ r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
+ if (r) {
+ regerror(r, &reg, buf, sizeof(buf));
+ fprintf(err_file, "ERROR: %s\\n", buf);
+ nerror++;
+ return ;
+ }
+ r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
+ if (r != 0 && r != REG_NOMATCH) {
+ regerror(r, &reg, buf, sizeof(buf));
+ fprintf(err_file, "ERROR: %s\\n", buf);
+ nerror++;
+ regfree(&reg); /* reg compiled successfully, so release it before returning */
+ return ;
+ }
+ if (r == REG_NOMATCH) {
+ if (not) {
+ fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr);
+ nsucc++;
+ }
+ else {
+ fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr);
+ nfail++;
+ }
+ }
+ else {
+ if (not) {
+ fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr);
+ nfail++;
+ }
+ else {
+ if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
+ fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr);
+ nsucc++;
+ }
+ else {
+ fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr,
+ (int)from, (int)to, (int)pmatch[mem].rm_so, (int)pmatch[mem].rm_eo);
+ nfail++;
+ }
+ }
+ }
+ regfree(&reg);
+ regex_t* reg;
+ OnigCompileInfo ci;
+ OnigErrorInfo einfo;
+ OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT;
+ uconv(pattern, cpat, ulen(pattern));
+ uconv(str, cstr, ulen(str));
+#if 0
+ r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)),
+ ONIG_OPTION_DEFAULT, ENC, &syn, &einfo);
+ ci.num_of_elements = 5;
+ ci.pattern_enc = ENC;
+ ci.target_enc = ENC;
+ ci.syntax = &syn;
+ ci.option = ONIG_OPTION_DEFAULT;
+ ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT;
+ r = onig_new_deluxe(&reg, (UChar* )pattern,
+ (UChar* )(pattern + ulen(pattern)),
+ &ci, &einfo);
+ if (r) {
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(err_file, "ERROR: %s\\n", s);
+ nerror++;
+ return ;
+ }
+ r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)),
+ (UChar* )str, (UChar* )(str + ulen(str)),
+ region, ONIG_OPTION_NONE);
+ if (r < ONIG_MISMATCH) {
+ onig_error_code_to_str((UChar* )s, r);
+ fprintf(err_file, "ERROR: %s\\n", s);
+ nerror++;
+ onig_free(reg); /* reg compiled successfully, so release it before returning */
+ return ;
+ }
+ if (r == ONIG_MISMATCH) {
+ if (not) {
+ fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr);
+ nsucc++;
+ }
+ else {
+ fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr);
+ nfail++;
+ }
+ }
+ else {
+ if (not) {
+ fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr);
+ nfail++;
+ }
+ else {
+ if (region->beg[mem] == from && region->end[mem] == to) {
+ fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr);
+ nsucc++;
+ }
+ else {
+ fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr,
+ (int)from, (int)to, (int)region->beg[mem], (int)region->end[mem]);
+ nfail++;
+ }
+ }
+ }
+ onig_free(reg);
+static void x2(char* pattern, char* str, int from, int to)
+ xx(pattern, str, from, to, 0, 0);
+static void x3(char* pattern, char* str, int from, int to, int mem)
+ xx(pattern, str, from, to, mem, 0);
+static void n(char* pattern, char* str)
+ xx(pattern, str, 0, 0, 0, 1);
+extern int main(int argc, char* argv[])
+ err_file = stdout;
+#ifndef POSIX_TEST
+ region = onig_region_new();
+PAT = '\\/([^\\\\\\/]*(?:\\\\.[^\\\\\\/]*)*)\\/'  # regex source for a slash-delimited pattern; the group captures the text between the slashes
+CM = /\s*,\s*/  # a comma with optional whitespace on either side, used between arguments
+RX2 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)}  # RX2, RI2, RX3 and RN match x(...), i(...), five-argument x(...) and n(...) lines: pattern, quoted string, then non-blank fields
+RI2 = %r{\Ai\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)}
+RX3 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)}
+RN = %r{\An\(#{PAT}#{CM}('[^']*'|"[^"]*")\)} #'
+def convert(endian, fp)  # Prints the encoding setup for endian, then one x2/x3/n C call per recognised test line read from fp.
+ if (endian == BIG_ENDIAN)
+ se = 'BE'  # suffix interpolated into the REG_POSIX_ENCODING_UTF16_ constant name below
+ else
+ se = 'LE'
+ end
+ print(<<"EOS")
+#ifdef POSIX_TEST
+ reg_set_encoding(REG_POSIX_ENCODING_UTF16_#{se});
+ while line = fp.gets()
+ if (m = RX2.match(line))  # x(pattern, string, from, to)
+ reg = conv_reg(endian, m[1])
+ str, from, to = conv_str(endian, m[2], m[3], m[4])
+ printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to)
+ elsif (m = RI2.match(line))  # i(pattern, string, from, to): emitted as an x2 call, exactly like the x(...) case
+ reg = conv_reg(endian, m[1])
+ str, from, to = conv_str(endian, m[2], m[3], m[4])
+ printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to)
+ elsif (m = RX3.match(line))  # x(pattern, string, from, to, mem): the fifth field is passed through unchanged
+ reg = conv_reg(endian, m[1])
+ str, from, to = conv_str(endian, m[2], m[3], m[4])
+ printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", reg, str, from, to, m[5])
+ elsif (m = RN.match(line))  # n(pattern, string): offsets are irrelevant, so 0, 0 is passed to conv_str
+ reg = conv_reg(endian, m[1])
+ str, from, to = conv_str(endian, m[2], 0, 0)
+ printf(" n(\"%s\", \"%s\");\n", reg, str)
+ else  # lines matching none of the four forms produce no output
+ end
+ end
+File::open(ARGV[0]) { |fp|  # open the input file named by ARGV[0]
+ convert(BIG_ENDIAN, fp)  # emit the big-endian test calls; the little-endian pass below is commented out
+#File::open(ARGV[0]) { |fp|
+# convert(LITTLE_ENDIAN, fp)
+ fprintf(stdout,
+ "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)\n",
+ nsucc, nfail, nerror, onig_version());
+#ifndef POSIX_TEST
+ onig_region_free(region, 1);
+ onig_end();
+ return ((nfail == 0 && nerror == 0) ? 0 : -1);