diff options
Diffstat (limited to 'src/fluent-bit/lib/onigmo/testconvu.rb')
-rwxr-xr-x | src/fluent-bit/lib/onigmo/testconvu.rb | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/src/fluent-bit/lib/onigmo/testconvu.rb b/src/fluent-bit/lib/onigmo/testconvu.rb new file mode 100755 index 000000000..384731e0d --- /dev/null +++ b/src/fluent-bit/lib/onigmo/testconvu.rb @@ -0,0 +1,414 @@ +#!/usr/local/bin/ruby +# testconvu.rb +# Copyright (C) 2004-2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp) + +WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0]) +ARGV.shift if WINDOWS + +BIG_ENDIAN = 1 +LITTLE_ENDIAN = 2 + +def eucjp_char_pos(s, byte_pos) + pos = 0 + i = 0 + while (i < byte_pos) + x = s[i].ord + if ((x >= 0xa1 && x <= 0xfe) || x == 0x8e) + i += 2 + elsif (x == 0x8f) + i += 3 + else + i += 1 + end + pos += 1 + end + return pos +end + +def utf16_byte_pos(endian, s, char_pos) + i = 0 + while (char_pos > 0) + x = (endian == BIG_ENDIAN ? s[i] : s[i+1]).ord + if (x >= 0xd8 && x <= 0xdb) + i += 4 + else + i += 2 + end + char_pos -= 1 + end + return i +end + +def s_escape(s) + q = '' + s.each_byte { |b| + if (b < 0x20 || b >= 0x7f || b == 0x22 || b == 0x5c) + q << sprintf("\\%03o", b) + else + q << b.chr + end + } + q +end + +def conv_to_utf16(endian, s) + begin + if (endian == BIG_ENDIAN) + q = s.encode('UTF-16BE', 'EUC-JP') + else + q = s.encode('UTF-16LE', 'EUC-JP') + end + q.force_encoding('ASCII-8BIT') + rescue Encoding::InvalidByteSequenceError + q = 'Invalid character' + rescue Encoding::UndefinedConversionError + STDERR.printf("Encoding::UndefinedConversionError: [%s]\n", s) + return '' + end + + q << "\000\000" + s_escape(q) +end + +def conv_reg(endian, s) + s = s.gsub(/\\([0-7]{2,3})\\([0-7]{2,3})/) { + $1.to_i(8).chr + $2.to_i(8).chr + } + + s = s.gsub(/\\x([0-9A-Fa-f]{2})\\x([0-9A-Fa-f]{2})/) { + $1.to_i(16).chr + $2.to_i(16).chr + } + + if (endian == BIG_ENDIAN) + s = s.gsub(/(\\[0-7]{2,3})/) { "\\000" + $1 } + s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { "\\x00" + $1 } + else + s = s.gsub(/(\\[0-7]{2,3})/) { $1 + "\\000" } + s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { $1 + "\\x00" } + end + + s = s.gsub(/\\/, '\\\\') #' + + if (WINDOWS) + s = s.gsub(/\?\?/, '?\\?') # escape ANSI trigraph + end + conv_to_utf16(endian, s) +end + +def conv_str(endian, s, from, to) + if (s[0] == ?') + s = s[1..-2] + q = s.gsub(/\\/, '\\\\') #' + else + q = s[1..-2] + q.gsub!(/\\n/, "\x0a") + q.gsub!(/\\t/, "\x09") + q.gsub!(/\\v/, "\x0b") + q.gsub!(/\\r/, "\x0d") + q.gsub!(/\\f/, "\x0c") + q.gsub!(/\\a/, "\x07") + q.gsub!(/\\e/, "\x1b") + + q.gsub!(/\\([0-7]{2,3})/) { $1.to_i(8).chr } + q.gsub!(/\\x([0-9A-Fa-f]{2})/) { $1.to_i(16).chr } + end + + q.force_encoding('ASCII-8BIT') + from = from.to_i + to = to.to_i + eucjp_from = eucjp_char_pos(q, from) + eucjp_to = eucjp_char_pos(q, to) + + s = conv_to_utf16(endian, q) + + from = utf16_byte_pos(endian, s, eucjp_from) + to = utf16_byte_pos(endian, s, eucjp_to) + return s, from, to +end + +print(<<"EOS") +/* + * This program was generated by testconv.rb. + */ +#include<stdio.h> + +#ifdef POSIX_TEST +#include "onigmoposix.h" +#else +#include "onigmo.h" +#endif + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +static OnigEncoding ENC; +#endif + +#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p) + +static void uconv(char* from, char* to, int len) +{ + int i; + unsigned char c; + char *q; + + q = to; + + for (i = 0; i < len; i += 2) { + c = (unsigned char )from[i]; + if (c == 0) { + c = (unsigned char )from[i+1]; + if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { + sprintf(q, "\\\\%03o", c); + q += 4; + } + else { + sprintf(q, "%c", c); + q++; + } + } + else { + sprintf(q, "\\\\%03o", c); + q += 4; + c = (unsigned char )from[i+1]; + sprintf(q, "\\\\%03o", c); + q += 4; + } + } + + *q = 0; +} + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + char cpat[4000], cstr[4000]; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[20]; + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr, + (int)from, (int)to, (int)pmatch[mem].rm_so, (int)pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT; + + /* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */ + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + +#if 0 + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)), + ONIG_OPTION_DEFAULT, ENC, &syn, &einfo); +#else + ci.num_of_elements = 5; + ci.pattern_enc = ENC; + ci.target_enc = ENC; + ci.syntax = &syn; + ci.option = ONIG_OPTION_DEFAULT; + ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT; + + r = onig_new_deluxe(®, (UChar* )pattern, + (UChar* )(pattern + ulen(pattern)), + &ci, &einfo); +#endif + + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)), + (UChar* )str, (UChar* )(str + ulen(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr, + (int)from, (int)to, (int)region->beg[mem], (int)region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifndef POSIX_TEST + region = onig_region_new(); +#endif +EOS + + +PAT = '\\/([^\\\\\\/]*(?:\\\\.[^\\\\\\/]*)*)\\/' +CM = /\s*,\s*/ +RX2 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)} +RI2 = %r{\Ai\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)} +RX3 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)} +RN = %r{\An\(#{PAT}#{CM}('[^']*'|"[^"]*")\)} #' + +def convert(endian, fp) + + if (endian == BIG_ENDIAN) + se = 'BE' + else + se = 'LE' + end + + print(<<"EOS") +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_UTF16_#{se}); +#else + ENC = ONIG_ENCODING_UTF16_#{se}; +#endif +EOS + + while line = fp.gets() + if (m = RX2.match(line)) + reg = conv_reg(endian, m[1]) + str, from, to = conv_str(endian, m[2], m[3], m[4]) + printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to) + elsif (m = RI2.match(line)) + reg = conv_reg(endian, m[1]) + str, from, to = conv_str(endian, m[2], m[3], m[4]) + printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to) + elsif (m = RX3.match(line)) + reg = conv_reg(endian, m[1]) + str, from, to = conv_str(endian, m[2], m[3], m[4]) + printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", reg, str, from, to, m[5]) + elsif (m = RN.match(line)) + reg = conv_reg(endian, m[1]) + str, from, to = conv_str(endian, m[2], 0, 0) + printf(" n(\"%s\", \"%s\");\n", reg, str) + else + end + end +end + +File::open(ARGV[0]) { |fp| + convert(BIG_ENDIAN, fp) +} + +#File::open(ARGV[0]) { |fp| +# convert(LITTLE_ENDIAN, fp) +#} + +print(<<'EOS') + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} +EOS + +# END OF SCRIPT |