#!/usr/local/bin/ruby # testconvu.rb # Copyright (C) 2004-2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp) WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0]) ARGV.shift if WINDOWS BIG_ENDIAN = 1 LITTLE_ENDIAN = 2 def eucjp_char_pos(s, byte_pos) pos = 0 i = 0 while (i < byte_pos) x = s[i].ord if ((x >= 0xa1 && x <= 0xfe) || x == 0x8e) i += 2 elsif (x == 0x8f) i += 3 else i += 1 end pos += 1 end return pos end def utf16_byte_pos(endian, s, char_pos) i = 0 while (char_pos > 0) x = (endian == BIG_ENDIAN ? s[i] : s[i+1]).ord if (x >= 0xd8 && x <= 0xdb) i += 4 else i += 2 end char_pos -= 1 end return i end def s_escape(s) q = '' s.each_byte { |b| if (b < 0x20 || b >= 0x7f || b == 0x22 || b == 0x5c) q << sprintf("\\%03o", b) else q << b.chr end } q end def conv_to_utf16(endian, s) begin if (endian == BIG_ENDIAN) q = s.encode('UTF-16BE', 'EUC-JP') else q = s.encode('UTF-16LE', 'EUC-JP') end q.force_encoding('ASCII-8BIT') rescue Encoding::InvalidByteSequenceError q = 'Invalid character' rescue Encoding::UndefinedConversionError STDERR.printf("Encoding::UndefinedConversionError: [%s]\n", s) return '' end q << "\000\000" s_escape(q) end def conv_reg(endian, s) s = s.gsub(/\\([0-7]{2,3})\\([0-7]{2,3})/) { $1.to_i(8).chr + $2.to_i(8).chr } s = s.gsub(/\\x([0-9A-Fa-f]{2})\\x([0-9A-Fa-f]{2})/) { $1.to_i(16).chr + $2.to_i(16).chr } if (endian == BIG_ENDIAN) s = s.gsub(/(\\[0-7]{2,3})/) { "\\000" + $1 } s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { "\\x00" + $1 } else s = s.gsub(/(\\[0-7]{2,3})/) { $1 + "\\000" } s = s.gsub(/(\\x[0-9A-Fa-f]{2})/) { $1 + "\\x00" } end s = s.gsub(/\\/, '\\\\') #' if (WINDOWS) s = s.gsub(/\?\?/, '?\\?') # escape ANSI trigraph end conv_to_utf16(endian, s) end def conv_str(endian, s, from, to) if (s[0] == ?') s = s[1..-2] q = s.gsub(/\\/, '\\\\') #' else q = s[1..-2] q.gsub!(/\\n/, "\x0a") q.gsub!(/\\t/, "\x09") q.gsub!(/\\v/, "\x0b") q.gsub!(/\\r/, "\x0d") q.gsub!(/\\f/, "\x0c") q.gsub!(/\\a/, "\x07") q.gsub!(/\\e/, "\x1b") q.gsub!(/\\([0-7]{2,3})/) { $1.to_i(8).chr } q.gsub!(/\\x([0-9A-Fa-f]{2})/) { $1.to_i(16).chr } end q.force_encoding('ASCII-8BIT') from = from.to_i to = to.to_i eucjp_from = eucjp_char_pos(q, from) eucjp_to = eucjp_char_pos(q, to) s = conv_to_utf16(endian, q) from = utf16_byte_pos(endian, s, eucjp_from) to = utf16_byte_pos(endian, s, eucjp_to) return s, from, to end print(<<"EOS") /* * This program was generated by testconv.rb. */ #include #ifdef POSIX_TEST #include "onigmoposix.h" #else #include "onigmo.h" #endif static int nsucc = 0; static int nfail = 0; static int nerror = 0; static FILE* err_file; #ifndef POSIX_TEST static OnigRegion* region; static OnigEncoding ENC; #endif #define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p) static void uconv(char* from, char* to, int len) { int i; unsigned char c; char *q; q = to; for (i = 0; i < len; i += 2) { c = (unsigned char )from[i]; if (c == 0) { c = (unsigned char )from[i+1]; if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { sprintf(q, "\\\\%03o", c); q += 4; } else { sprintf(q, "%c", c); q++; } } else { sprintf(q, "\\\\%03o", c); q += 4; c = (unsigned char )from[i+1]; sprintf(q, "\\\\%03o", c); q += 4; } } *q = 0; } static void xx(char* pattern, char* str, int from, int to, int mem, int not) { int r; char cpat[4000], cstr[4000]; #ifdef POSIX_TEST regex_t reg; char buf[200]; regmatch_t pmatch[20]; uconv(pattern, cpat, ulen(pattern)); uconv(str, cstr, ulen(str)); r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); if (r) { regerror(r, ®, buf, sizeof(buf)); fprintf(err_file, "ERROR: %s\\n", buf); nerror++; return ; } r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); if (r != 0 && r != REG_NOMATCH) { regerror(r, ®, buf, sizeof(buf)); fprintf(err_file, "ERROR: %s\\n", buf); nerror++; return ; } if (r == REG_NOMATCH) { if (not) { fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr); nsucc++; } else { fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr); nfail++; } } else { if (not) { fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr); nfail++; } else { if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr); nsucc++; } else { fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr, (int)from, (int)to, (int)pmatch[mem].rm_so, (int)pmatch[mem].rm_eo); nfail++; } } } regfree(®); #else regex_t* reg; OnigCompileInfo ci; OnigErrorInfo einfo; OnigSyntaxType syn = *ONIG_SYNTAX_DEFAULT; /* ONIG_OPTION_OFF(syn.options, ONIG_OPTION_ASCII_RANGE); */ uconv(pattern, cpat, ulen(pattern)); uconv(str, cstr, ulen(str)); #if 0 r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)), ONIG_OPTION_DEFAULT, ENC, &syn, &einfo); #else ci.num_of_elements = 5; ci.pattern_enc = ENC; ci.target_enc = ENC; ci.syntax = &syn; ci.option = ONIG_OPTION_DEFAULT; ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT; r = onig_new_deluxe(®, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)), &ci, &einfo); #endif if (r) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(err_file, "ERROR: %s\\n", s); nerror++; return ; } r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)), (UChar* )str, (UChar* )(str + ulen(str)), region, ONIG_OPTION_NONE); if (r < ONIG_MISMATCH) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r); fprintf(err_file, "ERROR: %s\\n", s); nerror++; return ; } if (r == ONIG_MISMATCH) { if (not) { fprintf(stdout, "OK(N): /%s/ '%s'\\n", cpat, cstr); nsucc++; } else { fprintf(stdout, "FAIL: /%s/ '%s'\\n", cpat, cstr); nfail++; } } else { if (not) { fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", cpat, cstr); nfail++; } else { if (region->beg[mem] == from && region->end[mem] == to) { fprintf(stdout, "OK: /%s/ '%s'\\n", cpat, cstr); nsucc++; } else { fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", cpat, cstr, (int)from, (int)to, (int)region->beg[mem], (int)region->end[mem]); nfail++; } } } onig_free(reg); #endif } static void x2(char* pattern, char* str, int from, int to) { xx(pattern, str, from, to, 0, 0); } static void x3(char* pattern, char* str, int from, int to, int mem) { xx(pattern, str, from, to, mem, 0); } static void n(char* pattern, char* str) { xx(pattern, str, 0, 0, 0, 1); } extern int main(int argc, char* argv[]) { err_file = stdout; #ifndef POSIX_TEST region = onig_region_new(); #endif EOS PAT = '\\/([^\\\\\\/]*(?:\\\\.[^\\\\\\/]*)*)\\/' CM = /\s*,\s*/ RX2 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)} RI2 = %r{\Ai\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)} RX3 = %r{\Ax\(#{PAT}#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)} RN = %r{\An\(#{PAT}#{CM}('[^']*'|"[^"]*")\)} #' def convert(endian, fp) if (endian == BIG_ENDIAN) se = 'BE' else se = 'LE' end print(<<"EOS") #ifdef POSIX_TEST reg_set_encoding(REG_POSIX_ENCODING_UTF16_#{se}); #else ENC = ONIG_ENCODING_UTF16_#{se}; #endif EOS while line = fp.gets() if (m = RX2.match(line)) reg = conv_reg(endian, m[1]) str, from, to = conv_str(endian, m[2], m[3], m[4]) printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to) elsif (m = RI2.match(line)) reg = conv_reg(endian, m[1]) str, from, to = conv_str(endian, m[2], m[3], m[4]) printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, from, to) elsif (m = RX3.match(line)) reg = conv_reg(endian, m[1]) str, from, to = conv_str(endian, m[2], m[3], m[4]) printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", reg, str, from, to, m[5]) elsif (m = RN.match(line)) reg = conv_reg(endian, m[1]) str, from, to = conv_str(endian, m[2], 0, 0) printf(" n(\"%s\", \"%s\");\n", reg, str) else end end end File::open(ARGV[0]) { |fp| convert(BIG_ENDIAN, fp) } #File::open(ARGV[0]) { |fp| # convert(LITTLE_ENDIAN, fp) #} print(<<'EOS') fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)\n", nsucc, nfail, nerror, onig_version()); #ifndef POSIX_TEST onig_region_free(region, 1); onig_end(); #endif return ((nfail == 0 && nerror == 0) ? 0 : -1); } EOS # END OF SCRIPT