diff options
Diffstat (limited to 'web/server/h2o/libh2o/deps/mruby-onig-regexp')
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis.yml | 10 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis_config.rb | 21 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/Onigmo-6.1.1.tar.gz | bin | 0 -> 821378 bytes | |||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/README.md | 45 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/mrbgem.rake | 113 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/mrblib/onig_regexp.rb | 140 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/src/mruby_onig_regexp.c | 1064 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/mruby-onig-regexp/test/mruby_onig_regexp.rb | 398 |
8 files changed, 1791 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis.yml b/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis.yml new file mode 100644 index 000000000..5a0d1ddf1 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis.yml @@ -0,0 +1,10 @@ +sudo: false +addons: + apt: + packages: + - gperf +script: + - export MRUBY_CONFIG="$TRAVIS_BUILD_DIR/.travis_config.rb" + - git clone --depth 1 "https://github.com/mruby/mruby.git" + - cd mruby + - ./minirake all test diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis_config.rb b/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis_config.rb new file mode 100644 index 000000000..769446c4c --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/.travis_config.rb @@ -0,0 +1,21 @@ +MRuby::Build.new do |conf| + toolchain :gcc + enable_debug + enable_test + + gem :core => 'mruby-print' + gem :core => 'mruby-sprintf' + gem "#{MRUBY_ROOT}/.." +end + +MRuby::Build.new("onigmo-bundled") do |conf| + toolchain :gcc + enable_debug + enable_test + + gem :core => 'mruby-print' + gem :core => 'mruby-sprintf' + gem "#{MRUBY_ROOT}/.." do |g| + g.bundle_onigmo + end +end diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/Onigmo-6.1.1.tar.gz b/web/server/h2o/libh2o/deps/mruby-onig-regexp/Onigmo-6.1.1.tar.gz Binary files differnew file mode 100644 index 000000000..139226584 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/Onigmo-6.1.1.tar.gz diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/README.md b/web/server/h2o/libh2o/deps/mruby-onig-regexp/README.md new file mode 100644 index 000000000..41eada48f --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/README.md @@ -0,0 +1,45 @@ +# mruby-onig-regexp + +[![Build Status](https://travis-ci.org/mattn/mruby-onig-regexp.svg)](https://travis-ci.org/mattn/mruby-onig-regexp) + +## install by mrbgems +```ruby +MRuby::Build.new do |conf| + + # ... (snip) ... + + conf.gem :github => 'mattn/mruby-onig-regexp' +end +``` + +## Example +```ruby + +def matchstr(str) + reg = Regexp.compile("abc") + + if reg =~ str then + p "match" + else + p "not match" + end +end + +matchstr("abcdef") # => match +matchstr("ghijkl") # => not match +matchstr("xyzabc") # => match +``` + +## License + +MIT + +### License of Onigmo +BSD licensed. + + Onigmo (Oniguruma-mod) -- (C) K.Takata <kentkt AT csc DOT jp> + Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + +## Author + +Yasuhiro Matsumoto (a.k.a mattn) diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrbgem.rake b/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrbgem.rake new file mode 100644 index 000000000..679986703 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrbgem.rake @@ -0,0 +1,113 @@ +MRuby::Gem::Specification.new('mruby-onig-regexp') do |spec| + spec.license = 'MIT' + spec.authors = 'mattn' + + def spec.bundle_onigmo + return if @onigmo_bundled + @onigmo_bundled = true + + visualcpp = ENV['VisualStudioVersion'] || ENV['VSINSTALLDIR'] + + require 'open3' + + # remove libonig, instead link directly against pthread + unless ENV['OS'] == 'Windows_NT' + linker.libraries = ['pthread'] + end + + version = '6.1.1' + oniguruma_dir = "#{build_dir}/onigmo-#{version}" + oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonigmo" + unless ENV['OS'] == 'Windows_NT' + oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonigmo" + else + if ENV['PROCESSOR_ARCHITECTURE'] == 'AMD64' + oniguruma_lib = libfile "#{oniguruma_dir}/build-x86-64/onigmo" + else + oniguruma_lib = libfile "#{oniguruma_dir}/build-i686/onigmo" + end + end + header = "#{oniguruma_dir}/onigmo.h" + + task :clean do + FileUtils.rm_rf [oniguruma_dir] + end + + file header do |t| + FileUtils.mkdir_p oniguruma_dir + Dir.chdir(build_dir) do + _pp 'extracting', "Onigmo-#{version}" + `gzip -dc "#{dir}/Onigmo-#{version}.tar.gz" | tar xf -` + end + end + + def run_command(env, command) + STDOUT.sync = true + Open3.popen2e(env, command) do |stdin, stdout, thread| + print stdout.read + fail "#{command} failed" if thread.value != 0 + end + end + + libonig_objs_dir = "#{oniguruma_dir}/libonig_objs" + libmruby_a = libfile("#{build.build_dir}/lib/libmruby") + objext = visualcpp ? '.obj' : '.o' + + file oniguruma_lib => header do |t| + Dir.chdir(oniguruma_dir) do + e = { + 'CC' => "#{build.cc.command} #{build.cc.flags.join(' ')}", + 'CXX' => "#{build.cxx.command} #{build.cxx.flags.join(' ')}", + 'LD' => "#{build.linker.command} #{build.linker.flags.join(' ')}", + 'AR' => build.archiver.command } + unless ENV['OS'] == 'Windows_NT' + if build.kind_of? MRuby::CrossBuild + host = "--host #{build.name}" + end + + _pp 'autotools', oniguruma_dir + run_command e, './autogen.sh' if File.exists? 'autogen.sh' + run_command e, "./configure --disable-shared --enable-static #{host}" + run_command e, 'make' + else + run_command e, 'cmd /c "copy /Y win32 > NUL"' + if visualcpp + run_command e, 'nmake -f Makefile' + else + run_command e, 'make -f Makefile.mingw' + end + end + end + + FileUtils.mkdir_p libonig_objs_dir + Dir.chdir(libonig_objs_dir) do + unless visualcpp + `ar x #{oniguruma_lib}` + else + winname = oniguruma_lib.gsub(%'/', '\\') + `lib -nologo -list #{winname}`.each_line do |line| + line.chomp! + `lib -nologo -extract:#{line} #{winname}` + end + end + end + file libmruby_a => Dir.glob("#{libonig_objs_dir}/*#{objext}") + end + + file libmruby_a => Dir.glob("#{libonig_objs_dir}/*#{objext}") if File.exists? oniguruma_lib + + file "#{dir}/src/mruby_onig_regexp.c" => oniguruma_lib + cc.include_paths << oniguruma_dir + cc.defines += ['HAVE_ONIGMO_H'] + end + + if spec.respond_to? :search_package and spec.search_package 'onigmo' + spec.cc.defines += ['HAVE_ONIGMO_H'] + elsif spec.respond_to? :search_package and spec.search_package 'oniguruma' + spec.cc.defines += ['HAVE_ONIGURUMA_H'] + elsif build.cc.respond_to? :search_header_path and build.cc.search_header_path 'oniguruma.h' + spec.linker.libraries << 'onig' + else + spec.bundle_onigmo + end +end diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrblib/onig_regexp.rb b/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrblib/onig_regexp.rb new file mode 100644 index 000000000..2dc8d55e0 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/mrblib/onig_regexp.rb @@ -0,0 +1,140 @@ +class OnigRegexp + @memo = {} + + # ISO 15.2.15.6.1 + def self.compile(*args) + as = args.to_s + unless @memo.key? as + @memo[as] = self.new(*args) + end + @memo[as] + end + + # ISO 15.2.15.6.3 + def self.last_match + @last_match + end + + # ISO 15.2.15.7.2 + def initialize_copy(other) + initialize(other.source, other.options) + end + + # ISO 15.2.15.7.4 + def ===(str) + not self.match(str).nil? + end + + # ISO 15.2.15.7.5 + def =~(str) + m = self.match(str) + m ? m.begin(0) : nil + end + + # ISO 15.2.15.7.8 + attr_reader :source +end + +class String + # ISO 15.2.10.5.5 + def =~(a) + begin + (a.class.to_s == 'String' ? Regexp.new(a.to_s) : a) =~ self + rescue + false + end + end + + # redefine methods with oniguruma regexp version + [:sub, :gsub, :split, :scan].each do |v| + alias_method "string_#{v}".to_sym, v + alias_method v, "onig_regexp_#{v}".to_sym + end + + alias_method :old_slice, :slice + alias_method :old_square_brancket, :[] + + def [](*args) + return old_square_brancket(*args) unless args[0].class == Regexp + + if args.size == 2 + match = args[0].match(self) + if match + if args[1] == 0 + str = match[0] + else + str = match.captures[args[1] - 1] + end + return str + end + end + + match_data = args[0].match(self) + if match_data + result = match_data.to_s + return result + end + end + + alias_method :slice, :[] + + def slice!(*args) + if args.size < 2 + result = slice(*args) + nth = args[0] + + if nth.class == Regexp + lm = Regexp.last_match + self[nth] = '' if result + Regexp.last_match = lm + else + self[nth] = '' if result + end + else + result = slice(*args) + + nth = args[0] + len = args[1] + + if nth.class == Regexp + lm = Regexp.last_match + self[nth, len] = '' if result + Regexp.last_match = lm + else + self[nth, len] = '' if result && nth != self.size + end + end + + result + end + + alias_method :old_index, :index + + def index(pattern, pos=0) + if pattern.class == Regexp + str = self[pos..-1] + if str + if num = (pattern =~ str) + if pos < 0 + num += self.size + end + return num + pos + end + end + nil + else + self.old_index(pattern, pos) + end + end +end + +module Kernel + def =~(_) + nil + end +end + +Regexp = OnigRegexp unless Object.const_defined?(:Regexp) +MatchData = OnigMatchData unless Object.const_defined? :MatchData + +# This is based on https://github.com/masamitsu-murase/mruby-hs-regexp diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/src/mruby_onig_regexp.c b/web/server/h2o/libh2o/deps/mruby-onig-regexp/src/mruby_onig_regexp.c new file mode 100644 index 000000000..38d93ccb9 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/src/mruby_onig_regexp.c @@ -0,0 +1,1064 @@ +/* +The MIT License (MIT) + +Copyright (c) 2015 mattn. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <memory.h> +#include <mruby.h> +#include <mruby/class.h> +#include <mruby/variable.h> +#include <mruby/array.h> +#include <mruby/string.h> +#include <mruby/data.h> +#include <mruby/variable.h> +#ifdef _MSC_VER +#define ONIG_EXTERN extern +#endif +#ifdef HAVE_ONIGMO_H +#include <onigmo.h> +#elif defined(HAVE_ONIGURUMA_H) +#include <oniguruma.h> +#else +#include "oniguruma.h" +#endif + +#ifdef MRUBY_VERSION +#define mrb_args_int mrb_int +#else +#define mrb_args_int int +#endif + +static const char utf8len_codepage[256] = +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1, +}; + +static mrb_int +utf8len(const char* p, const char* e) +{ + mrb_int len; + mrb_int i; + + len = utf8len_codepage[(unsigned char)*p]; + if (p + len > e) return 1; + for (i = 1; i < len; ++i) + if ((p[i] & 0xc0) != 0x80) + return 1; + return len; +} + +static void +onig_regexp_free(mrb_state *mrb, void *p) { + onig_free((OnigRegex) p); +} + +static struct mrb_data_type mrb_onig_regexp_type = { + "PosixRegexp", onig_regexp_free +}; + +static void +match_data_free(mrb_state* mrb, void* p) { + (void)mrb; + onig_region_free((OnigRegion*)p, 1); +} + +static struct mrb_data_type mrb_onig_region_type = { + "OnigRegion", match_data_free +}; + +static mrb_value +onig_regexp_initialize(mrb_state *mrb, mrb_value self) { + mrb_value str, flag = mrb_nil_value(), code = mrb_nil_value(); + mrb_get_args(mrb, "S|oo", &str, &flag, &code); + + int cflag = 0; + OnigEncoding enc = ONIG_ENCODING_UTF8; + if(mrb_string_p(code)) { + char const* str_code = mrb_string_value_ptr(mrb, code); + if(strchr(str_code, 'n') || strchr(str_code, 'N')) { + enc = ONIG_ENCODING_ASCII; + } + } + if(mrb_nil_p(flag)) { + } else if(mrb_type(flag) == MRB_TT_TRUE) { + cflag |= ONIG_OPTION_IGNORECASE; + } else if(mrb_fixnum_p(flag)) { + int int_flags = mrb_fixnum(flag); + if(int_flags & 0x1) { cflag |= ONIG_OPTION_IGNORECASE; } + if(int_flags & 0x2) { cflag |= ONIG_OPTION_EXTEND; } + if(int_flags & 0x4) { cflag |= ONIG_OPTION_MULTILINE; } + } else if(mrb_string_p(flag)) { + char const* str_flags = mrb_string_value_ptr(mrb, flag); + if(strchr(str_flags, 'i')) { cflag |= ONIG_OPTION_IGNORECASE; } + if(strchr(str_flags, 'x')) { cflag |= ONIG_OPTION_EXTEND; } + if(strchr(str_flags, 'm')) { cflag |= ONIG_OPTION_MULTILINE; } + } else { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown regexp flag: %S", flag); + } + + OnigErrorInfo einfo; + OnigRegex reg; + int result = onig_new(®, (OnigUChar*)RSTRING_PTR(str), (OnigUChar*) RSTRING_PTR(str) + RSTRING_LEN(str), + cflag, enc, ONIG_SYNTAX_RUBY, &einfo); + if (result != ONIG_NORMAL) { + char err[ONIG_MAX_ERROR_MESSAGE_LEN] = ""; + onig_error_code_to_str((OnigUChar*)err, result); + mrb_raisef(mrb, E_ARGUMENT_ERROR, "'%S' is an invalid regular expression because %S.", + str, mrb_str_new_cstr(mrb, err)); + } + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), str); + + DATA_PTR(self) = reg; + DATA_TYPE(self) = &mrb_onig_regexp_type; + + return self; +} + +static mrb_value +create_onig_region(mrb_state* mrb, mrb_value const str, mrb_value rex) { + mrb_assert(mrb_string_p(str)); + mrb_assert(mrb_type(rex) == MRB_TT_DATA && DATA_TYPE(rex) == &mrb_onig_regexp_type); + mrb_value const c = mrb_obj_value(mrb_data_object_alloc( + mrb, mrb_class_get(mrb, "OnigMatchData"), onig_region_new(), &mrb_onig_region_type)); + mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "string"), mrb_str_dup(mrb, str)); + mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "regexp"), rex); + return c; +} + +static int +onig_match_common(mrb_state* mrb, OnigRegex reg, mrb_value match_value, mrb_value str, int pos) { + mrb_assert(mrb_string_p(str)); + mrb_assert(DATA_TYPE(match_value) == &mrb_onig_region_type); + OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value); + OnigUChar const* str_ptr = (OnigUChar const*)RSTRING_PTR(str); + int const result = onig_search(reg, str_ptr, str_ptr + RSTRING_LEN(str), + str_ptr + pos, str_ptr + RSTRING_LEN(str), match, 0); + if (result != ONIG_MISMATCH && result < 0) { + char err[ONIG_MAX_ERROR_MESSAGE_LEN] = ""; + onig_error_code_to_str((OnigUChar*)err, result); + mrb_raise(mrb, E_REGEXP_ERROR, err); + } + + struct RObject* const cls = (struct RObject*)mrb_class_get(mrb, "OnigRegexp"); + mrb_obj_iv_set(mrb, cls, mrb_intern_lit(mrb, "@last_match"), match_value); + + if (result != ONIG_MISMATCH && + mrb_class_get(mrb, "Regexp") == (struct RClass*)cls && + mrb_bool(mrb_obj_iv_get(mrb, (struct RObject*)cls, mrb_intern_lit(mrb, "@set_global_variables")))) + { + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"), match_value); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$&"), + mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(0))); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$`"), mrb_funcall(mrb, match_value, "pre_match", 0)); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$'"), mrb_funcall(mrb, match_value, "post_match", 0)); + mrb_gv_set(mrb, mrb_intern_lit(mrb, "$+"), + mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(match->num_regs - 1))); + + // $1 to $9 + int idx = 1; + int const idx_max = match->num_regs > 10? 10 : match->num_regs; + for(; idx < idx_max; ++idx) { + char const n[] = { '$', '0' + idx }; + mrb_gv_set(mrb, mrb_intern(mrb, n, 2), + mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(idx))); + } + + for(; idx < 10; ++idx) { + char const n[] = { '$', '0' + idx }; + mrb_gv_remove(mrb, mrb_intern(mrb, n, 2)); + } + } + + return result; +} + +static mrb_value +onig_regexp_match(mrb_state *mrb, mrb_value self) { + mrb_value str = mrb_nil_value(); + OnigRegex reg; + mrb_int pos = 0; + + mrb_get_args(mrb, "o|i", &str, &pos); + if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) { + return mrb_nil_value(); + } + + if (mrb_nil_p(str)) { + return mrb_nil_value(); + } + str = mrb_string_type(mrb, str); + + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg); + + mrb_value const ret = create_onig_region(mrb, str, self); + return (onig_match_common(mrb, reg, ret, str, pos) == ONIG_MISMATCH) + ? mrb_nil_value() : ret; +} + +static mrb_value +onig_regexp_equal(mrb_state *mrb, mrb_value self) { + mrb_value other; + OnigRegex self_reg, other_reg; + + mrb_get_args(mrb, "o", &other); + if (mrb_obj_equal(mrb, self, other)){ + return mrb_true_value(); + } + if (mrb_nil_p(other)) { + return mrb_false_value(); + } + if (!mrb_obj_is_kind_of(mrb, other, mrb_class_get(mrb, "OnigRegexp"))) { + return mrb_false_value(); + } + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, self_reg); + Data_Get_Struct(mrb, other, &mrb_onig_regexp_type, other_reg); + + if (!self_reg || !other_reg){ + mrb_raise(mrb, E_RUNTIME_ERROR, "Invalid OnigRegexp"); + } + if (onig_get_options(self_reg) != onig_get_options(other_reg)){ + return mrb_false_value(); + } + return mrb_str_equal(mrb, mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")), mrb_iv_get(mrb, other, mrb_intern_lit(mrb, "@source"))) ? + mrb_true_value() : mrb_false_value(); +} + +static mrb_value +onig_regexp_casefold_p(mrb_state *mrb, mrb_value self) { + OnigRegex reg; + + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg); + return (onig_get_options(reg) & ONIG_OPTION_IGNORECASE) ? mrb_true_value() : mrb_false_value(); +} + +static mrb_value +onig_regexp_options(mrb_state *mrb, mrb_value self) { + OnigRegex reg; + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg); + return mrb_fixnum_value(onig_get_options(reg)); +} + +static char * +option_to_str(char str[4], int options) { + char *p = str; + if (options & ONIG_OPTION_MULTILINE) *p++ = 'm'; + if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i'; + if (options & ONIG_OPTION_EXTEND) *p++ = 'x'; + *p = 0; + return str; +} + +static mrb_value +regexp_expr_str(mrb_state *mrb, mrb_value str, const char *p, int len) { + const char *pend; + char buf[5]; + + pend = (const char *) p + len; + for (;p < pend; p++) { + unsigned char c, cc; + + c = *p; + if (c == '/'|| c == '\\') { + buf[0] = '\\'; buf[1] = c; + mrb_str_cat(mrb, str, buf, 2); + continue; + } + if (ISPRINT(c)) { + buf[0] = c; + mrb_str_cat(mrb, str, buf, 1); + continue; + } + switch (c) { + case '\n': cc = 'n'; break; + case '\r': cc = 'r'; break; + case '\t': cc = 't'; break; + default: cc = 0; break; + } + if (cc) { + buf[0] = '\\'; + buf[1] = (char)cc; + mrb_str_cat(mrb, str, buf, 2); + continue; + } + else { + buf[0] = '\\'; + buf[3] = '0' + c % 8; c /= 8; + buf[2] = '0' + c % 8; c /= 8; + buf[1] = '0' + c % 8; + mrb_str_cat(mrb, str, buf, 4); + continue; + } + } + return str; +} + +static mrb_value +onig_regexp_inspect(mrb_state *mrb, mrb_value self) { + OnigRegex reg; + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg); + mrb_value str = mrb_str_new_lit(mrb, "/"); + mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + regexp_expr_str(mrb, str, (const char *)RSTRING_PTR(src), RSTRING_LEN(src)); + mrb_str_cat_lit(mrb, str, "/"); + char opts[4]; + if (*option_to_str(opts, onig_get_options(reg))) { + mrb_str_cat_cstr(mrb, str, opts); + } + if (onig_get_encoding(reg) == ONIG_ENCODING_ASCII) { + mrb_str_cat_lit(mrb, str, "n"); + } + return str; +} + +static mrb_value +onig_regexp_to_s(mrb_state *mrb, mrb_value self) { + int options; + const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; + long len; + const char* ptr; + mrb_value str = mrb_str_new_lit(mrb, "(?"); + char optbuf[5]; + + OnigRegex reg; + Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg); + options = onig_get_options(reg); + mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")); + ptr = RSTRING_PTR(src); + len = RSTRING_LEN(src); + + again: + if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') { + int err = 1; + ptr += 2; + if ((len -= 2) > 0) { + do { + if(strchr(ptr, 'i')) { options |= ONIG_OPTION_IGNORECASE; } + if(strchr(ptr, 'x')) { options |= ONIG_OPTION_EXTEND; } + if(strchr(ptr, 'm')) { options |= ONIG_OPTION_MULTILINE; } + ++ptr; + } while (--len > 0); + } + if (len > 1 && *ptr == '-') { + ++ptr; + --len; + do { + if(strchr(ptr, 'i')) { options &= ~ONIG_OPTION_IGNORECASE; } + if(strchr(ptr, 'x')) { options &= ~ONIG_OPTION_EXTEND; } + if(strchr(ptr, 'm')) { options &= ~ONIG_OPTION_MULTILINE; } + ++ptr; + } while (--len > 0); + } + if (*ptr == ')') { + --len; + ++ptr; + goto again; + } + if (*ptr == ':' && ptr[len-1] == ')') { + OnigRegex rp; + ++ptr; + len -= 2; + err = onig_new(&rp, (OnigUChar*)ptr, (OnigUChar*)ptr + len, ONIG_OPTION_DEFAULT, + ONIG_ENCODING_UTF8, OnigDefaultSyntax, NULL); + onig_free(rp); + } + if (err) { + options = onig_get_options(reg); + ptr = RSTRING_PTR(src); + len = RSTRING_LEN(src); + } + } + + if (*option_to_str(optbuf, options)) mrb_str_cat_cstr(mrb, str, optbuf); + + if ((options & embeddable) != embeddable) { + optbuf[0] = '-'; + option_to_str(optbuf + 1, ~options); + mrb_str_cat_cstr(mrb, str, optbuf); + } + + mrb_str_cat_cstr(mrb, str, ":"); + regexp_expr_str(mrb, str, ptr, len); + mrb_str_cat_cstr(mrb, str, ")"); + return str; +} + + +static mrb_value +onig_regexp_version(mrb_state* mrb, mrb_value self) { + (void)self; + return mrb_str_new_cstr(mrb, onig_version()); +} + +static mrb_value +match_data_to_a(mrb_state* mrb, mrb_value self); + +static mrb_int +match_data_actual_index(mrb_state* mrb, mrb_value self, mrb_value idx_value) { + if(mrb_fixnum_p(idx_value)) { return mrb_fixnum(idx_value); } + + char const* name = NULL; + char const* name_end = NULL; + if(mrb_symbol_p(idx_value)) { + mrb_int sym_len; + name = mrb_sym2name_len(mrb, mrb_symbol(idx_value), &sym_len); + name_end = name + sym_len; + } else if(mrb_string_p(idx_value)) { + name = mrb_string_value_ptr(mrb, idx_value); + name_end = name + strlen(name); + } else { mrb_assert(FALSE); } + mrb_assert(name && name_end); + + mrb_value const regexp = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp")); + mrb_assert(!mrb_nil_p(regexp)); + mrb_assert(DATA_TYPE(regexp) == &mrb_onig_regexp_type); + mrb_assert(DATA_TYPE(self) == &mrb_onig_region_type); + int const idx = onig_name_to_backref_number( + (OnigRegex)DATA_PTR(regexp), (OnigUChar const*)name, (OnigUChar const*)name_end, + (OnigRegion*)DATA_PTR(self)); + if (idx < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S", idx_value); + } + return idx; +} + +// ISO 15.2.16.3.1 +static mrb_value +match_data_index(mrb_state* mrb, mrb_value self) { + mrb_value src; + mrb_int argc; mrb_value *argv; + + mrb_get_args(mrb, "*", &argv, &argc); + + src = match_data_to_a(mrb, self); + + if (argc == 1) { + switch (mrb_type(argv[0])) { + case MRB_TT_FIXNUM: + case MRB_TT_SYMBOL: + case MRB_TT_STRING: + return mrb_ary_entry(src, match_data_actual_index(mrb, self, argv[0])); + default: break; + } + } + + return mrb_funcall_argv(mrb, src, mrb_intern_lit(mrb, "[]"), argc, argv); +} + +#define match_data_check_index(idx) \ + if(idx < 0 || reg->num_regs <= idx) \ + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of matches", mrb_fixnum_value(idx)) \ + +// ISO 15.2.16.3.2 +static mrb_value +match_data_begin(mrb_state* mrb, mrb_value self) { + mrb_value idx_value; + mrb_get_args(mrb, "o", &idx_value); + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + mrb_int const idx = match_data_actual_index(mrb, self, idx_value); + match_data_check_index(idx); + return mrb_fixnum_value(reg->beg[idx]); +} + +// ISO 15.2.16.3.3 +static mrb_value +match_data_captures(mrb_state* mrb, mrb_value self) { + mrb_value ary = match_data_to_a(mrb, self); + return mrb_ary_new_from_values(mrb, RARRAY_LEN(ary) - 1, RARRAY_PTR(ary) + 1); +} + +// ISO 15.2.16.3.4 +static mrb_value +match_data_end(mrb_state* mrb, mrb_value self) { + mrb_value idx_value; + mrb_get_args(mrb, "o", &idx_value); + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + mrb_int const idx = match_data_actual_index(mrb, self, idx_value); + match_data_check_index(idx); + return mrb_fixnum_value(reg->end[idx]); +} + +// ISO 15.2.16.3.5 +static mrb_value +match_data_copy(mrb_state* mrb, mrb_value self) { + mrb_value src_val; + mrb_get_args(mrb, "o", &src_val); + + OnigRegion* src; + Data_Get_Struct(mrb, src_val, &mrb_onig_region_type, src); + + OnigRegion* dst = onig_region_new(); + onig_region_copy(dst, src); + + DATA_PTR(self) = dst; + DATA_TYPE(self) = &mrb_onig_region_type; + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "string"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "string"))); + mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "regexp"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "regexp"))); + return self; +} + +// ISO 15.2.16.3.6 +// ISO 15.2.16.3.10 +static mrb_value +match_data_length(mrb_state* mrb, mrb_value self) { + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + return mrb_fixnum_value(reg->num_regs); +} + +// ISO 15.2.16.3.7 +static mrb_value +match_data_offset(mrb_state* mrb, mrb_value self) { + mrb_value idx_value; + mrb_get_args(mrb, "o", &idx_value); + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + mrb_int const idx = match_data_actual_index(mrb, self, idx_value); + match_data_check_index(idx); + mrb_value ret = mrb_ary_new_capa(mrb, 2); + mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->beg[idx])); + mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->end[idx])); + return ret; +} + +// ISO 15.2.16.3.8 +static mrb_value +match_data_post_match(mrb_state* mrb, mrb_value self) { + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string")); + return mrb_str_substr(mrb, str, reg->end[0], RSTRING_LEN(str) - reg->end[0]); +} + +// ISO 15.2.16.3.9 +static mrb_value +match_data_pre_match(mrb_state* mrb, mrb_value self) { + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string")); + return mrb_str_substr(mrb, str, 0, reg->beg[0]); +} + +// ISO 15.2.16.3.11 +static mrb_value +match_data_string(mrb_state* mrb, mrb_value self) { + return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string")); +} + +static mrb_value +match_data_regexp(mrb_state* mrb, mrb_value self) { + return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp")); +} + +// ISO 15.2.16.3.12 +static mrb_value +match_data_to_a(mrb_state* mrb, mrb_value self) { + mrb_value cache = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "cache")); + if(!mrb_nil_p(cache)) { + return cache; + } + + mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string")); + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + + mrb_value ret = mrb_ary_new_capa(mrb, reg->num_regs); + int i, ai = mrb_gc_arena_save(mrb); + for(i = 0; i < reg->num_regs; ++i) { + if(reg->beg[i] == ONIG_REGION_NOTPOS) { + mrb_ary_push(mrb, ret, mrb_nil_value()); + } else { + mrb_ary_push(mrb, ret, mrb_str_substr(mrb, str, reg->beg[i], reg->end[i] - reg->beg[i])); + } + mrb_gc_arena_restore(mrb, ai); + } + return ret; +} + +// ISO 15.2.16.3.13 +static mrb_value +match_data_to_s(mrb_state* mrb, mrb_value self) { + mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string")); + OnigRegion* reg; + Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg); + return mrb_str_substr(mrb, str, reg->beg[0], reg->end[0] - reg->beg[0]); +} + +static void +append_replace_str(mrb_state* mrb, mrb_value result, mrb_value replace, + mrb_value src, OnigRegex reg, OnigRegion* match) +{ + mrb_assert(mrb_string_p(replace)); + char const* ch; + char const* const end = RSTRING_PTR(replace) + RSTRING_LEN(replace); + for(ch = RSTRING_PTR(replace); ch < end; ++ch) { + if (*ch != '\\' || (ch + 1) >= end) { + mrb_str_cat(mrb, result, ch, 1); + continue; + } + + switch(*(++ch)) { // skip back slash and get next char + case 'k': { // group name + if ((ch + 2) >= end || ch[1] != '<') { goto replace_expr_error; } + char const* name_beg = ch += 2; + while (*ch != '>') { if(++ch == end) { goto replace_expr_error; } } + mrb_assert(ch < end); + mrb_assert(*ch == '>'); + int const idx = onig_name_to_backref_number( + reg, (OnigUChar const*)name_beg, (OnigUChar const*)ch, match); + if (idx < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S", + mrb_str_substr(mrb, replace, name_beg - RSTRING_PTR(replace), ch - name_beg)); + } + mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]); + } break; + + case '\\': // escaped back slash + mrb_str_cat(mrb, result, ch, 1); + break; + + default: + if (isdigit(*ch)) { // group number 0-9 + int const idx = *ch - '0'; + if (idx < match->num_regs) { + mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]); + } + } else { + char const str[] = { '\\', *ch }; + mrb_str_cat(mrb, result, str, 2); + } + break; + } + } + + if(ch == end) { return; } + +replace_expr_error: + mrb_raisef(mrb, E_REGEXP_ERROR, "invalid replace expression: %S", replace); +} + +// ISO 15.2.10.5.18 +static mrb_value +string_gsub(mrb_state* mrb, mrb_value self) { + mrb_value blk, match_expr, replace_expr = mrb_nil_value(); + int const argc = mrb_get_args(mrb, "&o|S", &blk, &match_expr, &replace_expr); + + if(mrb_string_p(match_expr)) { + mrb_value argv[] = { match_expr, replace_expr }; + return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_gsub"), argc, argv, blk); + } + + if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) { + blk = mrb_nil_value(); + } + + OnigRegex reg; + Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg); + mrb_value const result = mrb_str_new(mrb, NULL, 0); + mrb_value const match_value = create_onig_region(mrb, self, match_expr); + OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value); + int last_end_pos = 0; + + while(1) { + if(onig_match_common(mrb, reg, match_value, self, last_end_pos) == ONIG_MISMATCH) { break; } + + mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, match->beg[0] - last_end_pos); + + if(mrb_nil_p(blk)) { + append_replace_str(mrb, result, replace_expr, self, reg, match); + } else { + mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, mrb_str_substr( + mrb, self, match->beg[0], match->end[0] - match->beg[0]))); + mrb_assert(mrb_string_p(tmp_str)); + mrb_str_concat(mrb, result, tmp_str); + } + + last_end_pos = match->end[0]; + if (match->beg[0] == match->end[0]) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. + */ + char* p = RSTRING_PTR(self) + last_end_pos; + char* e = p + RSTRING_LEN(self); + int len = utf8len(p, e); + if (RSTRING_LEN(self) < last_end_pos + len) break; + mrb_str_cat(mrb, result, p, len); + last_end_pos += len; + } + } + + mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos); + return result; +} + +// ISO 15.2.10.5.32 +static mrb_value +string_scan(mrb_state* mrb, mrb_value self) { + mrb_value blk, match_expr; + mrb_get_args(mrb, "&o", &blk, &match_expr); + + if(mrb_string_p(match_expr)) { + return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_scan"), + 1, &match_expr, blk); + } + + OnigRegex reg; + Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg); + mrb_value const result = mrb_nil_p(blk)? mrb_ary_new(mrb) : self; + mrb_value m_value = create_onig_region(mrb, self, match_expr); + OnigRegion* const m = (OnigRegion*)DATA_PTR(m_value); + int last_end_pos = 0; + int i; + + while (1) { + if(onig_match_common(mrb, reg, m_value, self, last_end_pos) == ONIG_MISMATCH) { break; } + + if(mrb_nil_p(blk)) { + mrb_assert(mrb_array_p(result)); + if(m->num_regs == 1) { + mrb_ary_push(mrb, result, mrb_str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0])); + } else { + mrb_value const elem = mrb_ary_new_capa(mrb, m->num_regs - 1); + for(i = 1; i < m->num_regs; ++i) { + mrb_ary_push(mrb, elem, mrb_str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i])); + } + mrb_ary_push(mrb, result, elem); + } + } else { // call block + mrb_assert(mrb_string_p(result)); + if(m->num_regs == 1) { + mrb_yield(mrb, blk, mrb_str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0])); + } else { + mrb_value argv = mrb_ary_new_capa(mrb, m->num_regs - 1); + for(i = 1; i < m->num_regs; ++i) { + mrb_ary_push(mrb, argv, mrb_str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i])); + } + mrb_yield(mrb, blk, argv); + } + } + + last_end_pos = m->end[0]; + } + + return result; +} + +// ISO 15.2.10.5.35 +static mrb_value +string_split(mrb_state* mrb, mrb_value self) { + mrb_value pattern = mrb_nil_value(); mrb_int limit = 0; + int argc = mrb_get_args(mrb, "|oi", &pattern, &limit); + + if(argc == 0) { // check $; global variable + pattern = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$;")); + if(!mrb_nil_p(pattern)) { argc = 1; } + } + + if(mrb_nil_p(pattern) || mrb_string_p(pattern)) { + return mrb_funcall(mrb, self, "string_split", argc, pattern, mrb_fixnum_value(limit)); + } + + mrb_value const result = mrb_ary_new(mrb); + if(RSTRING_LEN(self) == 0) { return result; } + + OnigRegex reg; + Data_Get_Struct(mrb, pattern, &mrb_onig_regexp_type, reg); + mrb_value const match_value = create_onig_region(mrb, self, pattern); + OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value); + int last_end_pos = 0, next_match_pos = 0; + mrb_int num_matches = 0; + + while (limit <= 0 || (limit - 1) > num_matches) { + int i; + if(next_match_pos >= RSTRING_LEN(self) || + onig_match_common(mrb, reg, match_value, self, next_match_pos) == ONIG_MISMATCH) { break; } + + if (last_end_pos == match->end[0]) { + ++next_match_pos; + // Remove this loop if not using UTF-8 + for (; next_match_pos < RSTRING_LEN(self) && (RSTRING_PTR(self)[next_match_pos] & 0xC0) == 0x80; + ++next_match_pos) {} + } else { + mrb_ary_push(mrb, result, mrb_str_substr( + mrb, self, last_end_pos, match->beg[0] - last_end_pos)); + // If there are captures, add them to the array + for (i = 1; i < match->num_regs; ++i) { + mrb_ary_push(mrb, result, mrb_str_substr( + mrb, self, match->beg[i], match->end[i] - match->beg[i])); + } + last_end_pos = match->end[0]; + next_match_pos = last_end_pos; + ++num_matches; + } + } + if (last_end_pos <= RSTRING_LEN(self)) { + mrb_ary_push(mrb, result, mrb_str_substr( + mrb, self, last_end_pos, RSTRING_LEN(self) - last_end_pos)); + } + + if (limit == 0) { // remove empty trailing elements + int count = 0, i; + for (i = RARRAY_LEN(result); i > 0; --i) { + mrb_assert(mrb_string_p(RARRAY_PTR(result)[i - 1])); + if (RSTRING_LEN(RARRAY_PTR(result)[i - 1]) != 0) { break; } + else { ++count; } + } + if(count > 0) { + return mrb_ary_new_from_values(mrb, RARRAY_LEN(result) - count, RARRAY_PTR(result)); + } + } + + return result; +} + +// ISO 15.2.10.5.36 +static mrb_value +string_sub(mrb_state* mrb, mrb_value self) { + mrb_value blk, match_expr, replace_expr = mrb_nil_value(); + int const argc = mrb_get_args(mrb, "&o|S", &blk, &match_expr, &replace_expr); + + if(mrb_string_p(match_expr)) { + mrb_value argv[] = { match_expr, replace_expr }; + return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_sub"), argc, argv, blk); + } + + if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "both block and replace expression must not be passed"); + } + + OnigRegex reg; + Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg); + mrb_value const result = mrb_str_new(mrb, NULL, 0); + mrb_value const match_value = create_onig_region(mrb, self, match_expr); + OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value); + + int const onig_result = onig_match_common(mrb, reg, match_value, self, 0); + if(onig_result == ONIG_MISMATCH) { return self; } + + mrb_str_cat(mrb, result, RSTRING_PTR(self), match->beg[0]); + + if(mrb_nil_p(blk)) { + append_replace_str(mrb, result, replace_expr, self, reg, match); + } else { + mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, mrb_str_substr( + mrb, self, match->beg[0], match->end[0] - match->beg[0]))); + mrb_assert(mrb_string_p(tmp_str)); + mrb_str_concat(mrb, result, tmp_str); + } + + int const last_end_pos = match->end[0]; + mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos); + + return result; +} + +static mrb_value +onig_regexp_clear_global_variables(mrb_state* mrb, mrb_value self) { + mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$~")); + mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$&")); + mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$`")); + mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$'")); + mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$+")); + + int idx; + for(idx = 1; idx < 10; ++idx) { + char const n[] = { '$', '0' + idx }; + mrb_gv_remove(mrb, mrb_intern(mrb, n, 2)); + } + + return self; +} + +static mrb_value +onig_regexp_does_set_global_variables(mrb_state* mrb, mrb_value self) { + (void)self; + return mrb_obj_iv_get(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"), + mrb_intern_lit(mrb, "@set_global_variables")); +} +static mrb_value +onig_regexp_set_set_global_variables(mrb_state* mrb, mrb_value self) { + mrb_value arg; + mrb_get_args(mrb, "o", &arg); + mrb_value const ret = mrb_bool_value(mrb_bool(arg)); + mrb_obj_iv_set(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"), + mrb_intern_lit(mrb, "@set_global_variables"), ret); + onig_regexp_clear_global_variables(mrb, self); + return ret; +} + +// ISO 15.2.15.6.2 +static mrb_value +onig_regexp_escape(mrb_state* mrb, mrb_value self) { + char* str_begin; mrb_args_int str_len; + mrb_get_args(mrb, "s", &str_begin, &str_len); + + mrb_value const ret = mrb_str_new(mrb, NULL, 0); + char escaped_char = 0; + int substr_count = 0; + char const* str = str_begin; + + for(; str < (str_begin + str_len); ++str) { + switch(*str) { + case '\n': escaped_char = 'n'; break; + case '\t': escaped_char = 't'; break; + case '\r': escaped_char = 'r'; break; + case '\f': escaped_char = 'f'; break; + + case ' ': + case '#': + case '$': + case '(': + case ')': + case '*': + case '+': + case '-': + case '.': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + escaped_char = *str; break; + + default: ++substr_count; continue; + } + + mrb_str_cat(mrb, ret, str - substr_count, substr_count); + substr_count = 0; + + char const c[] = { '\\', escaped_char }; + mrb_str_cat(mrb, ret, c, 2); + } + mrb_str_cat(mrb, ret, str - substr_count, substr_count); + return ret; +} + +void +mrb_mruby_onig_regexp_gem_init(mrb_state* mrb) { + struct RClass *clazz; + + clazz = mrb_define_class(mrb, "OnigRegexp", mrb->object_class); + MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA); + + // enable global variables setting in onig_match_common by default + mrb_obj_iv_set(mrb, (struct RObject*)clazz, mrb_intern_lit(mrb, "@set_global_variables"), mrb_true_value()); + + mrb_define_const(mrb, clazz, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE)); + mrb_define_const(mrb, clazz, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND)); + mrb_define_const(mrb, clazz, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE)); + mrb_define_const(mrb, clazz, "SINGLELINE", mrb_fixnum_value(ONIG_OPTION_SINGLELINE)); + mrb_define_const(mrb, clazz, "FIND_LONGEST", mrb_fixnum_value(ONIG_OPTION_FIND_LONGEST)); + mrb_define_const(mrb, clazz, "FIND_NOT_EMPTY", mrb_fixnum_value(ONIG_OPTION_FIND_NOT_EMPTY)); + mrb_define_const(mrb, clazz, "NEGATE_SINGLELINE", mrb_fixnum_value(ONIG_OPTION_NEGATE_SINGLELINE)); + mrb_define_const(mrb, clazz, "DONT_CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_DONT_CAPTURE_GROUP)); + mrb_define_const(mrb, clazz, "CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_CAPTURE_GROUP)); + mrb_define_const(mrb, clazz, "NOTBOL", mrb_fixnum_value(ONIG_OPTION_NOTBOL)); + mrb_define_const(mrb, clazz, "NOTEOL", mrb_fixnum_value(ONIG_OPTION_NOTEOL)); +#ifdef ONIG_OPTION_POSIX_REGION + mrb_define_const(mrb, clazz, "POSIX_REGION", mrb_fixnum_value(ONIG_OPTION_POSIX_REGION)); +#endif +#ifdef ONIG_OPTION_ASCII_RANGE + mrb_define_const(mrb, clazz, "ASCII_RANGE", mrb_fixnum_value(ONIG_OPTION_ASCII_RANGE)); +#endif +#ifdef ONIG_OPTION_POSIX_BRACKET_ALL_RANGE + mrb_define_const(mrb, clazz, "POSIX_BRACKET_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)); +#endif +#ifdef ONIG_OPTION_WORD_BOUND_ALL_RANGE + mrb_define_const(mrb, clazz, "WORD_BOUND_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_WORD_BOUND_ALL_RANGE)); +#endif +#ifdef ONIG_OPTION_NEWLINE_CRLF + mrb_define_const(mrb, clazz, "NEWLINE_CRLF", mrb_fixnum_value(ONIG_OPTION_NEWLINE_CRLF)); +#endif +#ifdef ONIG_OPTION_NOTBOS + mrb_define_const(mrb, clazz, "NOTBOS", mrb_fixnum_value(ONIG_OPTION_NOTBOS)); +#endif +#ifdef ONIG_OPTION_NOTEOS + mrb_define_const(mrb, clazz, "NOTEOS", mrb_fixnum_value(ONIG_OPTION_NOTEOS)); +#endif + + mrb_define_method(mrb, clazz, "initialize", onig_regexp_initialize, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(2)); + mrb_define_method(mrb, clazz, "==", onig_regexp_equal, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, clazz, "match", onig_regexp_match, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1)); + mrb_define_method(mrb, clazz, "casefold?", onig_regexp_casefold_p, MRB_ARGS_NONE()); + + mrb_define_method(mrb, clazz, "options", onig_regexp_options, MRB_ARGS_NONE()); + mrb_define_method(mrb, clazz, "inspect", onig_regexp_inspect, MRB_ARGS_NONE()); + mrb_define_method(mrb, clazz, "to_s", onig_regexp_to_s, MRB_ARGS_NONE()); + + mrb_define_module_function(mrb, clazz, "escape", onig_regexp_escape, MRB_ARGS_REQ(1)); + mrb_define_module_function(mrb, clazz, "quote", onig_regexp_escape, MRB_ARGS_REQ(1)); + mrb_define_module_function(mrb, clazz, "version", onig_regexp_version, MRB_ARGS_NONE()); + mrb_define_module_function(mrb, clazz, "set_global_variables?", onig_regexp_does_set_global_variables, MRB_ARGS_NONE()); + mrb_define_module_function(mrb, clazz, "set_global_variables=", onig_regexp_set_set_global_variables, MRB_ARGS_REQ(1)); + mrb_define_module_function(mrb, clazz, "clear_global_variables", onig_regexp_clear_global_variables, MRB_ARGS_NONE()); + + struct RClass* match_data = mrb_define_class(mrb, "OnigMatchData", mrb->object_class); + MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA); + mrb_undef_class_method(mrb, match_data, "new"); + + // mrb_define_method(mrb, match_data, "==", &match_data_eq); + mrb_define_method(mrb, match_data, "[]", &match_data_index, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, match_data, "begin", &match_data_begin, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, match_data, "captures", &match_data_captures, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "end", &match_data_end, MRB_ARGS_REQ(1)); + // mrb_define_method(mrb, match_data, "eql?", &match_data_eq); + // mrb_define_method(mrb, match_data, "hash", &match_data_hash); + mrb_define_method(mrb, match_data, "initialize_copy", &match_data_copy, MRB_ARGS_REQ(1)); + // mrb_define_method(mrb, match_data, "inspect", &match_data_inspect); + mrb_define_method(mrb, match_data, "length", &match_data_length, MRB_ARGS_NONE()); + // mrb_define_method(mrb, match_data, "names", &match_data_names); + mrb_define_method(mrb, match_data, "offset", &match_data_offset, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, match_data, "post_match", &match_data_post_match, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "pre_match", &match_data_pre_match, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "regexp", &match_data_regexp, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "size", &match_data_length, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "string", &match_data_string, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "to_a", &match_data_to_a, MRB_ARGS_NONE()); + mrb_define_method(mrb, match_data, "to_s", &match_data_to_s, MRB_ARGS_NONE()); + // mrb_define_method(mrb, match_data, "values_at", &match_data_values_at); + + mrb_define_method(mrb, mrb->string_class, "onig_regexp_gsub", &string_gsub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK()); + mrb_define_method(mrb, mrb->string_class, "onig_regexp_sub", &string_sub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK()); + mrb_define_method(mrb, mrb->string_class, "onig_regexp_split", &string_split, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, mrb->string_class, "onig_regexp_scan", &string_scan, MRB_ARGS_REQ(1) | MRB_ARGS_BLOCK()); +} + +void +mrb_mruby_onig_regexp_gem_final(mrb_state* mrb) { + (void)mrb; +} + +// vim:set et: diff --git a/web/server/h2o/libh2o/deps/mruby-onig-regexp/test/mruby_onig_regexp.rb b/web/server/h2o/libh2o/deps/mruby-onig-regexp/test/mruby_onig_regexp.rb new file mode 100644 index 000000000..e70b06fd8 --- /dev/null +++ b/web/server/h2o/libh2o/deps/mruby-onig-regexp/test/mruby_onig_regexp.rb @@ -0,0 +1,398 @@ + +# Constant +assert("OnigRegexp::CONSTANT") do + OnigRegexp::IGNORECASE == 1 and OnigRegexp::EXTENDED == 2 and OnigRegexp::MULTILINE == 4 +end + + +# Class method +assert('OnigRgexp.compile', '15.2.15.6.2') do + assert_equal OnigRegexp.compile('.*'), OnigRegexp.compile('.*') +end + +assert('OnigRegexp.escape', '15.2.15.6.2') do + escaping_chars = "\n\t\r\f #$()*+-.?[\\]^{|}" + assert_equal '\n\t\r\f\\ \#\$\(\)\*\+\-\.\?\[\\\\\]\^\{\|\}', OnigRegexp.escape(escaping_chars) + assert_equal 'cute\nmruby\tcute', OnigRegexp.escape("cute\nmruby\tcute") +end + +assert('OnigRegexp.last_match', '15.2.15.6.3') do + OnigRegexp.new('.*') =~ 'ginka' + assert_equal 'ginka', OnigRegexp.last_match[0] +end + +assert('OnigRegexp.quote', '15.2.15.6.4') do + assert_equal '\n', OnigRegexp.quote("\n") +end + +# Instance method +assert('OnigRegexp#initialize', '15.2.15.7.1') do + OnigRegexp.new(".*") and OnigRegexp.new(".*", OnigRegexp::MULTILINE) +end + +assert('OnigRegexp#initialize_copy', '15.2.15.7.2') do + r1 = OnigRegexp.new(".*") + r2 = r1.dup + assert_equal r1, r2 + assert_equal 'kawa', r2.match('kawa')[0] +end + +assert("OnigRegexp#==", '15.2.15.7.3') do + reg1 = reg2 = OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+") + reg3 = OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+") + reg4 = OnigRegexp.new("(https://[^/]+)[-a-zA-Z0-9./]+") + + assert_true(reg1 == reg2 && reg1 == reg3 && !(reg1 == reg4)) + + assert_false(OnigRegexp.new("a") == "a") +end + +assert("OnigRegexp#===", '15.2.15.7.4') do + reg = OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+") + assert_true reg === "http://example.com" + assert_false reg === "htt://example.com" +end + +assert('OnigRegexp#=~', '15.2.15.7.5') do + assert_equal(0) { OnigRegexp.new('.*') =~ 'akari' } + assert_equal(nil) { OnigRegexp.new('t') =~ 'akari' } +end + +assert("OnigRegexp#casefold?", '15.2.15.7.6') do + assert_false OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+", OnigRegexp::MULTILINE).casefold? + assert_true OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+", OnigRegexp::IGNORECASE | OnigRegexp::EXTENDED).casefold? + assert_true OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+", OnigRegexp::MULTILINE | OnigRegexp::IGNORECASE).casefold? + assert_false OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+").casefold? + assert_true OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+", true).casefold? +end + +assert("OnigRegexp#match", '15.2.15.7.7') do + reg = OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+") + assert_false reg.match("http://masamitsu-murase.12345/hoge.html").nil? + assert_nil reg.match("http:///masamitsu-murase.12345/hoge.html") +end + +assert("OnigRegexp#source", '15.2.15.7.8') do + str = "(https?://[^/]+)[-a-zA-Z0-9./]+" + reg = OnigRegexp.new(str) + + reg.source == str +end + +if OnigRegexp.const_defined? :ASCII_RANGE + assert('OnigRegexp#options (no options)') do + assert_equal OnigRegexp::ASCII_RANGE | OnigRegexp::POSIX_BRACKET_ALL_RANGE | OnigRegexp::WORD_BOUND_ALL_RANGE, OnigRegexp.new(".*").options + end + + assert('OnigRegexp#options (multiline)') do + assert_equal OnigRegexp::MULTILINE | OnigRegexp::ASCII_RANGE | OnigRegexp::POSIX_BRACKET_ALL_RANGE | OnigRegexp::WORD_BOUND_ALL_RANGE, OnigRegexp.new(".*", OnigRegexp::MULTILINE).options + end +end + +assert("OnigRegexp#inspect") do + reg = OnigRegexp.new("(https?://[^/]+)[-a-zA-Z0-9./]+") + + assert_equal '/(https?:\/\/[^\/]+)[-a-zA-Z0-9.\/]+/', reg.inspect + assert_equal '/abc\nd\te/mi', OnigRegexp.new("abc\nd\te", OnigRegexp::MULTILINE | OnigRegexp::IGNORECASE).inspect + assert_equal '/abc/min', OnigRegexp.new("abc", OnigRegexp::MULTILINE | OnigRegexp::IGNORECASE, "none").inspect +end + +assert("OnigRegexp#to_s") do + assert_equal '(?-mix:ab+c)', OnigRegexp.new("ab+c").to_s + assert_equal '(?-mix:ab+c)', /ab+c/.to_s + assert_equal '(?mx-i:ab+c)', OnigRegexp.new("ab+c", OnigRegexp::MULTILINE | OnigRegexp::EXTENDED).to_s + assert_equal '(?mi-x:ab+c)', /ab+c/im.to_s + assert_equal '(?mi-x:ab+c)', /ab+c/imn.to_s +end + +assert("OnigRegexp#to_s (composition)") do + re1 = OnigRegexp.new("ab+c") + re2 = OnigRegexp.new("xy#{re1}z") + assert_equal '(?-mix:xy(?-mix:ab+c)z)', re2.to_s + + re3 = OnigRegexp.new("ab.+c", OnigRegexp::MULTILINE) + re4 = OnigRegexp.new("xy#{re3}z", OnigRegexp::IGNORECASE) + assert_equal '(?i-mx:xy(?m-ix:ab.+c)z)', re4.to_s +end + +# Extended patterns. +assert("OnigRegexp#match (no flags)") do + [ + [ ".*", "abcd\nefg", "abcd" ], + [ "^a.", "abcd\naefg", "ab" ], + [ "^a.", "bacd\naefg", "ae" ], + [ ".$", "bacd\naefg", "d" ] + ].each do |reg, str, result| + m = OnigRegexp.new(reg).match(str) + assert_equal result, m[0] if assert_false m.nil? + end +end + +assert("OnigRegexp#match (multiline)") do + patterns = [ + [ OnigRegexp.new(".*", OnigRegexp::MULTILINE), "abcd\nefg", "abcd\nefg" ] + ] + + patterns.all?{ |reg, str, result| reg.match(str)[0] == result } +end + +assert("OnigRegexp#match (ignorecase)") do + [ + [ "aBcD", "00AbcDef", "AbcD" ], + [ "0x[a-f]+", "00XaBCdefG", "0XaBCdef" ], + [ "0x[^c-f]+", "00XaBCdefG", "0XaB" ] + ].each do |reg, str, result| + m = OnigRegexp.new(reg, OnigRegexp::IGNORECASE|OnigRegexp::EXTENDED).match(str) + assert_equal result, m[0] if assert_false m.nil? + end +end + +assert("OnigRegexp#match (none encoding)") do + assert_equal 2, /\x82/n =~ "あ" +end + +assert('OnigRegexp.version') do + OnigRegexp.version.kind_of? String +end + +def onig_match_data_example + OnigRegexp.new('(\w+)(\w)').match('+aaabb-') +end + +assert('OnigMatchData.new') do + assert_raise(NoMethodError) { OnigMatchData.new('aaa', 'i') } +end + +assert('OnigMatchData#[]', '15.2.16.3.1') do + m = onig_match_data_example + assert_equal 'aaabb', m[0] + assert_equal 'aaab', m[1] + assert_equal 'b', m[2] + assert_nil m[3] + + m = OnigRegexp.new('(?<name>\w\w)').match('aba') + assert_raise(TypeError) { m[[]] } + assert_raise(IndexError) { m['nam'] } + assert_equal 'ab', m[:name] + assert_equal 'ab', m['name'] + assert_equal 'ab', m[1] + + m = OnigRegexp.new('(\w) (\w) (\w) (\w)').match('a b c d') + assert_equal %w(a b c d), m[1..-1] +end + +assert('OnigMatchData#begin', '15.2.16.3.2') do + m = onig_match_data_example + assert_equal 1, m.begin(0) + assert_equal 1, m.begin(1) + assert_raise(IndexError) { m.begin 3 } +end + +assert('OnigMatchData#captures', '15.2.16.3.3') do + m = onig_match_data_example + assert_equal ['aaab', 'b'], m.captures + + m = OnigRegexp.new('(\w+)(\d)?').match('+aaabb-') + assert_equal ['aaabb', nil], m.captures +end + +assert('OnigMatchData#end', '15.2.16.3.4') do + m = onig_match_data_example + assert_equal 6, m.end(0) + assert_equal 5, m.end(1) + assert_raise(IndexError) { m.end 3 } +end + +assert('OnigMatchData#initialize_copy', '15.2.16.3.5') do + m = onig_match_data_example + c = m.dup + assert_equal m.to_a, c.to_a +end + +assert('OnigMatchData#length', '15.2.16.3.6') do + assert_equal 3, onig_match_data_example.length +end + +assert('OnigMatchData#offset', '15.2.16.3.7') do + assert_equal [1, 6], onig_match_data_example.offset(0) + assert_equal [1, 5], onig_match_data_example.offset(1) +end + +assert('OnigMatchData#post_match', '15.2.16.3.8') do + assert_equal '-', onig_match_data_example.post_match +end + +assert('OnigMatchData#pre_match', '15.2.16.3.9') do + assert_equal '+', onig_match_data_example.pre_match +end + +assert('OnigMatchData#size', '15.2.16.3.10') do + assert_equal 3, onig_match_data_example.length +end + +assert('OnigMatchData#string', '15.2.16.3.11') do + assert_equal '+aaabb-', onig_match_data_example.string +end + +assert('OnigMatchData#to_a', '15.2.16.3.12') do + assert_equal ['aaabb', 'aaab', 'b'], onig_match_data_example.to_a +end + +assert('OnigMatchData#to_s', '15.2.16.3.13') do + assert_equal 'aaabb', onig_match_data_example.to_s +end + +assert('OnigMatchData#regexp') do + assert_equal '(\w+)(\w)', onig_match_data_example.regexp.source +end + +assert('Invalid regexp') do + assert_raise(ArgumentError) { OnigRegexp.new '[aio' } +end + +assert('String#onig_regexp_gsub') do + test_str = 'hello mruby' + assert_equal 'h*ll* mr*by', test_str.onig_regexp_gsub(OnigRegexp.new('[aeiou]'), '*') + assert_equal 'h<e>ll<o> mr<u>by', test_str.onig_regexp_gsub(OnigRegexp.new('([aeiou])'), '<\1>') + assert_equal 'h e l l o m r u b y ', test_str.onig_regexp_gsub(OnigRegexp.new('\w')) { |v| v + ' ' } + assert_equal 'h{e}ll{o} mr{u}by', test_str.onig_regexp_gsub(OnigRegexp.new('(?<hoge>[aeiou])'), '{\k<hoge>}') + assert_equal '.h.e.l.l.o. .m.r.u.b.y.', test_str.onig_regexp_gsub(OnigRegexp.new(''), '.') + assert_equal " hello\n mruby", "hello\nmruby".onig_regexp_gsub(OnigRegexp.new('^'), ' ') + assert_equal "he<l><><l><>o mruby", test_str.onig_regexp_gsub(OnigRegexp.new('(l)'), '<\1><\2>') +end + +assert('String#onig_regexp_scan') do + test_str = 'mruby world' + assert_equal ['mruby', 'world'], test_str.onig_regexp_scan(OnigRegexp.new('\w+')) + assert_equal ['mru', 'by ', 'wor'], test_str.onig_regexp_scan(OnigRegexp.new('...')) + assert_equal [['mru'], ['by '], ['wor']], test_str.onig_regexp_scan(OnigRegexp.new('(...)')) + assert_equal [['mr', 'ub'], ['y ', 'wo']], test_str.onig_regexp_scan(OnigRegexp.new('(..)(..)')) + + result = [] + assert_equal test_str, test_str.onig_regexp_scan(OnigRegexp.new('\w+')) { |v| result << "<<#{v}>>" } + assert_equal ['<<mruby>>', '<<world>>'], result + + result = '' + assert_equal test_str, test_str.onig_regexp_scan(OnigRegexp.new('(.)(.)')) { |x, y| result += y; result += x } + assert_equal 'rmbu yowlr', result +end + +assert('String#onig_regexp_sub') do + test_str = 'hello mruby' + assert_equal 'h*llo mruby', test_str.onig_regexp_sub(OnigRegexp.new('[aeiou]'), '*') + assert_equal 'h<e>llo mruby', test_str.onig_regexp_sub(OnigRegexp.new('([aeiou])'), '<\1>') + assert_equal 'h ello mruby', test_str.onig_regexp_sub(OnigRegexp.new('\w')) { |v| v + ' ' } + assert_equal 'h{e}llo mruby', test_str.onig_regexp_sub(OnigRegexp.new('(?<hoge>[aeiou])'), '{\k<hoge>}') +end + +assert('String#onig_regexp_split') do + test_str = 'cute mruby cute' + assert_equal ['cute', 'mruby', 'cute'], test_str.onig_regexp_split + assert_equal ['cute', 'mruby', 'cute'], test_str.onig_regexp_split(OnigRegexp.new(' ')) + + prev_splitter = $; + $; = OnigRegexp.new ' \w' + assert_equal ['cute', 'ruby', 'ute'], test_str.onig_regexp_split + $; = 't' + assert_equal ['cu', 'e mruby cu', 'e'], test_str.onig_regexp_split + $; = prev_splitter + + assert_equal ['h', 'e', 'l', 'l', 'o'], 'hello'.onig_regexp_split(OnigRegexp.new('')) + assert_equal ['h', 'e', 'llo'], 'hello'.onig_regexp_split(OnigRegexp.new(''), 3) + assert_equal ['h', 'i', 'd', 'a', 'd'], 'hi dad'.onig_regexp_split(OnigRegexp.new('\s*')) + + test_str = '1, 2, 3, 4, 5,, 6' + assert_equal ['1', '2', '3', '4', '5', '', '6'], test_str.onig_regexp_split(OnigRegexp.new(',\s*')) + + test_str = '1,,2,3,,4,,' + assert_equal ['1', '', '2', '3', '', '4'], test_str.onig_regexp_split(OnigRegexp.new(',')) + assert_equal ['1', '', '2', '3,,4,,'], test_str.onig_regexp_split(OnigRegexp.new(','), 4) + assert_equal ['1', '', '2', '3', '', '4', '', ''], test_str.onig_regexp_split(OnigRegexp.new(','), -4) + + assert_equal [], ''.onig_regexp_split(OnigRegexp.new(','), -1) +end + +assert('String#index') do + assert_equal 0, 'abc'.index('a') + assert_nil 'abc'.index('d') + assert_equal 3, 'abcabc'.index('a', 1) + assert_equal 1, "hello".index(?e) + + assert_equal 0, 'abcabc'.index(/a/) + assert_nil 'abc'.index(/d/) + assert_equal 3, 'abcabc'.index(/a/, 1) + assert_equal 4, "hello".index(/[aeiou]/, -3) + assert_equal 3, "regexpindex".index(/e.*x/, 2) +end + +prev_regexp = Regexp + +Regexp = OnigRegexp + +# global variables +assert('$~') do + m = onig_match_data_example + assert_equal m[0], $~[0] +end + +assert('$&') do + m = onig_match_data_example + assert_equal m[0], $& +end + +assert('$`') do + m = onig_match_data_example + assert_equal m.pre_match, $` +end + +assert('$\'') do + m = onig_match_data_example + assert_equal m.post_match, $' +end + +assert('$+') do + m = onig_match_data_example + assert_equal m[-1], $+ +end + +assert('$1 to $9') do + onig_match_data_example + assert_equal 'aaab', $1 + assert_equal 'b', $2 + assert_nil $3 + assert_nil $4 + assert_nil $5 + assert_nil $6 + assert_nil $7 + assert_nil $8 + assert_nil $9 +end + +assert('default OnigRegexp.set_global_variables?') do + assert_true OnigRegexp.set_global_variables? +end + +assert('change set_global_variables') do + m = onig_match_data_example + assert_equal m[0], $~[0] + + OnigRegexp.set_global_variables = false + assert_false OnigRegexp.set_global_variables? + + # global variables must be cleared when OnigRegexp.set_global_variables gets change + assert_nil $~ + + onig_match_data_example + assert_nil $~ + + OnigRegexp.set_global_variables = true +end + +Regexp = Object + +assert('OnigRegexp not default') do + onig_match_data_example + assert_nil $~ +end + +Regexp = prev_regexp |