From 77e50caaf2ef81cd91075cf836fed0e75718ffb4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 23:12:02 +0200 Subject: Adding debian version 1.8.3-2. Signed-off-by: Daniel Baumann --- debian/vendor-h2o/deps/mruby/src/string.c | 3013 +++++++++++++++++++++++++++++ 1 file changed, 3013 insertions(+) create mode 100644 debian/vendor-h2o/deps/mruby/src/string.c (limited to 'debian/vendor-h2o/deps/mruby/src/string.c') diff --git a/debian/vendor-h2o/deps/mruby/src/string.c b/debian/vendor-h2o/deps/mruby/src/string.c new file mode 100644 index 0000000..01d706f --- /dev/null +++ b/debian/vendor-h2o/deps/mruby/src/string.c @@ -0,0 +1,3013 @@ +/* +** string.c - String class +** +** See Copyright Notice in mruby.h +*/ + +#ifdef _MSC_VER +# define _CRT_NONSTDC_NO_DEPRECATE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct mrb_shared_string { + mrb_bool nofree : 1; + int refcnt; + char *ptr; + mrb_int len; +} mrb_shared_string; + +const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + +#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) + +static struct RString* +str_new_static(mrb_state *mrb, const char *p, size_t len) +{ + struct RString *s; + + if (len >= MRB_INT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); + } + s = mrb_obj_alloc_string(mrb); + s->as.heap.len = (mrb_int)len; + s->as.heap.aux.capa = 0; /* nofree */ + s->as.heap.ptr = (char *)p; + s->flags = MRB_STR_NOFREE; + + return s; +} + +static struct RString* +str_new(mrb_state *mrb, const char *p, size_t len) +{ + struct RString *s; + + if (p && mrb_ro_data_p(p)) { + return str_new_static(mrb, p, len); + } + s = mrb_obj_alloc_string(mrb); + if (len < RSTRING_EMBED_LEN_MAX) { + RSTR_SET_EMBED_FLAG(s); + RSTR_SET_EMBED_LEN(s, len); + if (p) { + memcpy(s->as.ary, p, len); + } + } + else { + if (len >= MRB_INT_MAX) { + 
mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); + } + s->as.heap.len = (mrb_int)len; + s->as.heap.aux.capa = (mrb_int)len; + s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1); + if (p) { + memcpy(s->as.heap.ptr, p, len); + } + } + RSTR_PTR(s)[len] = '\0'; + return s; +} + +static inline void +str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) +{ + s->c = mrb_str_ptr(obj)->c; +} + +static mrb_value +mrb_str_new_empty(mrb_state *mrb, mrb_value str) +{ + struct RString *s = str_new(mrb, 0, 0); + + str_with_class(mrb, s, str); + return mrb_obj_value(s); +} + +MRB_API mrb_value +mrb_str_new_capa(mrb_state *mrb, size_t capa) +{ + struct RString *s; + + s = mrb_obj_alloc_string(mrb); + + if (capa >= MRB_INT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big"); + } + s->as.heap.len = 0; + s->as.heap.aux.capa = (mrb_int)capa; + s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1); + RSTR_PTR(s)[0] = '\0'; + + return mrb_obj_value(s); +} + +#ifndef MRB_STR_BUF_MIN_SIZE +# define MRB_STR_BUF_MIN_SIZE 128 +#endif + +MRB_API mrb_value +mrb_str_buf_new(mrb_state *mrb, size_t capa) +{ + if (capa < MRB_STR_BUF_MIN_SIZE) { + capa = MRB_STR_BUF_MIN_SIZE; + } + return mrb_str_new_capa(mrb, capa); +} + +static void +resize_capa(mrb_state *mrb, struct RString *s, size_t capacity) +{ +#if SIZE_MAX > MRB_INT_MAX + mrb_assert(capacity < MRB_INT_MAX); +#endif + if (RSTR_EMBED_P(s)) { + if (RSTRING_EMBED_LEN_MAX < capacity) { + char *const tmp = (char *)mrb_malloc(mrb, capacity+1); + const mrb_int len = RSTR_EMBED_LEN(s); + memcpy(tmp, s->as.ary, len); + RSTR_UNSET_EMBED_FLAG(s); + s->as.heap.ptr = tmp; + s->as.heap.len = len; + s->as.heap.aux.capa = (mrb_int)capacity; + } + } + else { + s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1); + s->as.heap.aux.capa = (mrb_int)capacity; + } +} + +MRB_API mrb_value +mrb_str_new(mrb_state *mrb, const char *p, size_t len) +{ + return mrb_obj_value(str_new(mrb, p, len)); +} + +/* + * call-seq: 
(Caution! NULL string) + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. + */ + +MRB_API mrb_value +mrb_str_new_cstr(mrb_state *mrb, const char *p) +{ + struct RString *s; + size_t len; + + if (p) { + len = strlen(p); + } + else { + len = 0; + } + + s = str_new(mrb, p, len); + + return mrb_obj_value(s); +} + +MRB_API mrb_value +mrb_str_new_static(mrb_state *mrb, const char *p, size_t len) +{ + struct RString *s = str_new_static(mrb, p, len); + return mrb_obj_value(s); +} + +static void +str_decref(mrb_state *mrb, mrb_shared_string *shared) +{ + shared->refcnt--; + if (shared->refcnt == 0) { + if (!shared->nofree) { + mrb_free(mrb, shared->ptr); + } + mrb_free(mrb, shared); + } +} + +void +mrb_gc_free_str(mrb_state *mrb, struct RString *str) +{ + if (RSTR_EMBED_P(str)) + /* no code */; + else if (RSTR_SHARED_P(str)) + str_decref(mrb, str->as.heap.aux.shared); + else if (!RSTR_NOFREE_P(str)) + mrb_free(mrb, str->as.heap.ptr); +} + +#ifdef MRB_UTF8_STRING +static const char utf8len_codepage[256] = +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1, +}; + +static mrb_int +utf8len(const char* p, const char* e) +{ + mrb_int len; + mrb_int i; + + len = utf8len_codepage[(unsigned char)*p]; + if (p + len > e) return 1; + for (i = 1; i < len; ++i) + if ((p[i] & 0xc0) != 0x80) + return 1; + return len; +} + +static mrb_int +utf8_strlen(mrb_value str, mrb_int len) +{ + mrb_int total = 0; + char* p = RSTRING_PTR(str); + char* e = p; + if 
(RSTRING(str)->flags & MRB_STR_NO_UTF) { + return RSTRING_LEN(str); + } + e += len < 0 ? RSTRING_LEN(str) : len; + while (pflags |= MRB_STR_NO_UTF; + } + return total; +} + +#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1) + +/* map character index to byte offset index */ +static mrb_int +chars2bytes(mrb_value s, mrb_int off, mrb_int idx) +{ + mrb_int i, b, n; + const char *p = RSTRING_PTR(s) + off; + const char *e = RSTRING_END(s); + + for (b=i=0; p n) return -1; + else if (m == n) { + return memcmp(x0, y0, m) == 0 ? 0 : -1; + } + else if (m < 1) { + return 0; + } + else if (m == 1) { + const unsigned char *ys = (const unsigned char *)memchr(y, *x, n); + + if (ys) + return (mrb_int)(ys - y); + else + return -1; + } + return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); +} + +static void +str_make_shared(mrb_state *mrb, struct RString *s) +{ + if (!RSTR_SHARED_P(s)) { + mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); + + shared->refcnt = 1; + if (RSTR_EMBED_P(s)) { + const mrb_int len = RSTR_EMBED_LEN(s); + char *const tmp = (char *)mrb_malloc(mrb, len+1); + memcpy(tmp, s->as.ary, len); + tmp[len] = '\0'; + RSTR_UNSET_EMBED_FLAG(s); + s->as.heap.ptr = tmp; + s->as.heap.len = len; + shared->nofree = FALSE; + shared->ptr = s->as.heap.ptr; + } + else if (RSTR_NOFREE_P(s)) { + shared->nofree = TRUE; + shared->ptr = s->as.heap.ptr; + RSTR_UNSET_NOFREE_FLAG(s); + } + else { + shared->nofree = FALSE; + if (s->as.heap.aux.capa > s->as.heap.len) { + s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1); + } + else { + shared->ptr = s->as.heap.ptr; + } + } + shared->len = s->as.heap.len; + s->as.heap.aux.shared = shared; + RSTR_SET_SHARED_FLAG(s); + } +} + +static mrb_value +byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + struct RString *orig, *s; + mrb_shared_string *shared; + + orig = mrb_str_ptr(str); + if (RSTR_EMBED_P(orig) || 
RSTR_LEN(orig) == 0) { + s = str_new(mrb, orig->as.ary+beg, len); + } + else { + str_make_shared(mrb, orig); + shared = orig->as.heap.aux.shared; + s = mrb_obj_alloc_string(mrb); + s->as.heap.ptr = orig->as.heap.ptr + beg; + s->as.heap.len = len; + s->as.heap.aux.shared = shared; + RSTR_SET_SHARED_FLAG(s); + shared->refcnt++; + } + + return mrb_obj_value(s); +} +#ifdef MRB_UTF8_STRING +static inline mrb_value +str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + beg = chars2bytes(str, 0, beg); + len = chars2bytes(str, beg, len); + + return byte_subseq(mrb, str, beg, len); +} +#else +#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#endif + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + mrb_int clen = RSTRING_CHAR_LEN(str); + + if (len < 0) return mrb_nil_value(); + if (clen == 0) { + len = 0; + } + else if (beg < 0) { + beg = clen + beg; + } + if (beg > clen) return mrb_nil_value(); + if (beg < 0) { + beg += clen; + if (beg < 0) return mrb_nil_value(); + } + if (len > clen - beg) + len = clen - beg; + if (len <= 0) { + len = 0; + } + return str_subseq(mrb, str, beg, len); +} + +MRB_API mrb_int +mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset) +{ + mrb_int pos; + char *s; + mrb_int len; + + len = RSTRING_LEN(str); + if (offset < 0) { + offset += len; + if (offset < 0) return -1; + } + if (len - offset < slen) return -1; + s = RSTRING_PTR(str); + if (offset) { + s += offset; + } + if (slen == 0) return offset; + /* need proceed one character at a time */ + len = RSTRING_LEN(str) - offset; + pos = mrb_memsearch(sptr, slen, s, len); + if (pos < 0) return pos; + return pos + offset; +} + +static mrb_int +str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) +{ + const char *ptr; + mrb_int len; + + ptr = RSTRING_PTR(str2); + len = RSTRING_LEN(str2); + + return mrb_str_index(mrb, str, ptr, len, offset); +} + +static 
void +check_frozen(mrb_state *mrb, struct RString *s) +{ + if (MRB_FROZEN_P(s)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string"); + } +} + +static mrb_value +str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) +{ + long len; + + check_frozen(mrb, s1); + if (s1 == s2) return mrb_obj_value(s1); + s1->flags &= ~MRB_STR_NO_UTF; + s1->flags |= s2->flags&MRB_STR_NO_UTF; + len = RSTR_LEN(s2); + if (RSTR_SHARED_P(s1)) { + str_decref(mrb, s1->as.heap.aux.shared); + } + else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) { + mrb_free(mrb, s1->as.heap.ptr); + } + + RSTR_UNSET_NOFREE_FLAG(s1); + + if (RSTR_SHARED_P(s2)) { +L_SHARE: + RSTR_UNSET_EMBED_FLAG(s1); + s1->as.heap.ptr = s2->as.heap.ptr; + s1->as.heap.len = len; + s1->as.heap.aux.shared = s2->as.heap.aux.shared; + RSTR_SET_SHARED_FLAG(s1); + s1->as.heap.aux.shared->refcnt++; + } + else { + if (len <= RSTRING_EMBED_LEN_MAX) { + RSTR_UNSET_SHARED_FLAG(s1); + RSTR_SET_EMBED_FLAG(s1); + memcpy(s1->as.ary, RSTR_PTR(s2), len); + RSTR_SET_EMBED_LEN(s1, len); + } + else { + str_make_shared(mrb, s2); + goto L_SHARE; + } + } + + return mrb_obj_value(s1); +} + +static mrb_int +str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) +{ + char *s, *sbeg, *t; + struct RString *ps = mrb_str_ptr(str); + mrb_int len = RSTRING_LEN(sub); + + /* substring longer than string */ + if (RSTR_LEN(ps) < len) return -1; + if (RSTR_LEN(ps) - pos < len) { + pos = RSTR_LEN(ps) - len; + } + sbeg = RSTR_PTR(ps); + s = RSTR_PTR(ps) + pos; + t = RSTRING_PTR(sub); + if (len) { + while (sbeg <= s) { + if (memcmp(s, t, len) == 0) { + return (mrb_int)(s - RSTR_PTR(ps)); + } + s--; + } + return -1; + } + else { + return pos; + } +} + +MRB_API mrb_int +mrb_str_strlen(mrb_state *mrb, struct RString *s) +{ + mrb_int i, max = RSTR_LEN(s); + char *p = RSTR_PTR(s); + + if (!p) return 0; + for (i=0; i + +char* +mrb_utf8_from_locale(const char *str, int len) +{ + wchar_t* wcsp; + char* mbsp; + int mbssize, wcssize; + + 
if (len == 0) + return strdup(""); + if (len == -1) + len = (int)strlen(str); + wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0); + wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); + if (!wcsp) + return NULL; + wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1); + wcsp[wcssize] = 0; + + mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL); + mbsp = (char*) malloc((mbssize + 1)); + if (!mbsp) { + free(wcsp); + return NULL; + } + mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL); + mbsp[mbssize] = 0; + free(wcsp); + return mbsp; +} + +char* +mrb_locale_from_utf8(const char *utf8, int len) +{ + wchar_t* wcsp; + char* mbsp; + int mbssize, wcssize; + + if (len == 0) + return strdup(""); + if (len == -1) + len = (int)strlen(utf8); + wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0); + wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); + if (!wcsp) + return NULL; + wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1); + wcsp[wcssize] = 0; + mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL); + mbsp = (char*) malloc((mbssize + 1)); + if (!mbsp) { + free(wcsp); + return NULL; + } + mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL); + mbsp[mbssize] = 0; + free(wcsp); + return mbsp; +} +#endif + +MRB_API void +mrb_str_modify(mrb_state *mrb, struct RString *s) +{ + check_frozen(mrb, s); + s->flags &= ~MRB_STR_NO_UTF; + if (RSTR_SHARED_P(s)) { + mrb_shared_string *shared = s->as.heap.aux.shared; + + if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { + s->as.heap.ptr = shared->ptr; + s->as.heap.aux.capa = shared->len; + RSTR_PTR(s)[s->as.heap.len] = '\0'; + mrb_free(mrb, shared); + } + else { + char *ptr, *p; + mrb_int len; + + p = RSTR_PTR(s); + len = s->as.heap.len; + if (len < RSTRING_EMBED_LEN_MAX) { + 
RSTR_SET_EMBED_FLAG(s); + RSTR_SET_EMBED_LEN(s, len); + ptr = RSTR_PTR(s); + } + else { + ptr = (char *)mrb_malloc(mrb, (size_t)len + 1); + s->as.heap.ptr = ptr; + s->as.heap.aux.capa = len; + } + if (p) { + memcpy(ptr, p, len); + } + ptr[len] = '\0'; + str_decref(mrb, shared); + } + RSTR_UNSET_SHARED_FLAG(s); + return; + } + if (RSTR_NOFREE_P(s)) { + char *p = s->as.heap.ptr; + mrb_int len = s->as.heap.len; + + RSTR_UNSET_NOFREE_FLAG(s); + if (len < RSTRING_EMBED_LEN_MAX) { + RSTR_SET_EMBED_FLAG(s); + RSTR_SET_EMBED_LEN(s, len); + } + else { + s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1); + s->as.heap.aux.capa = len; + } + if (p) { + memcpy(RSTR_PTR(s), p, len); + } + RSTR_PTR(s)[len] = '\0'; + return; + } +} + +MRB_API mrb_value +mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) +{ + mrb_int slen; + struct RString *s = mrb_str_ptr(str); + + mrb_str_modify(mrb, s); + slen = RSTR_LEN(s); + if (len != slen) { + if (slen < len || slen - len > 256) { + resize_capa(mrb, s, len); + } + RSTR_SET_LEN(s, len); + RSTR_PTR(s)[len] = '\0'; /* sentinel */ + } + return str; +} + +MRB_API char* +mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) +{ + struct RString *s; + + if (!mrb_string_p(str0)) { + mrb_raise(mrb, E_TYPE_ERROR, "expected String"); + } + + s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); + if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); + } + return RSTR_PTR(s); +} + +/* + * call-seq: (Caution! String("abcd") change) + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. + */ +MRB_API void +mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) +{ + if (!mrb_string_p(other)) { + other = mrb_str_to_str(mrb, other); + } + mrb_str_cat_str(mrb, self, other); +} + +/* + * call-seq: (Caution! 
String("abcd") remain) + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. + */ +MRB_API mrb_value +mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) +{ + struct RString *s = mrb_str_ptr(a); + struct RString *s2 = mrb_str_ptr(b); + struct RString *t; + + t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2)); + memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s)); + memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2)); + + return mrb_obj_value(t); +} + +/* 15.2.10.5.2 */ + +/* + * call-seq: (Caution! String("abcd") remain) for stack_argument + * String("abcdefg") = String("abcd") + String("efg") + * + * Returns a new string object containing a copy of str. + */ +static mrb_value +mrb_str_plus_m(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + mrb_get_args(mrb, "S", &str); + return mrb_str_plus(mrb, self, str); +} + +/* 15.2.10.5.26 */ +/* 15.2.10.5.33 */ +/* + * call-seq: + * "abcd".size => int + * + * Returns the length of string. + */ +static mrb_value +mrb_str_size(mrb_state *mrb, mrb_value self) +{ + mrb_int len = RSTRING_CHAR_LEN(self); + return mrb_fixnum_value(len); +} + +static mrb_value +mrb_str_bytesize(mrb_state *mrb, mrb_value self) +{ + mrb_int len = RSTRING_LEN(self); + return mrb_fixnum_value(len); +} + +/* 15.2.10.5.1 */ +/* + * call-seq: + * str * integer => new_str + * + * Copy---Returns a new String containing integer copies of + * the receiver. + * + * "Ho! " * 3 #=> "Ho! Ho! Ho! 
" + */ +static mrb_value +mrb_str_times(mrb_state *mrb, mrb_value self) +{ + mrb_int n,len,times; + struct RString *str2; + char *p; + + mrb_get_args(mrb, "i", ×); + if (times < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); + } + if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); + } + + len = RSTRING_LEN(self)*times; + str2 = str_new(mrb, 0, len); + str_with_class(mrb, str2, self); + p = RSTR_PTR(str2); + if (len > 0) { + n = RSTRING_LEN(self); + memcpy(p, RSTRING_PTR(self), n); + while (n <= len/2) { + memcpy(p + n, p, n); + n *= 2; + } + memcpy(p + n, p, len-n); + } + p[RSTR_LEN(str2)] = '\0'; + + return mrb_obj_value(str2); +} +/* -------------------------------------------------------------- */ + +#define lesser(a,b) (((a)>(b))?(b):(a)) + +/* ---------------------------*/ +/* + * call-seq: + * mrb_value str1 <=> mrb_value str2 => int + * > 1 + * = 0 + * < -1 + */ +MRB_API int +mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + mrb_int len; + mrb_int retval; + struct RString *s1 = mrb_str_ptr(str1); + struct RString *s2 = mrb_str_ptr(str2); + + len = lesser(RSTR_LEN(s1), RSTR_LEN(s2)); + retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len); + if (retval == 0) { + if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0; + if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1; + return -1; + } + if (retval > 0) return 1; + return -1; +} + +/* 15.2.10.5.3 */ + +/* + * call-seq: + * str <=> other_str => -1, 0, +1 + * + * Comparison---Returns -1 if other_str is less than, 0 if + * other_str is equal to, and +1 if other_str is greater than + * str. If the strings are of different lengths, and the strings are + * equal when compared up to the shortest length, then the longer string is + * considered greater than the shorter one. If the variable $= is + * false, the comparison is based on comparing the binary values + * of each character in the string. 
In older versions of Ruby, setting + * $= allowed case-insensitive comparisons; this is now deprecated + * in favor of using String#casecmp. + * + * <=> is the basis for the methods <, + * <=, >, >=, and between?, + * included from module Comparable. The method + * String#== does not use Comparable#==. + * + * "abcdef" <=> "abcde" #=> 1 + * "abcdef" <=> "abcdef" #=> 0 + * "abcdef" <=> "abcdefg" #=> -1 + * "abcdef" <=> "ABCDEF" #=> 1 + */ +static mrb_value +mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) +{ + mrb_value str2; + mrb_int result; + + mrb_get_args(mrb, "o", &str2); + if (!mrb_string_p(str2)) { + if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) { + return mrb_nil_value(); + } + else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) { + return mrb_nil_value(); + } + else { + mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); + + if (!mrb_nil_p(tmp)) return mrb_nil_value(); + if (!mrb_fixnum_p(tmp)) { + return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); + } + result = -mrb_fixnum(tmp); + } + } + else { + result = mrb_str_cmp(mrb, str1, str2); + } + return mrb_fixnum_value(result); +} + +static mrb_bool +str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) +{ + const mrb_int len = RSTRING_LEN(str1); + + if (len != RSTRING_LEN(str2)) return FALSE; + if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0) + return TRUE; + return FALSE; +} + +MRB_API mrb_bool +mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) +{ + if (mrb_immediate_p(str2)) return FALSE; + if (!mrb_string_p(str2)) { + if (mrb_nil_p(str2)) return FALSE; + if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { + return FALSE; + } + str2 = mrb_funcall(mrb, str2, "to_str", 0); + return mrb_equal(mrb, str2, str1); + } + return str_eql(mrb, str1, str2); +} + +/* 15.2.10.5.4 */ +/* + * call-seq: + * str == obj => true or false + * + * Equality--- + * If obj is not a String, returns false. 
+ * Otherwise, returns false or true + * + * caution:if str <=> obj returns zero. + */ +static mrb_value +mrb_str_equal_m(mrb_state *mrb, mrb_value str1) +{ + mrb_value str2; + + mrb_get_args(mrb, "o", &str2); + + return mrb_bool_value(mrb_str_equal(mrb, str1, str2)); +} +/* ---------------------------------- */ +MRB_API mrb_value +mrb_str_to_str(mrb_state *mrb, mrb_value str) +{ + mrb_value s; + + if (!mrb_string_p(str)) { + s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); + if (mrb_nil_p(s)) { + s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); + } + return s; + } + return str; +} + +MRB_API const char* +mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr) +{ + mrb_value str = mrb_str_to_str(mrb, ptr); + return RSTRING_PTR(str); +} + +MRB_API mrb_int +mrb_string_value_len(mrb_state *mrb, mrb_value ptr) +{ + mrb_value str = mrb_str_to_str(mrb, ptr); + return RSTRING_LEN(str); +} + +void +mrb_noregexp(mrb_state *mrb, mrb_value self) +{ + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented"); +} + +void +mrb_regexp_check(mrb_state *mrb, mrb_value obj) +{ + if (mrb_regexp_p(mrb, obj)) { + mrb_noregexp(mrb, obj); + } +} + +MRB_API mrb_value +mrb_str_dup(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + struct RString *dup = str_new(mrb, 0, 0); + + str_with_class(mrb, dup, str); + return str_replace(mrb, dup, s); +} + +static mrb_value +mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) +{ + mrb_int idx; + + mrb_regexp_check(mrb, indx); + switch (mrb_type(indx)) { + case MRB_TT_FIXNUM: + idx = mrb_fixnum(indx); + +num_index: + str = str_substr(mrb, str, idx, 1); + if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); + return str; + + case MRB_TT_STRING: + if (str_index_str(mrb, str, indx, 0) != -1) + return mrb_str_dup(mrb, indx); + return mrb_nil_value(); + + case MRB_TT_RANGE: + goto range_arg; + + default: + indx = mrb_Integer(mrb, indx); + if (mrb_nil_p(indx)) { + 
range_arg: + { + mrb_int beg, len; + + len = RSTRING_CHAR_LEN(str); + switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) { + case 1: + return str_subseq(mrb, str, beg, len); + case 2: + return mrb_nil_value(); + default: + break; + } + } + mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); + } + idx = mrb_fixnum(indx); + goto num_index; + } + return mrb_nil_value(); /* not reached */ +} + +/* 15.2.10.5.6 */ +/* 15.2.10.5.34 */ +/* + * call-seq: + * str[fixnum] => fixnum or nil + * str[fixnum, fixnum] => new_str or nil + * str[range] => new_str or nil + * str[regexp] => new_str or nil + * str[regexp, fixnum] => new_str or nil + * str[other_str] => new_str or nil + * str.slice(fixnum) => fixnum or nil + * str.slice(fixnum, fixnum) => new_str or nil + * str.slice(range) => new_str or nil + * str.slice(other_str) => new_str or nil + * + * Element Reference---If passed a single Fixnum, returns the code + * of the character at that position. If passed two Fixnum + * objects, returns a substring starting at the offset given by the first, and + * a length given by the second. If given a range, a substring containing + * characters at offsets given by the range is returned. In all three cases, if + * an offset is negative, it is counted from the end of str. Returns + * nil if the initial offset falls outside the string, the length + * is negative, or the beginning of the range is greater than the end. + * + * If a String is given, that string is returned if it occurs in + * str. In both cases, nil is returned if there is no + * match. 
+ * + * a = "hello there" + * a[1] #=> 101(1.8.7) "e"(1.9.2) + * a[1.1] #=> "e"(1.9.2) + * a[1,3] #=> "ell" + * a[1..3] #=> "ell" + * a[-3,2] #=> "er" + * a[-4..-2] #=> "her" + * a[12..-1] #=> nil + * a[-2..-4] #=> "" + * a["lo"] #=> "lo" + * a["bye"] #=> nil + */ +static mrb_value +mrb_str_aref_m(mrb_state *mrb, mrb_value str) +{ + mrb_value a1, a2; + int argc; + + argc = mrb_get_args(mrb, "o|o", &a1, &a2); + if (argc == 2) { + mrb_int n1, n2; + + mrb_regexp_check(mrb, a1); + mrb_get_args(mrb, "ii", &n1, &n2); + return str_substr(mrb, str, n1, n2); + } + if (argc != 1) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc)); + } + return mrb_str_aref(mrb, str, a1); +} + +/* 15.2.10.5.8 */ +/* + * call-seq: + * str.capitalize! => str or nil + * + * Modifies str by converting the first character to uppercase and the + * remainder to lowercase. Returns nil if no changes are made. + * + * a = "hello" + * a.capitalize! #=> "Hello" + * a #=> "Hello" + * a.capitalize! #=> nil + */ +static mrb_value +mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) +{ + char *p, *pend; + mrb_bool modify = FALSE; + struct RString *s = mrb_str_ptr(str); + + mrb_str_modify(mrb, s); + if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value(); + p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s); + if (ISLOWER(*p)) { + *p = TOUPPER(*p); + modify = TRUE; + } + while (++p < pend) { + if (ISUPPER(*p)) { + *p = TOLOWER(*p); + modify = TRUE; + } + } + if (modify) return str; + return mrb_nil_value(); +} + +/* 15.2.10.5.7 */ +/* + * call-seq: + * str.capitalize => new_str + * + * Returns a copy of str with the first character converted to uppercase + * and the remainder to lowercase. 
+ * + * "hello".capitalize #=> "Hello" + * "HELLO".capitalize #=> "Hello" + * "123ABC".capitalize #=> "123abc" + */ +static mrb_value +mrb_str_capitalize(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_capitalize_bang(mrb, str); + return str; +} + +/* 15.2.10.5.10 */ +/* + * call-seq: + * str.chomp!(separator="\n") => str or nil + * + * Modifies str in place as described for String#chomp, + * returning str, or nil if no modifications were made. + */ +static mrb_value +mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value rs; + mrb_int newline; + char *p, *pp; + mrb_int rslen; + mrb_int len; + mrb_int argc; + struct RString *s = mrb_str_ptr(str); + + mrb_str_modify(mrb, s); + argc = mrb_get_args(mrb, "|S", &rs); + len = RSTR_LEN(s); + if (argc == 0) { + if (len == 0) return mrb_nil_value(); + smart_chomp: + if (RSTR_PTR(s)[len-1] == '\n') { + RSTR_SET_LEN(s, RSTR_LEN(s) - 1); + if (RSTR_LEN(s) > 0 && + RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') { + RSTR_SET_LEN(s, RSTR_LEN(s) - 1); + } + } + else if (RSTR_PTR(s)[len-1] == '\r') { + RSTR_SET_LEN(s, RSTR_LEN(s) - 1); + } + else { + return mrb_nil_value(); + } + RSTR_PTR(s)[RSTR_LEN(s)] = '\0'; + return str; + } + + if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value(); + p = RSTR_PTR(s); + rslen = RSTRING_LEN(rs); + if (rslen == 0) { + while (len>0 && p[len-1] == '\n') { + len--; + if (len>0 && p[len-1] == '\r') + len--; + } + if (len < RSTR_LEN(s)) { + RSTR_SET_LEN(s, len); + p[len] = '\0'; + return str; + } + return mrb_nil_value(); + } + if (rslen > len) return mrb_nil_value(); + newline = RSTRING_PTR(rs)[rslen-1]; + if (rslen == 1 && newline == '\n') + newline = RSTRING_PTR(rs)[rslen-1]; + if (rslen == 1 && newline == '\n') + goto smart_chomp; + + pp = p + len - rslen; + if (p[len-1] == newline && + (rslen <= 1 || + memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { + RSTR_SET_LEN(s, len - rslen); + p[RSTR_LEN(s)] = '\0'; + return str; + } + return mrb_nil_value(); +} + 
+/* 15.2.10.5.9 */ +/* + * call-seq: + * str.chomp(separator="\n") => new_str + * + * Returns a new String with the given record separator removed + * from the end of str (if present). If $/ has not been + * changed from the default Ruby record separator, then chomp also + * removes carriage return characters (that is it will remove \n, + * \r, and \r\n). + * + * "hello".chomp #=> "hello" + * "hello\n".chomp #=> "hello" + * "hello\r\n".chomp #=> "hello" + * "hello\n\r".chomp #=> "hello\n" + * "hello\r".chomp #=> "hello" + * "hello \n there".chomp #=> "hello \n there" + * "hello".chomp("llo") #=> "he" + */ +static mrb_value +mrb_str_chomp(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_chomp_bang(mrb, str); + return str; +} + +/* 15.2.10.5.12 */ +/* + * call-seq: + * str.chop! => str or nil + * + * Processes str as for String#chop, returning str, + * or nil if str is the empty string. See also + * String#chomp!. + */ +static mrb_value +mrb_str_chop_bang(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + + mrb_str_modify(mrb, s); + if (RSTR_LEN(s) > 0) { + mrb_int len; +#ifdef MRB_UTF8_STRING + const char* t = RSTR_PTR(s), *p = t; + const char* e = p + RSTR_LEN(s); + while (p=e) break; + p += clen; + } + len = p - t; +#else + len = RSTR_LEN(s) - 1; +#endif + if (RSTR_PTR(s)[len] == '\n') { + if (len > 0 && + RSTR_PTR(s)[len-1] == '\r') { + len--; + } + } + RSTR_SET_LEN(s, len); + RSTR_PTR(s)[len] = '\0'; + return str; + } + return mrb_nil_value(); +} + +/* 15.2.10.5.11 */ +/* + * call-seq: + * str.chop => new_str + * + * Returns a new String with the last character removed. If the + * string ends with \r\n, both characters are removed. Applying + * chop to an empty string returns an empty + * string. String#chomp is often a safer alternative, as it leaves + * the string unchanged if it doesn't end in a record separator. 
+ * + * "string\r\n".chop #=> "string" + * "string\n\r".chop #=> "string\n" + * "string\n".chop #=> "string" + * "string".chop #=> "strin" + * "x".chop #=> "" + */ +static mrb_value +mrb_str_chop(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + str = mrb_str_dup(mrb, self); + mrb_str_chop_bang(mrb, str); + return str; +} + +/* 15.2.10.5.14 */ +/* + * call-seq: + * str.downcase! => str or nil + * + * Downcases the contents of str, returning nil if no + * changes were made. + */ +static mrb_value +mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) +{ + char *p, *pend; + mrb_bool modify = FALSE; + struct RString *s = mrb_str_ptr(str); + + mrb_str_modify(mrb, s); + p = RSTR_PTR(s); + pend = RSTR_PTR(s) + RSTR_LEN(s); + while (p < pend) { + if (ISUPPER(*p)) { + *p = TOLOWER(*p); + modify = TRUE; + } + p++; + } + + if (modify) return str; + return mrb_nil_value(); +} + +/* 15.2.10.5.13 */ +/* + * call-seq: + * str.downcase => new_str + * + * Returns a copy of str with all uppercase letters replaced with their + * lowercase counterparts. The operation is locale insensitive---only + * characters 'A' to 'Z' are affected. + * + * "hEllO".downcase #=> "hello" + */ +static mrb_value +mrb_str_downcase(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_downcase_bang(mrb, str); + return str; +} + +/* 15.2.10.5.16 */ +/* + * call-seq: + * str.empty? => true or false + * + * Returns true if str has a length of zero. + * + * "hello".empty? #=> false + * "".empty? #=> true + */ +static mrb_value +mrb_str_empty_p(mrb_state *mrb, mrb_value self) +{ + struct RString *s = mrb_str_ptr(self); + + return mrb_bool_value(RSTR_LEN(s) == 0); +} + +/* 15.2.10.5.17 */ +/* + * call-seq: + * str.eql?(other) => true or false + * + * Two strings are equal if the have the same length and content. 
+ */ +static mrb_value +mrb_str_eql(mrb_state *mrb, mrb_value self) +{ + mrb_value str2; + mrb_bool eql_p; + + mrb_get_args(mrb, "o", &str2); + eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2); + + return mrb_bool_value(eql_p); +} + +MRB_API mrb_value +mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return str_substr(mrb, str, beg, len); +} + +mrb_int +mrb_str_hash(mrb_state *mrb, mrb_value str) +{ + /* 1-8-7 */ + struct RString *s = mrb_str_ptr(str); + mrb_int len = RSTR_LEN(s); + char *p = RSTR_PTR(s); + uint64_t key = 0; + + while (len--) { + key = key*65599 + *p; + p++; + } + return (mrb_int)(key + (key>>5)); +} + +/* 15.2.10.5.20 */ +/* + * call-seq: + * str.hash => fixnum + * + * Return a hash based on the string's length and content. + */ +static mrb_value +mrb_str_hash_m(mrb_state *mrb, mrb_value self) +{ + mrb_int key = mrb_str_hash(mrb, self); + return mrb_fixnum_value(key); +} + +/* 15.2.10.5.21 */ +/* + * call-seq: + * str.include? other_str => true or false + * str.include? fixnum => true or false + * + * Returns true if str contains the given string or + * character. + * + * "hello".include? "lo" #=> true + * "hello".include? "ol" #=> false + * "hello".include? ?h #=> true + */ +static mrb_value +mrb_str_include(mrb_state *mrb, mrb_value self) +{ + mrb_value str2; + + mrb_get_args(mrb, "S", &str2); + if (str_index_str(mrb, self, str2, 0) < 0) + return mrb_bool_value(FALSE); + return mrb_bool_value(TRUE); +} + +/* 15.2.10.5.22 */ +/* + * call-seq: + * str.index(substring [, offset]) => fixnum or nil + * str.index(fixnum [, offset]) => fixnum or nil + * str.index(regexp [, offset]) => fixnum or nil + * + * Returns the index of the first occurrence of the given + * substring, + * character (fixnum), or pattern (regexp) in str. + * Returns + * nil if not found. + * If the second parameter is present, it + * specifies the position in the string to begin the search. 
+ * + * "hello".index('e') #=> 1 + * "hello".index('lo') #=> 3 + * "hello".index('a') #=> nil + * "hello".index(101) #=> 1(101=0x65='e') + * "hello".index(/[aeiou]/, -3) #=> 4 + */ +static mrb_value +mrb_str_index_m(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + mrb_int argc; + mrb_value sub; + mrb_int pos, clen; + + mrb_get_args(mrb, "*!", &argv, &argc); + if (argc == 2) { + mrb_get_args(mrb, "oi", &sub, &pos); + } + else { + pos = 0; + if (argc > 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + } + mrb_regexp_check(mrb, sub); + clen = RSTRING_CHAR_LEN(str); + if (pos < 0) { + pos += clen; + if (pos < 0) { + return mrb_nil_value(); + } + } + if (pos > clen) return mrb_nil_value(); + pos = chars2bytes(str, 0, pos); + + switch (mrb_type(sub)) { + default: { + mrb_value tmp; + + tmp = mrb_check_string_type(mrb, sub); + if (mrb_nil_p(tmp)) { + mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); + } + sub = tmp; + } + /* fall through */ + case MRB_TT_STRING: + pos = str_index_str(mrb, str, sub, pos); + break; + } + + if (pos == -1) return mrb_nil_value(); + pos = bytes2chars(RSTRING_PTR(str), pos); + BYTES_ALIGN_CHECK(pos); + return mrb_fixnum_value(pos); +} + +#define STR_REPLACE_SHARED_MIN 10 + +/* 15.2.10.5.24 */ +/* 15.2.10.5.28 */ +/* + * call-seq: + * str.replace(other_str) => str + * + * s = "hello" #=> "hello" + * s.replace "world" #=> "world" + */ +static mrb_value +mrb_str_replace(mrb_state *mrb, mrb_value str) +{ + mrb_value str2; + + mrb_get_args(mrb, "S", &str2); + return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2)); +} + +/* 15.2.10.5.23 */ +/* + * call-seq: + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. 
+ */
+static mrb_value
+mrb_str_init(mrb_state *mrb, mrb_value self)
+{
+  mrb_value str2;
+
+  if (mrb_get_args(mrb, "|S", &str2) == 0) {   /* no argument: fall back to a fresh empty string */
+    struct RString *s = str_new(mrb, 0, 0);
+    str2 = mrb_obj_value(s);
+  }
+  str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));   /* presumably copies str2's contents into self -- helper not visible here */
+  return self;
+}
+
+/* 15.2.10.5.25 */
+/* 15.2.10.5.41 */
+/*
+ *  call-seq:
+ *     str.intern   => symbol
+ *     str.to_sym   => symbol
+ *
+ *  Returns the Symbol corresponding to str, creating the
+ *  symbol if it did not previously exist. See Symbol#id2name.
+ *
+ *     "Koala".intern         #=> :Koala
+ *     s = 'cat'.to_sym       #=> :cat
+ *     s == :cat              #=> true
+ *     s = '@cat'.to_sym      #=> :@cat
+ *     s == :@cat             #=> true
+ *
+ *  This can also be used to create symbols that cannot be represented using the
+ *  :xxx notation.
+ *
+ *     'cat and dog'.to_sym   #=> :"cat and dog"
+ *
+ *  Implementation: wraps the result of mrb_intern_str() with
+ *  mrb_symbol_value().
+ */
+MRB_API mrb_value
+mrb_str_intern(mrb_state *mrb, mrb_value self)
+{
+  return mrb_symbol_value(mrb_intern_str(mrb, self));
+}
+/* ----------------------------------
+ * mrb_obj_as_string: returns obj itself when it already is a String.
+ * Otherwise calls obj.to_s; if that result is not a String the value of
+ * mrb_any_to_s() is returned instead.
+ */
+MRB_API mrb_value
+mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value str;
+
+  if (mrb_string_p(obj)) {
+    return obj;
+  }
+  str = mrb_funcall(mrb, obj, "to_s", 0);
+  if (!mrb_string_p(str))   /* to_s returned a non-String */
+    return mrb_any_to_s(mrb, obj);
+  return str;
+}
+/*
+ * mrb_ptr_to_str: builds a String holding "0x" followed by the value of
+ * the pointer p in lowercase hexadecimal.  The buffer is sized for the
+ * prefix plus sizeof(uintptr_t) * CHAR_BIT / 4 digits; the length is set
+ * afterwards to the number of bytes actually written.
+ */
+MRB_API mrb_value
+mrb_ptr_to_str(mrb_state *mrb, void *p)
+{
+  struct RString *p_str;
+  char *p1;
+  char *p2;
+  uintptr_t n = (uintptr_t)p;
+
+  p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
+  p1 = RSTR_PTR(p_str);
+  *p1++ = '0';
+  *p1++ = 'x';
+  p2 = p1;   /* p1 marks the first digit, p2 advances past the last */
+
+  do {   /* digits come out least-significant first */
+    *p2++ = mrb_digitmap[n % 16];
+    n /= 16;
+  } while (n > 0);
+  *p2 = '\0';
+  RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str)));
+
+  while (p1 < p2) {   /* reverse the digits in place */
+    const char c = *p1;
+    *p1++ = *--p2;
+    *p2 = c;
+  }
+
+  return mrb_obj_value(p_str);
+}
+/*
+ * mrb_string_type: forwards to mrb_convert_type() asking for
+ * MRB_TT_STRING via "to_str".
+ */
+MRB_API mrb_value
+mrb_string_type(mrb_state *mrb, mrb_value str)
+{
+  return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+}
+/*
+ * mrb_check_string_type: forwards to mrb_check_convert_type() asking for
+ * MRB_TT_STRING via "to_str".  Callers in this file treat a nil result
+ * as "not convertible".
+ */
+MRB_API mrb_value
+mrb_check_string_type(mrb_state *mrb, mrb_value str)
+{
+  return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+}
+
+/* 15.2.10.5.30 */
+/*
+ *  call-seq:
+ *     str.reverse!   => str
+ *
+ *  Reverses str in place.
+ *
+ *  In MRB_UTF8_STRING builds a string whose character count equals its
+ *  byte count jumps to the byte-wise path; otherwise the bytes are copied
+ *  to a temporary buffer obtained from mrb_malloc() first.
+ *
+ *  NOTE(review): the body below is damaged.  The line `while (p 1) {`
+ *  is what remains after the text between a '<' and the following '>'
+ *  was lost in extraction; the rest of the UTF-8 loop, the release of
+ *  buf, the `bytes:` label, the #endif and the declarations of s, p, e
+ *  and c for the byte-wise path are all missing.  Restore from the
+ *  upstream file; do not compile this as is.
+ */
+static mrb_value
+mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
+{
+#ifdef MRB_UTF8_STRING
+  mrb_int utf8_len = RSTRING_CHAR_LEN(str);
+  mrb_int len = RSTRING_LEN(str);
+
+  if (utf8_len == len) goto bytes;   /* every character is one byte */
+  if (utf8_len > 1) {
+    char *buf;
+    char *p, *e, *r;
+
+    mrb_str_modify(mrb, mrb_str_ptr(str));
+    len = RSTRING_LEN(str);
+    buf = (char*)mrb_malloc(mrb, (size_t)len);
+    p = buf;
+    e = buf + len;
+
+    memcpy(buf, RSTRING_PTR(str), len);   /* work from a copy of the bytes */
+    r = RSTRING_PTR(str) + len;
+
+    while (p 1) {   /* NOTE(review): truncated by extraction, see header */
+      p = RSTR_PTR(s);
+      e = p + RSTR_LEN(s) - 1;
+      while (p < e) {   /* swap bytes from both ends towards the middle */
+      c = *p;
+      *p++ = *e;
+      *e-- = c;
+      }
+    }
+    return str;
+  }
+}
+
+/* ---------------------------------- */
+/* 15.2.10.5.29 */
+/*
+ *  call-seq:
+ *     str.reverse   => new_str
+ *
+ *  Returns a new string with the characters from str in reverse order.
+ *
+ *     "stressed".reverse   #=> "desserts"
+ *
+ *  Implementation: mrb_str_reverse_bang() applied to a copy made with
+ *  mrb_str_dup().
+ */
+static mrb_value
+mrb_str_reverse(mrb_state *mrb, mrb_value str)
+{
+  mrb_value str2 = mrb_str_dup(mrb, str);
+  mrb_str_reverse_bang(mrb, str2);
+  return str2;
+}
+
+/* 15.2.10.5.31 */
+/*
+ *  call-seq:
+ *     str.rindex(substring [, fixnum])   => fixnum or nil
+ *     str.rindex(fixnum [, fixnum])   => fixnum or nil
+ *     str.rindex(regexp [, fixnum])   => fixnum or nil
+ *
+ *  Returns the index of the last occurrence of the given substring,
+ *  character (fixnum), or pattern (regexp) in str. Returns
+ *  nil if not found. If the second parameter is present, it
+ *  specifies the position in the string to end the search---characters beyond
+ *  this point will not be considered.
+ * + * "hello".rindex('e') #=> 1 + * "hello".rindex('l') #=> 3 + * "hello".rindex('a') #=> nil + * "hello".rindex(101) #=> 1 + * "hello".rindex(/[aeiou]/, -2) #=> 1 + */ +static mrb_value +mrb_str_rindex(mrb_state *mrb, mrb_value str) +{ + mrb_value *argv; + mrb_int argc; + mrb_value sub; + mrb_int pos, len = RSTRING_CHAR_LEN(str); + + mrb_get_args(mrb, "*!", &argv, &argc); + if (argc == 2) { + mrb_get_args(mrb, "oi", &sub, &pos); + if (pos < 0) { + pos += len; + if (pos < 0) { + mrb_regexp_check(mrb, sub); + return mrb_nil_value(); + } + } + if (pos > len) pos = len; + } + else { + pos = len; + if (argc > 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + } + pos = chars2bytes(str, 0, pos); + mrb_regexp_check(mrb, sub); + + switch (mrb_type(sub)) { + default: { + mrb_value tmp; + + tmp = mrb_check_string_type(mrb, sub); + if (mrb_nil_p(tmp)) { + mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); + } + sub = tmp; + } + /* fall through */ + case MRB_TT_STRING: + pos = str_rindex(mrb, str, sub, pos); + if (pos >= 0) { + pos = bytes2chars(RSTRING_PTR(str), pos); + BYTES_ALIGN_CHECK(pos); + return mrb_fixnum_value(pos); + } + break; + + } /* end of switch (TYPE(sub)) */ + return mrb_nil_value(); +} + +/* 15.2.10.5.35 */ + +/* + * call-seq: + * str.split(pattern="\n", [limit]) => anArray + * + * Divides str into substrings based on a delimiter, returning an array + * of these substrings. + * + * If pattern is a String, then its contents are used as + * the delimiter when splitting str. If pattern is a single + * space, str is split on whitespace, with leading whitespace and runs + * of contiguous whitespace characters ignored. + * + * If pattern is a Regexp, str is divided where the + * pattern matches. Whenever the pattern matches a zero-length string, + * str is split into individual characters. + * + * If pattern is omitted, the value of $; is used. If + * $; is nil (which is the default), str is + * split on whitespace as if ' ' were specified. 
+ *
+ *  If the limit parameter is omitted, trailing null fields are
+ *  suppressed. If limit is a positive number, at most that number of
+ *  fields will be returned (if limit is 1, the entire
+ *  string is returned as the only entry in an array). If negative, there is no
+ *  limit to the number of fields returned, and trailing null fields are not
+ *  suppressed.
+ *
+ *     " now's the time".split        #=> ["now's", "the", "time"]
+ *     " now's the time".split(' ')   #=> ["now's", "the", "time"]
+ *     " now's the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
+ *     "hello".split(//)              #=> ["h", "e", "l", "l", "o"]
+ *     "hello".split(//, 3)           #=> ["h", "e", "llo"]
+ *
+ *     "mellow yellow".split("ello")  #=> ["m", "w y", "w"]
+ *     "1,2,,3,4,,".split(',')        #=> ["1", "2", "", "3", "4"]
+ *     "1,2,,3,4,,".split(',', 4)     #=> ["1", "2", "", "3,4,,"]
+ *     "1,2,,3,4,,".split(',', -4)    #=> ["1", "2", "", "3", "4", "", ""]
+ *
+ *  Implementation notes (from the code below): only two strategies are
+ *  implemented here.  "awk" mode (no pattern, nil, or a single space)
+ *  splits on ISSPACE() bytes; "string" mode searches for the pattern
+ *  with mrb_memsearch(), or steps one character at a time when the
+ *  pattern is empty.  Any other pattern type goes to mrb_noregexp().
+ */
+
+static mrb_value
+mrb_str_split_m(mrb_state *mrb, mrb_value str)
+{
+  int argc;
+  mrb_value spat = mrb_nil_value();
+  enum {awk, string, regexp} split_type = string;
+  mrb_int i = 0;   /* number of fields counted against the limit */
+  mrb_int beg;
+  mrb_int end;
+  mrb_int lim = 0;
+  mrb_bool lim_p;   /* TRUE only for an explicit positive limit */
+  mrb_value result, tmp;
+
+  argc = mrb_get_args(mrb, "|oi", &spat, &lim);
+  lim_p = (lim > 0 && argc == 2);
+  if (argc == 2) {
+    if (lim == 1) {   /* limit 1: the whole string is the only field */
+      if (RSTRING_LEN(str) == 0)
+        return mrb_ary_new_capa(mrb, 0);
+      return mrb_ary_new_from_values(mrb, 1, &str);
+    }
+    i = 1;
+  }
+
+  if (argc == 0 || mrb_nil_p(spat)) {
+    split_type = awk;
+  }
+  else {
+    if (mrb_string_p(spat)) {
+      split_type = string;
+      if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {   /* " " behaves like no pattern */
+          split_type = awk;
+      }
+    }
+    else {
+      mrb_noregexp(mrb, str);
+    }
+  }
+
+  result = mrb_ary_new(mrb);
+  beg = 0;
+  if (split_type == awk) {
+    mrb_bool skip = TRUE;   /* TRUE while consuming whitespace between fields */
+    mrb_int idx = 0;
+    mrb_int str_len = RSTRING_LEN(str);
+    unsigned int c;
+    int ai = mrb_gc_arena_save(mrb);
+
+    idx = end = beg;
+    while (idx < str_len) {
+      c = (unsigned char)RSTRING_PTR(str)[idx++];
+      if (skip) {
+        if (ISSPACE(c)) {
+          beg = idx;
+        }
+        else {   /* first byte of a new field */
+          end = idx;
+          skip = FALSE;
+          if (lim_p && lim <= i) break;   /* limit reached: the rest becomes the last field */
+        }
+      }
+      else if (ISSPACE(c)) {   /* field ended: emit bytes [beg, end) */
+        mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
+        mrb_gc_arena_restore(mrb, ai);
+        skip = TRUE;
+        beg = idx;
+        if (lim_p) ++i;
+      }
+      else {
+        end = idx;
+      }
+    }
+  }
+  else if (split_type == string) {
+    mrb_int str_len = RSTRING_LEN(str);
+    mrb_int pat_len = RSTRING_LEN(spat);
+    mrb_int idx = 0;
+    int ai = mrb_gc_arena_save(mrb);
+
+    while (idx < str_len) {
+      if (pat_len > 0) {
+        end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
+        if (end < 0) break;   /* no further delimiter */
+      }
+      else {   /* empty pattern: advance by one character */
+        end = chars2bytes(str, idx, 1);
+      }
+      mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));   /* end is used as a length relative to idx */
+      mrb_gc_arena_restore(mrb, ai);
+      idx += end + pat_len;
+      if (lim_p && lim <= ++i) break;
+    }
+    beg = idx;
+  }
+  else {
+    mrb_noregexp(mrb, str);
+  }
+  if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {   /* append the remaining tail */
+    if (RSTRING_LEN(str) == beg) {
+      tmp = mrb_str_new_empty(mrb, str);
+    }
+    else {
+      tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+    }
+    mrb_ary_push(mrb, result, tmp);
+  }
+  if (!lim_p && lim == 0) {   /* no limit given: strip trailing empty fields */
+    mrb_int len;
+    while ((len = RARRAY_LEN(result)) > 0 &&
+           (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
+      mrb_ary_pop(mrb, result);
+  }
+
+  return result;
+}
+/*
+ * mrb_str_len_to_inum: parses the len bytes at str as an integer in the
+ * given base and returns it as a fixnum.  When badcheck is non-zero,
+ * malformed input jumps to an error label instead of returning 0.
+ *
+ * NOTE(review): this function is badly damaged in this copy.  The lines
+ * `while (p=pend) {`, `while (p= base) {` and `while (p integer` are
+ * what is left after the text between a '<' and the following '>' was
+ * stripped during extraction.  Sign handling, base detection, the digit
+ * loop header, the end of the function (including the `bad` label used
+ * above) and the definitions that sat between this function and
+ * mrb_str_to_i are missing.  The comment opener of the String#to_i
+ * documentation was lost as well, so its text now reads as code.
+ * Restore from the upstream file before use.
+ */
+MRB_API mrb_value
+mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, int base, int badcheck)
+{
+  const char *p = str;
+  const char *pend = str + len;
+  char sign = 1;
+  int c;
+  uint64_t n = 0;
+  mrb_int val;
+
+#define conv_digit(c) \
+    (ISDIGIT(c) ? ((c) - '0') : \
+     ISLOWER(c) ? ((c) - 'a' + 10) : \
+     ISUPPER(c) ? ((c) - 'A' + 10) : \
+     -1)
+
+  if (!p) {
+    if (badcheck) goto bad;
+    return mrb_fixnum_value(0);
+  }
+  while (p=pend) {   /* NOTE(review): truncated by extraction, see header */
+    if (badcheck) goto bad;
+    return mrb_fixnum_value(0);
+  }
+  if (*p == '0') {    /* squeeze preceding 0s */
+    p++;
+    while (p= base) {   /* NOTE(review): truncated by extraction, see header */
+      break;
+    }
+    n *= base;
+    n += c;
+    if (n > (uint64_t)MRB_INT_MAX + (sign ? 0 : 1)) {   /* magnitude no longer fits mrb_int */
+      mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
+                 mrb_str_new(mrb, str, pend-str));
+    }
+  }
+  val = (mrb_int)n;
+  if (badcheck) {
+    if (p == str) goto bad; /* no number */
+    while (p integer   /* NOTE(review): truncated by extraction; the lines down to the closing marker were String#to_i documentation */
+ *
+ *  Returns the result of interpreting leading characters in str as an
+ *  integer base base (between 2 and 36). Extraneous characters past the
+ *  end of a valid number are ignored. If there is not a valid number at the
+ *  start of str, 0 is returned. This method never raises an
+ *  exception.
+ *
+ *     "12345".to_i             #=> 12345
+ *     "99 red balloons".to_i   #=> 99
+ *     "0a".to_i                #=> 0
+ *     "0a".to_i(16)            #=> 10
+ *     "hello".to_i             #=> 0
+ *     "1100101".to_i(2)        #=> 101
+ *     "1100101".to_i(8)        #=> 294977
+ *     "1100101".to_i(10)       #=> 1100101
+ *     "1100101".to_i(16)       #=> 17826049
+ */
+static mrb_value
+mrb_str_to_i(mrb_state *mrb, mrb_value self)
+{
+  mrb_int base = 10;   /* default radix when no argument is given */
+
+  mrb_get_args(mrb, "|i", &base);
+  if (base < 0) {
+    mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+  }
+  return mrb_str_to_inum(mrb, self, base, FALSE);
+}
+/*
+ * mrb_cstr_to_dbl: converts the NUL-terminated text at p to a double
+ * using mrb_float_read().  Returns 0.0 for a NULL pointer.  Leading
+ * whitespace is skipped.  With badcheck false a "0x"/"0X" prefix yields
+ * 0.0; with badcheck true, input that does not parse cleanly raises
+ * E_ARGUMENT_ERROR.  If characters remain after the first parse, the
+ * text is copied into a local buffer with '_' characters removed and
+ * parsed again; text that does not fit the buffer is dropped.
+ */
+MRB_API double
+mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
+{
+  char *end;
+  char buf[DBL_DIG * 4 + 10];
+  double d;
+
+  enum {max_width = 20};   /* not referenced anywhere in this function */
+
+  if (!p) return 0.0;
+  while (ISSPACE(*p)) p++;
+
+  if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+    return 0.0;
+  }
+  d = mrb_float_read(p, &end);
+  if (p == end) {   /* nothing was consumed */
+    if (badcheck) {
+bad:
+      mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
+      /* not reached */
+    }
+    return d;
+  }
+  if (*end) {   /* trailing text: retry on a copy without underscores */
+    char *n = buf;
+    char *e = buf + sizeof(buf) - 1;   /* last slot is kept for the terminator */
+    char prev = 0;
+
+    while (p < end && n < e) prev = *n++ = *p++;   /* copy the part already accepted */
+    while (*p) {
+      if (*p == '_') {
+        /* remove underscores between digits */
+        if (badcheck) {
+          if (n == buf || !ISDIGIT(prev)) goto bad;
+          ++p;
+          if (!ISDIGIT(*p)) goto bad;
+        }
+        else {
+          while (*++p == '_');
+          continue;
+        }
+      }
+      prev = *p++;
+      if (n < e) *n++ = prev;   /* bytes beyond the buffer are dropped */
+    }
+    *n = '\0';
+    p = buf;
+
+    if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+      return 0.0;
+    }
+
+    d = mrb_float_read(p, &end);
+    if (badcheck) {
+      if (!end || p == end) goto bad;
+      while (*end && ISSPACE(*end)) end++;
+      if (*end) goto bad;   /* trailing non-space characters */
+    }
+  }
+  return d;
+}
+/*
+ * mrb_str_to_dbl: converts str with mrb_str_to_str() and parses it with
+ * mrb_cstr_to_dbl().  With badcheck true an embedded NUL byte raises
+ * E_ARGUMENT_ERROR.  If the byte just past the contents is not NUL, a
+ * copy is made with str_new() and parsed instead.
+ */
+MRB_API double
+mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
+{
+  char *s;
+  mrb_int len;
+
+  str = mrb_str_to_str(mrb, str);
+  s = RSTRING_PTR(str);
+  len = RSTRING_LEN(str);
+  if (s) {
+    if (badcheck && memchr(s, '\0', len)) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
+    }
+    if (s[len]) {    /* no sentinel somehow */
+      struct RString *temp_str = str_new(mrb, s, len);
+      s = RSTR_PTR(temp_str);
+    }
+  }
+  return mrb_cstr_to_dbl(mrb, s, badcheck);
+}
+
+/* 15.2.10.5.39 */
+/*
+ *  call-seq:
+ *     str.to_f   => float
+ *
+ *  Returns the result of interpreting leading characters in str as a
+ *  floating point number. Extraneous characters past the end of a valid number
+ *  are ignored. If there is not a valid number at the start of str,
+ *  0.0 is returned. This method never raises an exception.
+ *
+ *     "123.45e1".to_f        #=> 1234.5
+ *     "45.67 degrees".to_f   #=> 45.67
+ *     "thx1138".to_f         #=> 0.0
+ *
+ *  NOTE(review): mrb_init_string() tags "to_f" as 15.2.10.5.38 and
+ *  "to_i" as 15.2.10.5.39, which disagrees with the number above --
+ *  confirm against the specification.
+ */
+static mrb_value
+mrb_str_to_f(mrb_state *mrb, mrb_value self)
+{
+  return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
+}
+
+/* 15.2.10.5.40 */
+/*
+ *  call-seq:
+ *     str.to_s     => str
+ *     str.to_str   => str
+ *
+ *  Returns the receiver.
+ *
+ *  From the code below: when the receiver's class is not exactly the
+ *  String class, a duplicate made with mrb_str_dup() is returned instead
+ *  of the receiver itself.
+ */
+static mrb_value
+mrb_str_to_s(mrb_state *mrb, mrb_value self)
+{
+  if (mrb_obj_class(mrb, self) != mrb->string_class) {
+    return mrb_str_dup(mrb, self);
+  }
+  return self;
+}
+
+/* 15.2.10.5.43 */
+/*
+ *  call-seq:
+ *     str.upcase!   => str or nil
+ *
+ *  Upcases the contents of str, returning nil if no changes
+ *  were made.
+ *
+ *  Every byte for which ISLOWER() holds is replaced by its TOUPPER()
+ *  value, after mrb_str_modify() has been called on the string.
+ */
+static mrb_value
+mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
+{
+  struct RString *s = mrb_str_ptr(str);
+  char *p, *pend;
+  mrb_bool modify = FALSE;   /* set once any byte is changed */
+
+  mrb_str_modify(mrb, s);
+  p = RSTRING_PTR(str);
+  pend = RSTRING_END(str);
+  while (p < pend) {
+    if (ISLOWER(*p)) {
+      *p = TOUPPER(*p);
+      modify = TRUE;
+    }
+    p++;
+  }
+
+  if (modify) return str;
+  return mrb_nil_value();
+}
+
+/* 15.2.10.5.42 */
+/*
+ *  call-seq:
+ *     str.upcase   => new_str
+ *
+ *  Returns a copy of str with all lowercase letters replaced with their
+ *  uppercase counterparts. The operation is locale insensitive---only
+ *  characters 'a' to 'z' are affected.
+ *
+ *     "hEllO".upcase   #=> "HELLO"
+ *
+ *  Implementation: mrb_str_upcase_bang() applied to a copy made with
+ *  mrb_str_dup(); the copy is returned even if nothing changed.
+ */
+static mrb_value
+mrb_str_upcase(mrb_state *mrb, mrb_value self)
+{
+  mrb_value str;
+
+  str = mrb_str_dup(mrb, self);
+  mrb_str_upcase_bang(mrb, str);
+  return str;
+}
+
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))   /* true when p is inside [p,e) and points at '$', '@' or '{' */
+
+/*
+ *  call-seq:
+ *     str.dump   -> new_str
+ *
+ *  Produces a version of str with all nonprinting characters replaced by
+ *  \nnn notation and all special characters escaped.
+ *
+ *  The work is done in two passes over the bytes: the first adds up the
+ *  output length, the second writes into a string of exactly that size
+ *  obtained from str_new().  Both passes must classify every byte the
+ *  same way.  The result is given the class of str.
+ */
+mrb_value
+mrb_str_dump(mrb_state *mrb, mrb_value str)
+{
+  mrb_int len;
+  const char *p, *pend;
+  char *q;
+  struct RString *result;
+
+  len = 2;                      /* "" */
+  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+  while (p < pend) {   /* pass 1: measure */
+    unsigned char c = *p++;
+    switch (c) {
+      case '"':  case '\\':
+      case '\n': case '\r':
+      case '\t': case '\f':
+      case '\013': case '\010': case '\007': case '\033':
+        len += 2;   /* backslash plus one character */
+        break;
+
+      case '#':
+        len += IS_EVSTR(p, pend) ? 2 : 1;   /* '#' is escaped only before '$', '@' or '{' */
+        break;
+
+      default:
+        if (ISPRINT(c)) {
+          len++;
+        }
+        else {
+          len += 4;                /* \NNN */
+        }
+        break;
+    }
+  }
+
+  result = str_new(mrb, 0, len);
+  str_with_class(mrb, result, str);
+  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+  q = RSTR_PTR(result);
+  *q++ = '"';
+  while (p < pend) {   /* pass 2: emit */
+    unsigned char c = *p++;
+
+    switch (c) {
+      case '"':
+      case '\\':
+        *q++ = '\\';
+        *q++ = c;
+        break;
+
+      case '\n':
+        *q++ = '\\';
+        *q++ = 'n';
+        break;
+
+      case '\r':
+        *q++ = '\\';
+        *q++ = 'r';
+        break;
+
+      case '\t':
+        *q++ = '\\';
+        *q++ = 't';
+        break;
+
+      case '\f':
+        *q++ = '\\';
+        *q++ = 'f';
+        break;
+
+      case '\013':
+        *q++ = '\\';
+        *q++ = 'v';
+        break;
+
+      case '\010':
+        *q++ = '\\';
+        *q++ = 'b';
+        break;
+
+      case '\007':
+        *q++ = '\\';
+        *q++ = 'a';
+        break;
+
+      case '\033':
+        *q++ = '\\';
+        *q++ = 'e';
+        break;
+
+      case '#':
+        if (IS_EVSTR(p, pend)) *q++ = '\\';
+        *q++ = '#';
+        break;
+
+      default:
+        if (ISPRINT(c)) {
+          *q++ = c;
+        }
+        else {   /* three octal digits, written last digit first */
+          *q++ = '\\';
+          q[2] = '0' + c % 8; c /= 8;
+          q[1] = '0' + c % 8; c /= 8;
+          q[0] = '0' + c % 8;
+          q += 3;
+        }
+    }
+  }
+  *q = '"';
+  return mrb_obj_value(result);
+}
+/*
+ * mrb_str_cat: appends len bytes starting at ptr to str and returns str.
+ * A zero len returns immediately.  Raises E_ARGUMENT_ERROR ("string size
+ * too big") when the combined length reaches MRB_INT_MAX or no valid
+ * capacity can be found.  ptr may point into str's own buffer: its
+ * offset is remembered before any resize and re-applied afterwards.
+ */
+MRB_API mrb_value
+mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+{
+  struct RString *s = mrb_str_ptr(str);
+  size_t capa;
+  size_t total;
+  ptrdiff_t off = -1;   /* -1 means ptr is not inside s */
+
+  if (len == 0) return str;
+  mrb_str_modify(mrb, s);
+  if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
+      off = ptr - RSTR_PTR(s);
+  }
+
+  capa = RSTR_CAPA(s);
+  total = RSTR_LEN(s)+len;
+  if (total >= MRB_INT_MAX) {
+  size_error:
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+  }
+  if (capa <= total) {   /* grow: double until total fits */
+    if (capa == 0) capa = 1;
+    while (capa <= total) {
+      if (capa <= MRB_INT_MAX / 2) {
+        capa *= 2;
+      }
+      else {   /* doubling would pass MRB_INT_MAX: take the minimum that fits */
+        capa = total+1;
+      }
+    }
+    if (capa <= total || capa > MRB_INT_MAX) {
+      goto size_error;
+    }
+    resize_capa(mrb, s, capa);
+  }
+  if (off != -1) {   /* the buffer may have moved: recompute ptr */
+      ptr = RSTR_PTR(s) + off;
+  }
+  memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
+  mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX);
+  RSTR_SET_LEN(s, total);
+  RSTR_PTR(s)[total] = '\0';   /* sentinel */
+  return str;
+}
+/*
+ * mrb_str_cat_cstr: appends the NUL-terminated string ptr, measuring it
+ * with strlen().
+ */
+MRB_API mrb_value
+mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
+{
+  return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+}
+/*
+ * mrb_str_cat_str: appends the bytes of the String str2 to str.
+ */
+MRB_API mrb_value
+mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
+{
+  return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
+}
+/*
+ * mrb_str_append: passes str2 through mrb_str_to_str() and appends the
+ * result to str1.
+ */
+MRB_API mrb_value
+mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  str2 = mrb_str_to_str(mrb, str2);
+  return mrb_str_cat_str(mrb, str1, str2);
+}
+
+#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
+
+/*
+ * call-seq:
+ *   str.inspect   -> string
+ *
+ * Returns a printable version of _str_, surrounded by quote marks,
+ * with special characters escaped.
+ *
+ *    str = "hello"
+ *    str[3] = "\b"
+ *    str.inspect       #=> "\"hel\\bo\""
+ *
+ * NOTE(review): the body below is damaged.  The line
+ * `for (i=0; i array of fixnums` is what is left after the text between
+ * a '<' and the following '>' was stripped during extraction.  The rest
+ * of this function and the opening of the String#bytes documentation
+ * are missing, so the documentation text that follows reads as code.
+ * Restore from the upstream file before use.
+ */
+mrb_value
+mrb_str_inspect(mrb_state *mrb, mrb_value str)
+{
+  const char *p, *pend;
+  char buf[CHAR_ESC_LEN + 1];
+  mrb_value result = mrb_str_new_lit(mrb, "\"");   /* output starts with the opening quote */
+
+  p = RSTRING_PTR(str); pend = RSTRING_END(str);
+  for (;p < pend; p++) {
+    unsigned char c, cc;
+#ifdef MRB_UTF8_STRING
+    mrb_int clen;
+
+    clen = utf8len(p, pend);
+    if (clen > 1) {
+      mrb_int i;
+
+      for (i=0; i array of fixnums   /* NOTE(review): truncated by extraction, see header */
+ *
+ * Returns an array of bytes in _str_.
+ * + * str = "hello" + * str.bytes #=> [104, 101, 108, 108, 111] + */ +static mrb_value +mrb_str_bytes(mrb_state *mrb, mrb_value str) +{ + struct RString *s = mrb_str_ptr(str); + mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s)); + unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s); + + while (p < pend) { + mrb_ary_push(mrb, a, mrb_fixnum_value(p[0])); + p++; + } + return a; +} + +/* ---------------------------*/ +void +mrb_init_string(mrb_state *mrb) +{ + struct RClass *s; + + mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string"); + + mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ + MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); + + mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE()); + + mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */ + mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */ + mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ + mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ + mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ + mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ + mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ + mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ + mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */ + mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */ + mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */ + mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */ + mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 
15.2.10.5.14 */ + mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */ + mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */ + + mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */ + mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ + mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */ + mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ + mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */ + mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */ + mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */ + mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */ + mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */ + mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */ + mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */ + mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */ + mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */ + mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */ + + mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */ + mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ + mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ + mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ + mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ + 
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ + mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ + mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); +} + +/* + * Source code for the "strtod" library procedure. + * + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. + * + * Permission to use, copy, modify, and distribute this + * software and its documentation for any purpose and without + * fee is hereby granted, provided that the above copyright + * notice appear in all copies. The University of California + * makes no representations about the suitability of this + * software for any purpose. It is provided "as is" without + * express or implied warranty. + * + * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $ + */ + +#include +#include + +static const int maxExponent = 511; /* Largest possible base 10 exponent. Any + * exponent larger than this will already + * produce underflow or overflow, so there's + * no need to worry about additional digits. + */ +static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */ + 10., /* is 10^2^i. Used to convert decimal */ + 100., /* exponents into floating-point numbers. */ + 1.0e4, + 1.0e8, + 1.0e16, + 1.0e32, + 1.0e64, + 1.0e128, + 1.0e256 +}; + +MRB_API double +mrb_float_read(const char *string, char **endPtr) +/* const char *string; A decimal ASCII floating-point number, + * optionally preceded by white space. + * Must have form "-I.FE-X", where I is the + * integer part of the mantissa, F is the + * fractional part of the mantissa, and X + * is the exponent. Either of the signs + * may be "+", "-", or omitted. Either I + * or F may be omitted, or both. The decimal + * point isn't necessary unless F is present. + * The "E" may actually be an "e". E and X + * may both be omitted (but not just one). 
+ */ +/* char **endPtr; If non-NULL, store terminating character's + * address here. */ +{ + int sign, expSign = FALSE; + double fraction, dblExp; + const double *d; + register const char *p; + register int c; + int exp = 0; /* Exponent read from "EX" field. */ + int fracExp = 0; /* Exponent that derives from the fractional + * part. Under normal circumstatnces, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + int mantSize; /* Number of digits in mantissa. */ + int decPt; /* Number of mantissa digits BEFORE decimal + * point. */ + const char *pExp; /* Temporarily holds location of exponent + * in string. */ + + /* + * Strip off leading blanks and check for a sign. + */ + + p = string; + while (isspace(*p)) { + p += 1; + } + if (*p == '-') { + sign = TRUE; + p += 1; + } + else { + if (*p == '+') { + p += 1; + } + sign = FALSE; + } + + /* + * Count the number of digits in the mantissa (including the decimal + * point), and also locate the decimal point. + */ + + decPt = -1; + for (mantSize = 0; ; mantSize += 1) + { + c = *p; + if (!isdigit(c)) { + if ((c != '.') || (decPt >= 0)) { + break; + } + decPt = mantSize; + } + p += 1; + } + + /* + * Now suck up the digits in the mantissa. Use two integers to + * collect 9 digits each (this is faster than using floating-point). + * If the mantissa has more than 18 digits, ignore the extras, since + * they can't affect the value anyway. + */ + + pExp = p; + p -= mantSize; + if (decPt < 0) { + decPt = mantSize; + } + else { + mantSize -= 1; /* One of the digits was the point. 
*/ + } + if (mantSize > 18) { + if (decPt - 18 > 29999) { + fracExp = 29999; + } + else { + fracExp = decPt - 18; + } + mantSize = 18; + } + else { + fracExp = decPt - mantSize; + } + if (mantSize == 0) { + fraction = 0.0; + p = string; + goto done; + } + else { + int frac1, frac2; + frac1 = 0; + for ( ; mantSize > 9; mantSize -= 1) + { + c = *p; + p += 1; + if (c == '.') { + c = *p; + p += 1; + } + frac1 = 10*frac1 + (c - '0'); + } + frac2 = 0; + for (; mantSize > 0; mantSize -= 1) + { + c = *p; + p += 1; + if (c == '.') { + c = *p; + p += 1; + } + frac2 = 10*frac2 + (c - '0'); + } + fraction = (1.0e9 * frac1) + frac2; + } + + /* + * Skim off the exponent. + */ + + p = pExp; + if ((*p == 'E') || (*p == 'e')) { + p += 1; + if (*p == '-') { + expSign = TRUE; + p += 1; + } + else { + if (*p == '+') { + p += 1; + } + expSign = FALSE; + } + while (isdigit(*p)) { + exp = exp * 10 + (*p - '0'); + if (exp > 19999) { + exp = 19999; + } + p += 1; + } + } + if (expSign) { + exp = fracExp - exp; + } + else { + exp = fracExp + exp; + } + + /* + * Generate a floating-point number that represents the exponent. + * Do this by processing the exponent one bit at a time to combine + * many powers of 2 of 10. Then combine the exponent with the + * fraction. + */ + + if (exp < 0) { + expSign = TRUE; + exp = -exp; + } + else { + expSign = FALSE; + } + if (exp > maxExponent) { + exp = maxExponent; + errno = ERANGE; + } + dblExp = 1.0; + for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { + if (exp & 01) { + dblExp *= *d; + } + } + if (expSign) { + fraction /= dblExp; + } + else { + fraction *= dblExp; + } + +done: + if (endPtr != NULL) { + *endPtr = (char *) p; + } + + if (sign) { + return -fraction; + } + return fraction; +} -- cgit v1.2.3