summaryrefslogtreecommitdiffstats
path: root/web/server/h2o/libh2o/deps/mruby/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--web/server/h2o/libh2o/deps/mruby/src/string.c3013
1 files changed, 3013 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/mruby/src/string.c b/web/server/h2o/libh2o/deps/mruby/src/string.c
new file mode 100644
index 00000000..01d706fa
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/mruby/src/string.c
@@ -0,0 +1,3013 @@
+/*
+** string.c - String class
+**
+** See Copyright Notice in mruby.h
+*/
+
+#ifdef _MSC_VER
+# define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
+#include <float.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <mruby.h>
+#include <mruby/array.h>
+#include <mruby/class.h>
+#include <mruby/range.h>
+#include <mruby/string.h>
+#include <mruby/re.h>
+
+typedef struct mrb_shared_string {
+ mrb_bool nofree : 1;
+ int refcnt;
+ char *ptr;
+ mrb_int len;
+} mrb_shared_string;
+
+const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+
+#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
+
+static struct RString*
+str_new_static(mrb_state *mrb, const char *p, size_t len)
+{
+ struct RString *s;
+
+ if (len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ s = mrb_obj_alloc_string(mrb);
+ s->as.heap.len = (mrb_int)len;
+ s->as.heap.aux.capa = 0; /* nofree */
+ s->as.heap.ptr = (char *)p;
+ s->flags = MRB_STR_NOFREE;
+
+ return s;
+}
+
+static struct RString*
+str_new(mrb_state *mrb, const char *p, size_t len)
+{
+ struct RString *s;
+
+ if (p && mrb_ro_data_p(p)) {
+ return str_new_static(mrb, p, len);
+ }
+ s = mrb_obj_alloc_string(mrb);
+ if (len < RSTRING_EMBED_LEN_MAX) {
+ RSTR_SET_EMBED_FLAG(s);
+ RSTR_SET_EMBED_LEN(s, len);
+ if (p) {
+ memcpy(s->as.ary, p, len);
+ }
+ }
+ else {
+ if (len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ s->as.heap.len = (mrb_int)len;
+ s->as.heap.aux.capa = (mrb_int)len;
+ s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1);
+ if (p) {
+ memcpy(s->as.heap.ptr, p, len);
+ }
+ }
+ RSTR_PTR(s)[len] = '\0';
+ return s;
+}
+
+static inline void
+str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
+{
+ s->c = mrb_str_ptr(obj)->c;
+}
+
+static mrb_value
+mrb_str_new_empty(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = str_new(mrb, 0, 0);
+
+ str_with_class(mrb, s, str);
+ return mrb_obj_value(s);
+}
+
+MRB_API mrb_value
+mrb_str_new_capa(mrb_state *mrb, size_t capa)
+{
+ struct RString *s;
+
+ s = mrb_obj_alloc_string(mrb);
+
+ if (capa >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
+ }
+ s->as.heap.len = 0;
+ s->as.heap.aux.capa = (mrb_int)capa;
+ s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1);
+ RSTR_PTR(s)[0] = '\0';
+
+ return mrb_obj_value(s);
+}
+
+#ifndef MRB_STR_BUF_MIN_SIZE
+# define MRB_STR_BUF_MIN_SIZE 128
+#endif
+
+MRB_API mrb_value
+mrb_str_buf_new(mrb_state *mrb, size_t capa)
+{
+ if (capa < MRB_STR_BUF_MIN_SIZE) {
+ capa = MRB_STR_BUF_MIN_SIZE;
+ }
+ return mrb_str_new_capa(mrb, capa);
+}
+
+static void
+resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
+{
+#if SIZE_MAX > MRB_INT_MAX
+ mrb_assert(capacity < MRB_INT_MAX);
+#endif
+ if (RSTR_EMBED_P(s)) {
+ if (RSTRING_EMBED_LEN_MAX < capacity) {
+ char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
+ const mrb_int len = RSTR_EMBED_LEN(s);
+ memcpy(tmp, s->as.ary, len);
+ RSTR_UNSET_EMBED_FLAG(s);
+ s->as.heap.ptr = tmp;
+ s->as.heap.len = len;
+ s->as.heap.aux.capa = (mrb_int)capacity;
+ }
+ }
+ else {
+ s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
+ s->as.heap.aux.capa = (mrb_int)capacity;
+ }
+}
+
+MRB_API mrb_value
+mrb_str_new(mrb_state *mrb, const char *p, size_t len)
+{
+ return mrb_obj_value(str_new(mrb, p, len));
+}
+
+/*
+ * call-seq: (Caution! NULL string)
+ * String.new(str="") => new_str
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+
+MRB_API mrb_value
+mrb_str_new_cstr(mrb_state *mrb, const char *p)
+{
+ struct RString *s;
+ size_t len;
+
+ if (p) {
+ len = strlen(p);
+ }
+ else {
+ len = 0;
+ }
+
+ s = str_new(mrb, p, len);
+
+ return mrb_obj_value(s);
+}
+
+MRB_API mrb_value
+mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
+{
+ struct RString *s = str_new_static(mrb, p, len);
+ return mrb_obj_value(s);
+}
+
+static void
+str_decref(mrb_state *mrb, mrb_shared_string *shared)
+{
+ shared->refcnt--;
+ if (shared->refcnt == 0) {
+ if (!shared->nofree) {
+ mrb_free(mrb, shared->ptr);
+ }
+ mrb_free(mrb, shared);
+ }
+}
+
+void
+mrb_gc_free_str(mrb_state *mrb, struct RString *str)
+{
+ if (RSTR_EMBED_P(str))
+ /* no code */;
+ else if (RSTR_SHARED_P(str))
+ str_decref(mrb, str->as.heap.aux.shared);
+ else if (!RSTR_NOFREE_P(str))
+ mrb_free(mrb, str->as.heap.ptr);
+}
+
+#ifdef MRB_UTF8_STRING
+static const char utf8len_codepage[256] =
+{
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+static mrb_int
+utf8len(const char* p, const char* e)
+{
+ mrb_int len;
+ mrb_int i;
+
+ len = utf8len_codepage[(unsigned char)*p];
+ if (p + len > e) return 1;
+ for (i = 1; i < len; ++i)
+ if ((p[i] & 0xc0) != 0x80)
+ return 1;
+ return len;
+}
+
+static mrb_int
+utf8_strlen(mrb_value str, mrb_int len)
+{
+ mrb_int total = 0;
+ char* p = RSTRING_PTR(str);
+ char* e = p;
+ if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
+ return RSTRING_LEN(str);
+ }
+ e += len < 0 ? RSTRING_LEN(str) : len;
+ while (p<e) {
+ p += utf8len(p, e);
+ total++;
+ }
+ if (RSTRING_LEN(str) == total) {
+ RSTRING(str)->flags |= MRB_STR_NO_UTF;
+ }
+ return total;
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+
+/* map character index to byte offset index */
+static mrb_int
+chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
+{
+ mrb_int i, b, n;
+ const char *p = RSTRING_PTR(s) + off;
+ const char *e = RSTRING_END(s);
+
+ for (b=i=0; p<e && i<idx; i++) {
+ n = utf8len(p, e);
+ b += n;
+ p += n;
+ }
+ return b;
+}
+
+/* map byte offset to character index */
+static mrb_int
+bytes2chars(char *p, mrb_int bi)
+{
+ mrb_int i, b, n;
+
+ for (b=i=0; b<bi; i++) {
+ n = utf8len_codepage[(unsigned char)*p];
+ b += n;
+ p += n;
+ }
+ if (b != bi) return -1;
+ return i;
+}
+
+#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
+#else
+#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
+#define chars2bytes(p, off, ci) (ci)
+#define bytes2chars(p, bi) (bi)
+#define BYTES_ALIGN_CHECK(pos)
+#endif
+
+static inline mrb_int
+mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
+{
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys;
+ int i;
+ ptrdiff_t qstable[256];
+
+ /* Preprocessing */
+ for (i = 0; i < 256; ++i)
+ qstable[i] = m + 1;
+ for (; x < xe; ++x)
+ qstable[*x] = xe - x;
+ /* Searching */
+ for (; y + m <= ys + n; y += *(qstable + y[m])) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return (mrb_int)(y - ys);
+ }
+ return -1;
+}
+
+static mrb_int
+mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
+{
+ const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
+
+ if (m > n) return -1;
+ else if (m == n) {
+ return memcmp(x0, y0, m) == 0 ? 0 : -1;
+ }
+ else if (m < 1) {
+ return 0;
+ }
+ else if (m == 1) {
+ const unsigned char *ys = (const unsigned char *)memchr(y, *x, n);
+
+ if (ys)
+ return (mrb_int)(ys - y);
+ else
+ return -1;
+ }
+ return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
+}
+
+static void
+str_make_shared(mrb_state *mrb, struct RString *s)
+{
+ if (!RSTR_SHARED_P(s)) {
+ mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
+
+ shared->refcnt = 1;
+ if (RSTR_EMBED_P(s)) {
+ const mrb_int len = RSTR_EMBED_LEN(s);
+ char *const tmp = (char *)mrb_malloc(mrb, len+1);
+ memcpy(tmp, s->as.ary, len);
+ tmp[len] = '\0';
+ RSTR_UNSET_EMBED_FLAG(s);
+ s->as.heap.ptr = tmp;
+ s->as.heap.len = len;
+ shared->nofree = FALSE;
+ shared->ptr = s->as.heap.ptr;
+ }
+ else if (RSTR_NOFREE_P(s)) {
+ shared->nofree = TRUE;
+ shared->ptr = s->as.heap.ptr;
+ RSTR_UNSET_NOFREE_FLAG(s);
+ }
+ else {
+ shared->nofree = FALSE;
+ if (s->as.heap.aux.capa > s->as.heap.len) {
+ s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
+ }
+ else {
+ shared->ptr = s->as.heap.ptr;
+ }
+ }
+ shared->len = s->as.heap.len;
+ s->as.heap.aux.shared = shared;
+ RSTR_SET_SHARED_FLAG(s);
+ }
+}
+
+static mrb_value
+byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ struct RString *orig, *s;
+ mrb_shared_string *shared;
+
+ orig = mrb_str_ptr(str);
+ if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) {
+ s = str_new(mrb, orig->as.ary+beg, len);
+ }
+ else {
+ str_make_shared(mrb, orig);
+ shared = orig->as.heap.aux.shared;
+ s = mrb_obj_alloc_string(mrb);
+ s->as.heap.ptr = orig->as.heap.ptr + beg;
+ s->as.heap.len = len;
+ s->as.heap.aux.shared = shared;
+ RSTR_SET_SHARED_FLAG(s);
+ shared->refcnt++;
+ }
+
+ return mrb_obj_value(s);
+}
+#ifdef MRB_UTF8_STRING
+static inline mrb_value
+str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ beg = chars2bytes(str, 0, beg);
+ len = chars2bytes(str, beg, len);
+
+ return byte_subseq(mrb, str, beg, len);
+}
+#else
+#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
+#endif
+
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ mrb_int clen = RSTRING_CHAR_LEN(str);
+
+ if (len < 0) return mrb_nil_value();
+ if (clen == 0) {
+ len = 0;
+ }
+ else if (beg < 0) {
+ beg = clen + beg;
+ }
+ if (beg > clen) return mrb_nil_value();
+ if (beg < 0) {
+ beg += clen;
+ if (beg < 0) return mrb_nil_value();
+ }
+ if (len > clen - beg)
+ len = clen - beg;
+ if (len <= 0) {
+ len = 0;
+ }
+ return str_subseq(mrb, str, beg, len);
+}
+
+MRB_API mrb_int
+mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset)
+{
+ mrb_int pos;
+ char *s;
+ mrb_int len;
+
+ len = RSTRING_LEN(str);
+ if (offset < 0) {
+ offset += len;
+ if (offset < 0) return -1;
+ }
+ if (len - offset < slen) return -1;
+ s = RSTRING_PTR(str);
+ if (offset) {
+ s += offset;
+ }
+ if (slen == 0) return offset;
+ /* need proceed one character at a time */
+ len = RSTRING_LEN(str) - offset;
+ pos = mrb_memsearch(sptr, slen, s, len);
+ if (pos < 0) return pos;
+ return pos + offset;
+}
+
+static mrb_int
+str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset)
+{
+ const char *ptr;
+ mrb_int len;
+
+ ptr = RSTRING_PTR(str2);
+ len = RSTRING_LEN(str2);
+
+ return mrb_str_index(mrb, str, ptr, len, offset);
+}
+
+static void
+check_frozen(mrb_state *mrb, struct RString *s)
+{
+ if (MRB_FROZEN_P(s)) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string");
+ }
+}
+
+static mrb_value
+str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
+{
+ long len;
+
+ check_frozen(mrb, s1);
+ if (s1 == s2) return mrb_obj_value(s1);
+ s1->flags &= ~MRB_STR_NO_UTF;
+ s1->flags |= s2->flags&MRB_STR_NO_UTF;
+ len = RSTR_LEN(s2);
+ if (RSTR_SHARED_P(s1)) {
+ str_decref(mrb, s1->as.heap.aux.shared);
+ }
+ else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
+ mrb_free(mrb, s1->as.heap.ptr);
+ }
+
+ RSTR_UNSET_NOFREE_FLAG(s1);
+
+ if (RSTR_SHARED_P(s2)) {
+L_SHARE:
+ RSTR_UNSET_EMBED_FLAG(s1);
+ s1->as.heap.ptr = s2->as.heap.ptr;
+ s1->as.heap.len = len;
+ s1->as.heap.aux.shared = s2->as.heap.aux.shared;
+ RSTR_SET_SHARED_FLAG(s1);
+ s1->as.heap.aux.shared->refcnt++;
+ }
+ else {
+ if (len <= RSTRING_EMBED_LEN_MAX) {
+ RSTR_UNSET_SHARED_FLAG(s1);
+ RSTR_SET_EMBED_FLAG(s1);
+ memcpy(s1->as.ary, RSTR_PTR(s2), len);
+ RSTR_SET_EMBED_LEN(s1, len);
+ }
+ else {
+ str_make_shared(mrb, s2);
+ goto L_SHARE;
+ }
+ }
+
+ return mrb_obj_value(s1);
+}
+
+static mrb_int
+str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+{
+ char *s, *sbeg, *t;
+ struct RString *ps = mrb_str_ptr(str);
+ mrb_int len = RSTRING_LEN(sub);
+
+ /* substring longer than string */
+ if (RSTR_LEN(ps) < len) return -1;
+ if (RSTR_LEN(ps) - pos < len) {
+ pos = RSTR_LEN(ps) - len;
+ }
+ sbeg = RSTR_PTR(ps);
+ s = RSTR_PTR(ps) + pos;
+ t = RSTRING_PTR(sub);
+ if (len) {
+ while (sbeg <= s) {
+ if (memcmp(s, t, len) == 0) {
+ return (mrb_int)(s - RSTR_PTR(ps));
+ }
+ s--;
+ }
+ return -1;
+ }
+ else {
+ return pos;
+ }
+}
+
+MRB_API mrb_int
+mrb_str_strlen(mrb_state *mrb, struct RString *s)
+{
+ mrb_int i, max = RSTR_LEN(s);
+ char *p = RSTR_PTR(s);
+
+ if (!p) return 0;
+ for (i=0; i<max; i++) {
+ if (p[i] == '\0') {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
+ }
+ return max;
+}
+
+#ifdef _WIN32
+#include <windows.h>
+
+char*
+mrb_utf8_from_locale(const char *str, int len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ int mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = (int)strlen(str);
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+
+char*
+mrb_locale_from_utf8(const char *utf8, int len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ int mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = (int)strlen(utf8);
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+#endif
+
+MRB_API void
+mrb_str_modify(mrb_state *mrb, struct RString *s)
+{
+ check_frozen(mrb, s);
+ s->flags &= ~MRB_STR_NO_UTF;
+ if (RSTR_SHARED_P(s)) {
+ mrb_shared_string *shared = s->as.heap.aux.shared;
+
+ if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
+ s->as.heap.ptr = shared->ptr;
+ s->as.heap.aux.capa = shared->len;
+ RSTR_PTR(s)[s->as.heap.len] = '\0';
+ mrb_free(mrb, shared);
+ }
+ else {
+ char *ptr, *p;
+ mrb_int len;
+
+ p = RSTR_PTR(s);
+ len = s->as.heap.len;
+ if (len < RSTRING_EMBED_LEN_MAX) {
+ RSTR_SET_EMBED_FLAG(s);
+ RSTR_SET_EMBED_LEN(s, len);
+ ptr = RSTR_PTR(s);
+ }
+ else {
+ ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
+ s->as.heap.ptr = ptr;
+ s->as.heap.aux.capa = len;
+ }
+ if (p) {
+ memcpy(ptr, p, len);
+ }
+ ptr[len] = '\0';
+ str_decref(mrb, shared);
+ }
+ RSTR_UNSET_SHARED_FLAG(s);
+ return;
+ }
+ if (RSTR_NOFREE_P(s)) {
+ char *p = s->as.heap.ptr;
+ mrb_int len = s->as.heap.len;
+
+ RSTR_UNSET_NOFREE_FLAG(s);
+ if (len < RSTRING_EMBED_LEN_MAX) {
+ RSTR_SET_EMBED_FLAG(s);
+ RSTR_SET_EMBED_LEN(s, len);
+ }
+ else {
+ s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
+ s->as.heap.aux.capa = len;
+ }
+ if (p) {
+ memcpy(RSTR_PTR(s), p, len);
+ }
+ RSTR_PTR(s)[len] = '\0';
+ return;
+ }
+}
+
+MRB_API mrb_value
+mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
+{
+ mrb_int slen;
+ struct RString *s = mrb_str_ptr(str);
+
+ mrb_str_modify(mrb, s);
+ slen = RSTR_LEN(s);
+ if (len != slen) {
+ if (slen < len || slen - len > 256) {
+ resize_capa(mrb, s, len);
+ }
+ RSTR_SET_LEN(s, len);
+ RSTR_PTR(s)[len] = '\0'; /* sentinel */
+ }
+ return str;
+}
+
+MRB_API char*
+mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
+{
+ struct RString *s;
+
+ if (!mrb_string_p(str0)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+
+ s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
+ if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
+ return RSTR_PTR(s);
+}
+
+/*
+ * call-seq: (Caution! String("abcd") change)
+ * String("abcdefg") = String("abcd") + String("efg")
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+MRB_API void
+mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
+{
+ if (!mrb_string_p(other)) {
+ other = mrb_str_to_str(mrb, other);
+ }
+ mrb_str_cat_str(mrb, self, other);
+}
+
+/*
+ * call-seq: (Caution! String("abcd") remain)
+ * String("abcdefg") = String("abcd") + String("efg")
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+MRB_API mrb_value
+mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+ struct RString *s = mrb_str_ptr(a);
+ struct RString *s2 = mrb_str_ptr(b);
+ struct RString *t;
+
+ t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2));
+ memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s));
+ memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2));
+
+ return mrb_obj_value(t);
+}
+
+/* 15.2.10.5.2 */
+
+/*
+ * call-seq: (Caution! String("abcd") remain) for stack_argument
+ * String("abcdefg") = String("abcd") + String("efg")
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+static mrb_value
+mrb_str_plus_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ mrb_get_args(mrb, "S", &str);
+ return mrb_str_plus(mrb, self, str);
+}
+
+/* 15.2.10.5.26 */
+/* 15.2.10.5.33 */
+/*
+ * call-seq:
+ * "abcd".size => int
+ *
+ * Returns the length of string.
+ */
+static mrb_value
+mrb_str_size(mrb_state *mrb, mrb_value self)
+{
+ mrb_int len = RSTRING_CHAR_LEN(self);
+ return mrb_fixnum_value(len);
+}
+
+static mrb_value
+mrb_str_bytesize(mrb_state *mrb, mrb_value self)
+{
+ mrb_int len = RSTRING_LEN(self);
+ return mrb_fixnum_value(len);
+}
+
+/* 15.2.10.5.1 */
+/*
+ * call-seq:
+ * str * integer => new_str
+ *
+ * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
+ * the receiver.
+ *
+ * "Ho! " * 3 #=> "Ho! Ho! Ho! "
+ */
+static mrb_value
+mrb_str_times(mrb_state *mrb, mrb_value self)
+{
+ mrb_int n,len,times;
+ struct RString *str2;
+ char *p;
+
+ mrb_get_args(mrb, "i", &times);
+ if (times < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
+ }
+ if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
+ }
+
+ len = RSTRING_LEN(self)*times;
+ str2 = str_new(mrb, 0, len);
+ str_with_class(mrb, str2, self);
+ p = RSTR_PTR(str2);
+ if (len > 0) {
+ n = RSTRING_LEN(self);
+ memcpy(p, RSTRING_PTR(self), n);
+ while (n <= len/2) {
+ memcpy(p + n, p, n);
+ n *= 2;
+ }
+ memcpy(p + n, p, len-n);
+ }
+ p[RSTR_LEN(str2)] = '\0';
+
+ return mrb_obj_value(str2);
+}
+/* -------------------------------------------------------------- */
+
+#define lesser(a,b) (((a)>(b))?(b):(a))
+
+/* ---------------------------*/
+/*
+ * call-seq:
+ * mrb_value str1 <=> mrb_value str2 => int
+ * > 1
+ * = 0
+ * < -1
+ */
+MRB_API int
+mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+ mrb_int len;
+ mrb_int retval;
+ struct RString *s1 = mrb_str_ptr(str1);
+ struct RString *s2 = mrb_str_ptr(str2);
+
+ len = lesser(RSTR_LEN(s1), RSTR_LEN(s2));
+ retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len);
+ if (retval == 0) {
+ if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0;
+ if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1;
+ return -1;
+ }
+ if (retval > 0) return 1;
+ return -1;
+}
+
+/* 15.2.10.5.3 */
+
+/*
+ * call-seq:
+ * str <=> other_str => -1, 0, +1
+ *
+ * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if
+ * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than
+ * <i>str</i>. If the strings are of different lengths, and the strings are
+ * equal when compared up to the shortest length, then the longer string is
+ * considered greater than the shorter one. If the variable <code>$=</code> is
+ * <code>false</code>, the comparison is based on comparing the binary values
+ * of each character in the string. In older versions of Ruby, setting
+ * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
+ * in favor of using <code>String#casecmp</code>.
+ *
+ * <code><=></code> is the basis for the methods <code><</code>,
+ * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
+ * included from module <code>Comparable</code>. The method
+ * <code>String#==</code> does not use <code>Comparable#==</code>.
+ *
+ * "abcdef" <=> "abcde" #=> 1
+ * "abcdef" <=> "abcdef" #=> 0
+ * "abcdef" <=> "abcdefg" #=> -1
+ * "abcdef" <=> "ABCDEF" #=> 1
+ */
+static mrb_value
+mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
+{
+ mrb_value str2;
+ mrb_int result;
+
+ mrb_get_args(mrb, "o", &str2);
+ if (!mrb_string_p(str2)) {
+ if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
+ return mrb_nil_value();
+ }
+ else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
+ return mrb_nil_value();
+ }
+ else {
+ mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
+
+ if (!mrb_nil_p(tmp)) return mrb_nil_value();
+ if (!mrb_fixnum_p(tmp)) {
+ return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
+ }
+ result = -mrb_fixnum(tmp);
+ }
+ }
+ else {
+ result = mrb_str_cmp(mrb, str1, str2);
+ }
+ return mrb_fixnum_value(result);
+}
+
+static mrb_bool
+str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
+{
+ const mrb_int len = RSTRING_LEN(str1);
+
+ if (len != RSTRING_LEN(str2)) return FALSE;
+ if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
+ return TRUE;
+ return FALSE;
+}
+
+MRB_API mrb_bool
+mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+ if (mrb_immediate_p(str2)) return FALSE;
+ if (!mrb_string_p(str2)) {
+ if (mrb_nil_p(str2)) return FALSE;
+ if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
+ return FALSE;
+ }
+ str2 = mrb_funcall(mrb, str2, "to_str", 0);
+ return mrb_equal(mrb, str2, str1);
+ }
+ return str_eql(mrb, str1, str2);
+}
+
+/* 15.2.10.5.4 */
+/*
+ * call-seq:
+ * str == obj => true or false
+ *
+ * Equality---
+ * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
+ * Otherwise, returns <code>false</code> or <code>true</code>
+ *
+ * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
+ */
+static mrb_value
+mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
+{
+ mrb_value str2;
+
+ mrb_get_args(mrb, "o", &str2);
+
+ return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
+}
+/* ---------------------------------- */
+MRB_API mrb_value
+mrb_str_to_str(mrb_state *mrb, mrb_value str)
+{
+ mrb_value s;
+
+ if (!mrb_string_p(str)) {
+ s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+ if (mrb_nil_p(s)) {
+ s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
+ }
+ return s;
+ }
+ return str;
+}
+
+MRB_API const char*
+mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
+{
+ mrb_value str = mrb_str_to_str(mrb, ptr);
+ return RSTRING_PTR(str);
+}
+
+MRB_API mrb_int
+mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
+{
+ mrb_value str = mrb_str_to_str(mrb, ptr);
+ return RSTRING_LEN(str);
+}
+
+void
+mrb_noregexp(mrb_state *mrb, mrb_value self)
+{
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
+}
+
+void
+mrb_regexp_check(mrb_state *mrb, mrb_value obj)
+{
+ if (mrb_regexp_p(mrb, obj)) {
+ mrb_noregexp(mrb, obj);
+ }
+}
+
+MRB_API mrb_value
+mrb_str_dup(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+ struct RString *dup = str_new(mrb, 0, 0);
+
+ str_with_class(mrb, dup, str);
+ return str_replace(mrb, dup, s);
+}
+
+static mrb_value
+mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
+{
+ mrb_int idx;
+
+ mrb_regexp_check(mrb, indx);
+ switch (mrb_type(indx)) {
+ case MRB_TT_FIXNUM:
+ idx = mrb_fixnum(indx);
+
+num_index:
+ str = str_substr(mrb, str, idx, 1);
+ if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
+ return str;
+
+ case MRB_TT_STRING:
+ if (str_index_str(mrb, str, indx, 0) != -1)
+ return mrb_str_dup(mrb, indx);
+ return mrb_nil_value();
+
+ case MRB_TT_RANGE:
+ goto range_arg;
+
+ default:
+ indx = mrb_Integer(mrb, indx);
+ if (mrb_nil_p(indx)) {
+ range_arg:
+ {
+ mrb_int beg, len;
+
+ len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
+ case 1:
+ return str_subseq(mrb, str, beg, len);
+ case 2:
+ return mrb_nil_value();
+ default:
+ break;
+ }
+ }
+ mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
+ }
+ idx = mrb_fixnum(indx);
+ goto num_index;
+ }
+ return mrb_nil_value(); /* not reached */
+}
+
+/* 15.2.10.5.6 */
+/* 15.2.10.5.34 */
+/*
+ * call-seq:
+ * str[fixnum] => fixnum or nil
+ * str[fixnum, fixnum] => new_str or nil
+ * str[range] => new_str or nil
+ * str[regexp] => new_str or nil
+ * str[regexp, fixnum] => new_str or nil
+ * str[other_str] => new_str or nil
+ * str.slice(fixnum) => fixnum or nil
+ * str.slice(fixnum, fixnum) => new_str or nil
+ * str.slice(range) => new_str or nil
+ * str.slice(other_str) => new_str or nil
+ *
+ * Element Reference---If passed a single <code>Fixnum</code>, returns the code
+ * of the character at that position. If passed two <code>Fixnum</code>
+ * objects, returns a substring starting at the offset given by the first, and
+ * a length given by the second. If given a range, a substring containing
+ * characters at offsets given by the range is returned. In all three cases, if
+ * an offset is negative, it is counted from the end of <i>str</i>. Returns
+ * <code>nil</code> if the initial offset falls outside the string, the length
+ * is negative, or the beginning of the range is greater than the end.
+ *
+ * If a <code>String</code> is given, that string is returned if it occurs in
+ * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
+ * match.
+ *
+ * a = "hello there"
+ * a[1] #=> 101(1.8.7) "e"(1.9.2)
+ * a[1.1] #=> "e"(1.9.2)
+ * a[1,3] #=> "ell"
+ * a[1..3] #=> "ell"
+ * a[-3,2] #=> "er"
+ * a[-4..-2] #=> "her"
+ * a[12..-1] #=> nil
+ * a[-2..-4] #=> ""
+ * a["lo"] #=> "lo"
+ * a["bye"] #=> nil
+ */
+static mrb_value
+mrb_str_aref_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value a1, a2;
+ int argc;
+
+ argc = mrb_get_args(mrb, "o|o", &a1, &a2);
+ if (argc == 2) {
+ mrb_int n1, n2;
+
+ mrb_regexp_check(mrb, a1);
+ mrb_get_args(mrb, "ii", &n1, &n2);
+ return str_substr(mrb, str, n1, n2);
+ }
+ if (argc != 1) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
+ }
+ return mrb_str_aref(mrb, str, a1);
+}
+
+/* 15.2.10.5.8 */
+/*
+ * call-seq:
+ * str.capitalize! => str or nil
+ *
+ * Modifies <i>str</i> by converting the first character to uppercase and the
+ * remainder to lowercase. Returns <code>nil</code> if no changes are made.
+ *
+ * a = "hello"
+ * a.capitalize! #=> "Hello"
+ * a #=> "Hello"
+ * a.capitalize! #=> nil
+ */
+static mrb_value
+mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
+{
+ char *p, *pend;
+ mrb_bool modify = FALSE;
+ struct RString *s = mrb_str_ptr(str);
+
+ mrb_str_modify(mrb, s);
+ if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
+ p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
+ if (ISLOWER(*p)) {
+ *p = TOUPPER(*p);
+ modify = TRUE;
+ }
+ while (++p < pend) {
+ if (ISUPPER(*p)) {
+ *p = TOLOWER(*p);
+ modify = TRUE;
+ }
+ }
+ if (modify) return str;
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.7 */
+/*
+ * call-seq:
+ * str.capitalize => new_str
+ *
+ * Returns a copy of <i>str</i> with the first character converted to uppercase
+ * and the remainder to lowercase.
+ *
+ * "hello".capitalize #=> "Hello"
+ * "HELLO".capitalize #=> "Hello"
+ * "123ABC".capitalize #=> "123abc"
+ */
+static mrb_value
+mrb_str_capitalize(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ str = mrb_str_dup(mrb, self);
+ mrb_str_capitalize_bang(mrb, str);
+ return str;
+}
+
+/* 15.2.10.5.10 */
+/*
+ * call-seq:
+ * str.chomp!(separator="\n") => str or nil
+ *
+ * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
+ * returning <i>str</i>, or <code>nil</code> if no modifications were made.
+ */
+static mrb_value
+mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
+{
+ mrb_value rs;
+ mrb_int newline;
+ char *p, *pp;
+ mrb_int rslen;
+ mrb_int len;
+ mrb_int argc;
+ struct RString *s = mrb_str_ptr(str);
+
+ mrb_str_modify(mrb, s);
+ argc = mrb_get_args(mrb, "|S", &rs);
+ len = RSTR_LEN(s);
+ if (argc == 0) {
+ if (len == 0) return mrb_nil_value();
+ smart_chomp:
+ if (RSTR_PTR(s)[len-1] == '\n') {
+ RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
+ if (RSTR_LEN(s) > 0 &&
+ RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') {
+ RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
+ }
+ }
+ else if (RSTR_PTR(s)[len-1] == '\r') {
+ RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
+ }
+ else {
+ return mrb_nil_value();
+ }
+ RSTR_PTR(s)[RSTR_LEN(s)] = '\0';
+ return str;
+ }
+
+ if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
+ p = RSTR_PTR(s);
+ rslen = RSTRING_LEN(rs);
+ if (rslen == 0) {
+ while (len>0 && p[len-1] == '\n') {
+ len--;
+ if (len>0 && p[len-1] == '\r')
+ len--;
+ }
+ if (len < RSTR_LEN(s)) {
+ RSTR_SET_LEN(s, len);
+ p[len] = '\0';
+ return str;
+ }
+ return mrb_nil_value();
+ }
+ if (rslen > len) return mrb_nil_value();
+ newline = RSTRING_PTR(rs)[rslen-1];
+ if (rslen == 1 && newline == '\n')
+ newline = RSTRING_PTR(rs)[rslen-1];
+ if (rslen == 1 && newline == '\n')
+ goto smart_chomp;
+
+ pp = p + len - rslen;
+ if (p[len-1] == newline &&
+ (rslen <= 1 ||
+ memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
+ RSTR_SET_LEN(s, len - rslen);
+ p[RSTR_LEN(s)] = '\0';
+ return str;
+ }
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.9 */
+/*
+ * call-seq:
+ * str.chomp(separator="\n") => new_str
+ *
+ * Returns a new <code>String</code> with the given record separator removed
+ * from the end of <i>str</i> (if present). If <code>$/</code> has not been
+ * changed from the default Ruby record separator, then <code>chomp</code> also
+ * removes carriage return characters (that is it will remove <code>\n</code>,
+ * <code>\r</code>, and <code>\r\n</code>).
+ *
+ * "hello".chomp #=> "hello"
+ * "hello\n".chomp #=> "hello"
+ * "hello\r\n".chomp #=> "hello"
+ * "hello\n\r".chomp #=> "hello\n"
+ * "hello\r".chomp #=> "hello"
+ * "hello \n there".chomp #=> "hello \n there"
+ * "hello".chomp("llo") #=> "he"
+ */
+static mrb_value
+mrb_str_chomp(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ str = mrb_str_dup(mrb, self);
+ mrb_str_chomp_bang(mrb, str);
+ return str;
+}
+
+/* 15.2.10.5.12 */
+/*
+ * call-seq:
+ * str.chop! => str or nil
+ *
+ * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
+ * or <code>nil</code> if <i>str</i> is the empty string. See also
+ * <code>String#chomp!</code>.
+ */
+static mrb_value
+mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+
+ mrb_str_modify(mrb, s);
+ if (RSTR_LEN(s) > 0) {
+ mrb_int len;
+#ifdef MRB_UTF8_STRING
+ const char* t = RSTR_PTR(s), *p = t;
+ const char* e = p + RSTR_LEN(s);
+ while (p<e) {
+ mrb_int clen = utf8len(p, e);
+ if (p + clen>=e) break;
+ p += clen;
+ }
+ len = p - t;
+#else
+ len = RSTR_LEN(s) - 1;
+#endif
+ if (RSTR_PTR(s)[len] == '\n') {
+ if (len > 0 &&
+ RSTR_PTR(s)[len-1] == '\r') {
+ len--;
+ }
+ }
+ RSTR_SET_LEN(s, len);
+ RSTR_PTR(s)[len] = '\0';
+ return str;
+ }
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.11 */
+/*
+ * call-seq:
+ * str.chop => new_str
+ *
+ * Returns a new <code>String</code> with the last character removed. If the
+ * string ends with <code>\r\n</code>, both characters are removed. Applying
+ * <code>chop</code> to an empty string returns an empty
+ * string. <code>String#chomp</code> is often a safer alternative, as it leaves
+ * the string unchanged if it doesn't end in a record separator.
+ *
+ * "string\r\n".chop #=> "string"
+ * "string\n\r".chop #=> "string\n"
+ * "string\n".chop #=> "string"
+ * "string".chop #=> "strin"
+ * "x".chop #=> ""
+ */
+static mrb_value
+mrb_str_chop(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+ str = mrb_str_dup(mrb, self);
+ mrb_str_chop_bang(mrb, str);
+ return str;
+}
+
+/* 15.2.10.5.14 */
+/*
+ * call-seq:
+ * str.downcase! => str or nil
+ *
+ * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
+ * changes were made.
+ */
+static mrb_value
+mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
+{
+ char *p, *pend;
+ mrb_bool modify = FALSE;
+ struct RString *s = mrb_str_ptr(str);
+
+ mrb_str_modify(mrb, s);
+ p = RSTR_PTR(s);
+ pend = RSTR_PTR(s) + RSTR_LEN(s);
+ while (p < pend) {
+ if (ISUPPER(*p)) {
+ *p = TOLOWER(*p);
+ modify = TRUE;
+ }
+ p++;
+ }
+
+ if (modify) return str;
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.13 */
+/*
+ * call-seq:
+ * str.downcase => new_str
+ *
+ * Returns a copy of <i>str</i> with all uppercase letters replaced with their
+ * lowercase counterparts. The operation is locale insensitive---only
+ * characters 'A' to 'Z' are affected.
+ *
+ * "hEllO".downcase #=> "hello"
+ */
+static mrb_value
+mrb_str_downcase(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ str = mrb_str_dup(mrb, self);
+ mrb_str_downcase_bang(mrb, str);
+ return str;
+}
+
+/* 15.2.10.5.16 */
+/*
+ * call-seq:
+ * str.empty? => true or false
+ *
+ * Returns <code>true</code> if <i>str</i> has a length of zero.
+ *
+ * "hello".empty? #=> false
+ * "".empty? #=> true
+ */
+static mrb_value
+mrb_str_empty_p(mrb_state *mrb, mrb_value self)
+{
+ struct RString *s = mrb_str_ptr(self);
+
+ return mrb_bool_value(RSTR_LEN(s) == 0);
+}
+
+/* 15.2.10.5.17 */
+/*
+ * call-seq:
+ * str.eql?(other) => true or false
+ *
+ * Two strings are equal if the have the same length and content.
+ */
+static mrb_value
+mrb_str_eql(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str2;
+ mrb_bool eql_p;
+
+ mrb_get_args(mrb, "o", &str2);
+ eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
+
+ return mrb_bool_value(eql_p);
+}
+
+MRB_API mrb_value
+mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ return str_substr(mrb, str, beg, len);
+}
+
+mrb_int
+mrb_str_hash(mrb_state *mrb, mrb_value str)
+{
+ /* 1-8-7 */
+ struct RString *s = mrb_str_ptr(str);
+ mrb_int len = RSTR_LEN(s);
+ char *p = RSTR_PTR(s);
+ uint64_t key = 0;
+
+ while (len--) {
+ key = key*65599 + *p;
+ p++;
+ }
+ return (mrb_int)(key + (key>>5));
+}
+
+/* 15.2.10.5.20 */
+/*
+ * call-seq:
+ * str.hash => fixnum
+ *
+ * Return a hash based on the string's length and content.
+ */
+static mrb_value
+mrb_str_hash_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_int key = mrb_str_hash(mrb, self);
+ return mrb_fixnum_value(key);
+}
+
+/* 15.2.10.5.21 */
+/*
+ * call-seq:
+ * str.include? other_str => true or false
+ * str.include? fixnum => true or false
+ *
+ * Returns <code>true</code> if <i>str</i> contains the given string or
+ * character.
+ *
+ * "hello".include? "lo" #=> true
+ * "hello".include? "ol" #=> false
+ * "hello".include? ?h #=> true
+ */
+static mrb_value
+mrb_str_include(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str2;
+
+ mrb_get_args(mrb, "S", &str2);
+ if (str_index_str(mrb, self, str2, 0) < 0)
+ return mrb_bool_value(FALSE);
+ return mrb_bool_value(TRUE);
+}
+
+/* 15.2.10.5.22 */
+/*
+ * call-seq:
+ * str.index(substring [, offset]) => fixnum or nil
+ * str.index(fixnum [, offset]) => fixnum or nil
+ * str.index(regexp [, offset]) => fixnum or nil
+ *
+ * Returns the index of the first occurrence of the given
+ * <i>substring</i>,
+ * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
+ * Returns
+ * <code>nil</code> if not found.
+ * If the second parameter is present, it
+ * specifies the position in the string to begin the search.
+ *
+ * "hello".index('e') #=> 1
+ * "hello".index('lo') #=> 3
+ * "hello".index('a') #=> nil
+ * "hello".index(101) #=> 1(101=0x65='e')
+ * "hello".index(/[aeiou]/, -3) #=> 4
+ */
+static mrb_value
+mrb_str_index_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value *argv;
+ mrb_int argc;
+ mrb_value sub;
+ mrb_int pos, clen;
+
+ mrb_get_args(mrb, "*!", &argv, &argc);
+ if (argc == 2) {
+ mrb_get_args(mrb, "oi", &sub, &pos);
+ }
+ else {
+ pos = 0;
+ if (argc > 0)
+ sub = argv[0];
+ else
+ sub = mrb_nil_value();
+ }
+ mrb_regexp_check(mrb, sub);
+ clen = RSTRING_CHAR_LEN(str);
+ if (pos < 0) {
+ pos += clen;
+ if (pos < 0) {
+ return mrb_nil_value();
+ }
+ }
+ if (pos > clen) return mrb_nil_value();
+ pos = chars2bytes(str, 0, pos);
+
+ switch (mrb_type(sub)) {
+ default: {
+ mrb_value tmp;
+
+ tmp = mrb_check_string_type(mrb, sub);
+ if (mrb_nil_p(tmp)) {
+ mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
+ }
+ sub = tmp;
+ }
+ /* fall through */
+ case MRB_TT_STRING:
+ pos = str_index_str(mrb, str, sub, pos);
+ break;
+ }
+
+ if (pos == -1) return mrb_nil_value();
+ pos = bytes2chars(RSTRING_PTR(str), pos);
+ BYTES_ALIGN_CHECK(pos);
+ return mrb_fixnum_value(pos);
+}
+
+#define STR_REPLACE_SHARED_MIN 10
+
+/* 15.2.10.5.24 */
+/* 15.2.10.5.28 */
+/*
+ * call-seq:
+ * str.replace(other_str) => str
+ *
+ * s = "hello" #=> "hello"
+ * s.replace "world" #=> "world"
+ */
+static mrb_value
+mrb_str_replace(mrb_state *mrb, mrb_value str)
+{
+ mrb_value str2;
+
+ mrb_get_args(mrb, "S", &str2);
+ return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
+}
+
+/* 15.2.10.5.23 */
+/*
+ * call-seq:
+ * String.new(str="") => new_str
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+static mrb_value
+mrb_str_init(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str2;
+
+ if (mrb_get_args(mrb, "|S", &str2) == 0) {
+ struct RString *s = str_new(mrb, 0, 0);
+ str2 = mrb_obj_value(s);
+ }
+ str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
+ return self;
+}
+
+/* 15.2.10.5.25 */
+/* 15.2.10.5.41 */
+/*
+ * call-seq:
+ * str.intern => symbol
+ * str.to_sym => symbol
+ *
+ * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
+ * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
+ *
+ * "Koala".intern #=> :Koala
+ * s = 'cat'.to_sym #=> :cat
+ * s == :cat #=> true
+ * s = '@cat'.to_sym #=> :@cat
+ * s == :@cat #=> true
+ *
+ * This can also be used to create symbols that cannot be represented using the
+ * <code>:xxx</code> notation.
+ *
+ * 'cat and dog'.to_sym #=> :"cat and dog"
+ */
+MRB_API mrb_value
+mrb_str_intern(mrb_state *mrb, mrb_value self)
+{
+ return mrb_symbol_value(mrb_intern_str(mrb, self));
+}
+/* ---------------------------------- */
+MRB_API mrb_value
+mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
+{
+ mrb_value str;
+
+ if (mrb_string_p(obj)) {
+ return obj;
+ }
+ str = mrb_funcall(mrb, obj, "to_s", 0);
+ if (!mrb_string_p(str))
+ return mrb_any_to_s(mrb, obj);
+ return str;
+}
+
+MRB_API mrb_value
+mrb_ptr_to_str(mrb_state *mrb, void *p)
+{
+ struct RString *p_str;
+ char *p1;
+ char *p2;
+ uintptr_t n = (uintptr_t)p;
+
+ p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
+ p1 = RSTR_PTR(p_str);
+ *p1++ = '0';
+ *p1++ = 'x';
+ p2 = p1;
+
+ do {
+ *p2++ = mrb_digitmap[n % 16];
+ n /= 16;
+ } while (n > 0);
+ *p2 = '\0';
+ RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str)));
+
+ while (p1 < p2) {
+ const char c = *p1;
+ *p1++ = *--p2;
+ *p2 = c;
+ }
+
+ return mrb_obj_value(p_str);
+}
+
+MRB_API mrb_value
+mrb_string_type(mrb_state *mrb, mrb_value str)
+{
+ return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+}
+
+MRB_API mrb_value
+mrb_check_string_type(mrb_state *mrb, mrb_value str)
+{
+ return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+}
+
+/* 15.2.10.5.30 */
+/*
+ * call-seq:
+ * str.reverse! => str
+ *
+ * Reverses <i>str</i> in place.
+ */
+static mrb_value
+mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
+{
+#ifdef MRB_UTF8_STRING
+ mrb_int utf8_len = RSTRING_CHAR_LEN(str);
+ mrb_int len = RSTRING_LEN(str);
+
+ if (utf8_len == len) goto bytes;
+ if (utf8_len > 1) {
+ char *buf;
+ char *p, *e, *r;
+
+ mrb_str_modify(mrb, mrb_str_ptr(str));
+ len = RSTRING_LEN(str);
+ buf = (char*)mrb_malloc(mrb, (size_t)len);
+ p = buf;
+ e = buf + len;
+
+ memcpy(buf, RSTRING_PTR(str), len);
+ r = RSTRING_PTR(str) + len;
+
+ while (p<e) {
+ mrb_int clen = utf8len(p, e);
+ r -= clen;
+ memcpy(r, p, clen);
+ p += clen;
+ }
+ mrb_free(mrb, buf);
+ }
+ return str;
+
+ bytes:
+#endif
+ {
+ struct RString *s = mrb_str_ptr(str);
+ char *p, *e;
+ char c;
+
+ mrb_str_modify(mrb, s);
+ if (RSTR_LEN(s) > 1) {
+ p = RSTR_PTR(s);
+ e = p + RSTR_LEN(s) - 1;
+ while (p < e) {
+ c = *p;
+ *p++ = *e;
+ *e-- = c;
+ }
+ }
+ return str;
+ }
+}
+
+/* ---------------------------------- */
+/* 15.2.10.5.29 */
+/*
+ * call-seq:
+ * str.reverse => new_str
+ *
+ * Returns a new string with the characters from <i>str</i> in reverse order.
+ *
+ * "stressed".reverse #=> "desserts"
+ */
+static mrb_value
+mrb_str_reverse(mrb_state *mrb, mrb_value str)
+{
+ mrb_value str2 = mrb_str_dup(mrb, str);
+ mrb_str_reverse_bang(mrb, str2);
+ return str2;
+}
+
+/* 15.2.10.5.31 */
+/*
+ * call-seq:
+ * str.rindex(substring [, fixnum]) => fixnum or nil
+ * str.rindex(fixnum [, fixnum]) => fixnum or nil
+ * str.rindex(regexp [, fixnum]) => fixnum or nil
+ *
+ * Returns the index of the last occurrence of the given <i>substring</i>,
+ * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ * <code>nil</code> if not found. If the second parameter is present, it
+ * specifies the position in the string to end the search---characters beyond
+ * this point will not be considered.
+ *
+ * "hello".rindex('e') #=> 1
+ * "hello".rindex('l') #=> 3
+ * "hello".rindex('a') #=> nil
+ * "hello".rindex(101) #=> 1
+ * "hello".rindex(/[aeiou]/, -2) #=> 1
+ */
+static mrb_value
+mrb_str_rindex(mrb_state *mrb, mrb_value str)
+{
+ mrb_value *argv;
+ mrb_int argc;
+ mrb_value sub;
+ mrb_int pos, len = RSTRING_CHAR_LEN(str);
+
+ mrb_get_args(mrb, "*!", &argv, &argc);
+ if (argc == 2) {
+ mrb_get_args(mrb, "oi", &sub, &pos);
+ if (pos < 0) {
+ pos += len;
+ if (pos < 0) {
+ mrb_regexp_check(mrb, sub);
+ return mrb_nil_value();
+ }
+ }
+ if (pos > len) pos = len;
+ }
+ else {
+ pos = len;
+ if (argc > 0)
+ sub = argv[0];
+ else
+ sub = mrb_nil_value();
+ }
+ pos = chars2bytes(str, 0, pos);
+ mrb_regexp_check(mrb, sub);
+
+ switch (mrb_type(sub)) {
+ default: {
+ mrb_value tmp;
+
+ tmp = mrb_check_string_type(mrb, sub);
+ if (mrb_nil_p(tmp)) {
+ mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
+ }
+ sub = tmp;
+ }
+ /* fall through */
+ case MRB_TT_STRING:
+ pos = str_rindex(mrb, str, sub, pos);
+ if (pos >= 0) {
+ pos = bytes2chars(RSTRING_PTR(str), pos);
+ BYTES_ALIGN_CHECK(pos);
+ return mrb_fixnum_value(pos);
+ }
+ break;
+
+ } /* end of switch (TYPE(sub)) */
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.35 */
+
+/*
+ * call-seq:
+ * str.split(pattern="\n", [limit]) => anArray
+ *
+ * Divides <i>str</i> into substrings based on a delimiter, returning an array
+ * of these substrings.
+ *
+ * If <i>pattern</i> is a <code>String</code>, then its contents are used as
+ * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
+ * space, <i>str</i> is split on whitespace, with leading whitespace and runs
+ * of contiguous whitespace characters ignored.
+ *
+ * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
+ * pattern matches. Whenever the pattern matches a zero-length string,
+ * <i>str</i> is split into individual characters.
+ *
+ * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
+ * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
+ * split on whitespace as if ' ' were specified.
+ *
+ * If the <i>limit</i> parameter is omitted, trailing null fields are
+ * suppressed. If <i>limit</i> is a positive number, at most that number of
+ * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
+ * string is returned as the only entry in an array). If negative, there is no
+ * limit to the number of fields returned, and trailing null fields are not
+ * suppressed.
+ *
+ * " now's the time".split #=> ["now's", "the", "time"]
+ * " now's the time".split(' ') #=> ["now's", "the", "time"]
+ * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
+ * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
+ * "hello".split(//, 3) #=> ["h", "e", "llo"]
+ *
+ * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
+ * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
+ * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
+ * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
+ */
+
+static mrb_value
+mrb_str_split_m(mrb_state *mrb, mrb_value str)
+{
+ int argc;
+ mrb_value spat = mrb_nil_value();
+ enum {awk, string, regexp} split_type = string;
+ mrb_int i = 0;
+ mrb_int beg;
+ mrb_int end;
+ mrb_int lim = 0;
+ mrb_bool lim_p;
+ mrb_value result, tmp;
+
+ argc = mrb_get_args(mrb, "|oi", &spat, &lim);
+ lim_p = (lim > 0 && argc == 2);
+ if (argc == 2) {
+ if (lim == 1) {
+ if (RSTRING_LEN(str) == 0)
+ return mrb_ary_new_capa(mrb, 0);
+ return mrb_ary_new_from_values(mrb, 1, &str);
+ }
+ i = 1;
+ }
+
+ if (argc == 0 || mrb_nil_p(spat)) {
+ split_type = awk;
+ }
+ else {
+ if (mrb_string_p(spat)) {
+ split_type = string;
+ if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
+ split_type = awk;
+ }
+ }
+ else {
+ mrb_noregexp(mrb, str);
+ }
+ }
+
+ result = mrb_ary_new(mrb);
+ beg = 0;
+ if (split_type == awk) {
+ mrb_bool skip = TRUE;
+ mrb_int idx = 0;
+ mrb_int str_len = RSTRING_LEN(str);
+ unsigned int c;
+ int ai = mrb_gc_arena_save(mrb);
+
+ idx = end = beg;
+ while (idx < str_len) {
+ c = (unsigned char)RSTRING_PTR(str)[idx++];
+ if (skip) {
+ if (ISSPACE(c)) {
+ beg = idx;
+ }
+ else {
+ end = idx;
+ skip = FALSE;
+ if (lim_p && lim <= i) break;
+ }
+ }
+ else if (ISSPACE(c)) {
+ mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
+ mrb_gc_arena_restore(mrb, ai);
+ skip = TRUE;
+ beg = idx;
+ if (lim_p) ++i;
+ }
+ else {
+ end = idx;
+ }
+ }
+ }
+ else if (split_type == string) {
+ mrb_int str_len = RSTRING_LEN(str);
+ mrb_int pat_len = RSTRING_LEN(spat);
+ mrb_int idx = 0;
+ int ai = mrb_gc_arena_save(mrb);
+
+ while (idx < str_len) {
+ if (pat_len > 0) {
+ end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
+ if (end < 0) break;
+ }
+ else {
+ end = chars2bytes(str, idx, 1);
+ }
+ mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
+ mrb_gc_arena_restore(mrb, ai);
+ idx += end + pat_len;
+ if (lim_p && lim <= ++i) break;
+ }
+ beg = idx;
+ }
+ else {
+ mrb_noregexp(mrb, str);
+ }
+ if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
+ if (RSTRING_LEN(str) == beg) {
+ tmp = mrb_str_new_empty(mrb, str);
+ }
+ else {
+ tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ }
+ mrb_ary_push(mrb, result, tmp);
+ }
+ if (!lim_p && lim == 0) {
+ mrb_int len;
+ while ((len = RARRAY_LEN(result)) > 0 &&
+ (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
+ mrb_ary_pop(mrb, result);
+ }
+
+ return result;
+}
+
+MRB_API mrb_value
+mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, int base, int badcheck)
+{
+ const char *p = str;
+ const char *pend = str + len;
+ char sign = 1;
+ int c;
+ uint64_t n = 0;
+ mrb_int val;
+
+#define conv_digit(c) \
+ (ISDIGIT(c) ? ((c) - '0') : \
+ ISLOWER(c) ? ((c) - 'a' + 10) : \
+ ISUPPER(c) ? ((c) - 'A' + 10) : \
+ -1)
+
+ if (!p) {
+ if (badcheck) goto bad;
+ return mrb_fixnum_value(0);
+ }
+ while (p<pend && ISSPACE(*p))
+ p++;
+
+ if (p[0] == '+') {
+ p++;
+ }
+ else if (p[0] == '-') {
+ p++;
+ sign = 0;
+ }
+ if (base <= 0) {
+ if (p[0] == '0') {
+ switch (p[1]) {
+ case 'x': case 'X':
+ base = 16;
+ break;
+ case 'b': case 'B':
+ base = 2;
+ break;
+ case 'o': case 'O':
+ base = 8;
+ break;
+ case 'd': case 'D':
+ base = 10;
+ break;
+ default:
+ base = 8;
+ break;
+ }
+ }
+ else if (base < -1) {
+ base = -base;
+ }
+ else {
+ base = 10;
+ }
+ }
+ switch (base) {
+ case 2:
+ if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) {
+ p += 2;
+ }
+ break;
+ case 3:
+ break;
+ case 8:
+ if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) {
+ p += 2;
+ }
+ case 4: case 5: case 6: case 7:
+ break;
+ case 10:
+ if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) {
+ p += 2;
+ }
+ case 9: case 11: case 12: case 13: case 14: case 15:
+ break;
+ case 16:
+ if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) {
+ p += 2;
+ }
+ break;
+ default:
+ if (base < 2 || 36 < base) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ }
+ break;
+ } /* end of switch (base) { */
+ if (p>=pend) {
+ if (badcheck) goto bad;
+ return mrb_fixnum_value(0);
+ }
+ if (*p == '0') { /* squeeze preceding 0s */
+ p++;
+ while (p<pend) {
+ c = *p++;
+ if (c == '_') {
+ if (p<pend && *p == '_') {
+ if (badcheck) goto bad;
+ break;
+ }
+ continue;
+ }
+ if (c != '0') {
+ p--;
+ break;
+ }
+ }
+ if (*(p - 1) == '0')
+ p--;
+ }
+ if (p == pend) {
+ if (badcheck) goto bad;
+ return mrb_fixnum_value(0);
+ }
+ for ( ;p<pend;p++) {
+ if (*p == '_') {
+ p++;
+ if (p==pend) {
+ if (badcheck) goto bad;
+ continue;
+ }
+ if (*p == '_') {
+ if (badcheck) goto bad;
+ break;
+ }
+ }
+ if (badcheck && *p == '\0') {
+ goto nullbyte;
+ }
+ c = conv_digit(*p);
+ if (c < 0 || c >= base) {
+ break;
+ }
+ n *= base;
+ n += c;
+ if (n > (uint64_t)MRB_INT_MAX + (sign ? 0 : 1)) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
+ mrb_str_new(mrb, str, pend-str));
+ }
+ }
+ val = (mrb_int)n;
+ if (badcheck) {
+ if (p == str) goto bad; /* no number */
+ while (p<pend && ISSPACE(*p)) p++;
+ if (p<pend) goto bad; /* trailing garbage */
+ }
+
+ return mrb_fixnum_value(sign ? val : -val);
+ nullbyte:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ /* not reached */
+ bad:
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)",
+ mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str)));
+ /* not reached */
+ return mrb_fixnum_value(0);
+}
+
+MRB_API mrb_value
+mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+{
+ return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
+}
+
+MRB_API const char*
+mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
+{
+ mrb_value str = mrb_str_to_str(mrb, *ptr);
+ struct RString *ps = mrb_str_ptr(str);
+ mrb_int len = mrb_str_strlen(mrb, ps);
+ char *p = RSTR_PTR(ps);
+
+ if (!p || p[len] != '\0') {
+ if (MRB_FROZEN_P(ps)) {
+ *ptr = str = mrb_str_dup(mrb, str);
+ ps = mrb_str_ptr(str);
+ }
+ mrb_str_modify(mrb, ps);
+ return RSTR_PTR(ps);
+ }
+ return p;
+}
+
+MRB_API mrb_value
+mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
+{
+ const char *s;
+ mrb_int len;
+
+ s = mrb_string_value_ptr(mrb, str);
+ len = RSTRING_LEN(str);
+ return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
+}
+
+/* 15.2.10.5.38 */
+/*
+ * call-seq:
+ * str.to_i(base=10) => integer
+ *
+ * Returns the result of interpreting leading characters in <i>str</i> as an
+ * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
+ * end of a valid number are ignored. If there is not a valid number at the
+ * start of <i>str</i>, <code>0</code> is returned. This method never raises an
+ * exception.
+ *
+ * "12345".to_i #=> 12345
+ * "99 red balloons".to_i #=> 99
+ * "0a".to_i #=> 0
+ * "0a".to_i(16) #=> 10
+ * "hello".to_i #=> 0
+ * "1100101".to_i(2) #=> 101
+ * "1100101".to_i(8) #=> 294977
+ * "1100101".to_i(10) #=> 1100101
+ * "1100101".to_i(16) #=> 17826049
+ */
+static mrb_value
+mrb_str_to_i(mrb_state *mrb, mrb_value self)
+{
+ mrb_int base = 10;
+
+ mrb_get_args(mrb, "|i", &base);
+ if (base < 0) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ }
+ return mrb_str_to_inum(mrb, self, base, FALSE);
+}
+
+MRB_API double
+mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
+{
+ char *end;
+ char buf[DBL_DIG * 4 + 10];
+ double d;
+
+ enum {max_width = 20};
+
+ if (!p) return 0.0;
+ while (ISSPACE(*p)) p++;
+
+ if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ return 0.0;
+ }
+ d = mrb_float_read(p, &end);
+ if (p == end) {
+ if (badcheck) {
+bad:
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
+ /* not reached */
+ }
+ return d;
+ }
+ if (*end) {
+ char *n = buf;
+ char *e = buf + sizeof(buf) - 1;
+ char prev = 0;
+
+ while (p < end && n < e) prev = *n++ = *p++;
+ while (*p) {
+ if (*p == '_') {
+ /* remove underscores between digits */
+ if (badcheck) {
+ if (n == buf || !ISDIGIT(prev)) goto bad;
+ ++p;
+ if (!ISDIGIT(*p)) goto bad;
+ }
+ else {
+ while (*++p == '_');
+ continue;
+ }
+ }
+ prev = *p++;
+ if (n < e) *n++ = prev;
+ }
+ *n = '\0';
+ p = buf;
+
+ if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ return 0.0;
+ }
+
+ d = mrb_float_read(p, &end);
+ if (badcheck) {
+ if (!end || p == end) goto bad;
+ while (*end && ISSPACE(*end)) end++;
+ if (*end) goto bad;
+ }
+ }
+ return d;
+}
+
+MRB_API double
+mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
+{
+ char *s;
+ mrb_int len;
+
+ str = mrb_str_to_str(mrb, str);
+ s = RSTRING_PTR(str);
+ len = RSTRING_LEN(str);
+ if (s) {
+ if (badcheck && memchr(s, '\0', len)) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
+ }
+ if (s[len]) { /* no sentinel somehow */
+ struct RString *temp_str = str_new(mrb, s, len);
+ s = RSTR_PTR(temp_str);
+ }
+ }
+ return mrb_cstr_to_dbl(mrb, s, badcheck);
+}
+
+/* 15.2.10.5.39 */
+/*
+ * call-seq:
+ * str.to_f => float
+ *
+ * Returns the result of interpreting leading characters in <i>str</i> as a
+ * floating point number. Extraneous characters past the end of a valid number
+ * are ignored. If there is not a valid number at the start of <i>str</i>,
+ * <code>0.0</code> is returned. This method never raises an exception.
+ *
+ * "123.45e1".to_f #=> 1234.5
+ * "45.67 degrees".to_f #=> 45.67
+ * "thx1138".to_f #=> 0.0
+ */
+static mrb_value
+mrb_str_to_f(mrb_state *mrb, mrb_value self)
+{
+ return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
+}
+
+/* 15.2.10.5.40 */
+/*
+ * call-seq:
+ * str.to_s => str
+ * str.to_str => str
+ *
+ * Returns the receiver.
+ */
+static mrb_value
+mrb_str_to_s(mrb_state *mrb, mrb_value self)
+{
+ if (mrb_obj_class(mrb, self) != mrb->string_class) {
+ return mrb_str_dup(mrb, self);
+ }
+ return self;
+}
+
+/* 15.2.10.5.43 */
+/*
+ * call-seq:
+ * str.upcase! => str or nil
+ *
+ * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
+ * were made.
+ */
+static mrb_value
+mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+ char *p, *pend;
+ mrb_bool modify = FALSE;
+
+ mrb_str_modify(mrb, s);
+ p = RSTRING_PTR(str);
+ pend = RSTRING_END(str);
+ while (p < pend) {
+ if (ISLOWER(*p)) {
+ *p = TOUPPER(*p);
+ modify = TRUE;
+ }
+ p++;
+ }
+
+ if (modify) return str;
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.42 */
+/*
+ * call-seq:
+ * str.upcase => new_str
+ *
+ * Returns a copy of <i>str</i> with all lowercase letters replaced with their
+ * uppercase counterparts. The operation is locale insensitive---only
+ * characters 'a' to 'z' are affected.
+ *
+ * "hEllO".upcase #=> "HELLO"
+ */
+static mrb_value
+mrb_str_upcase(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ str = mrb_str_dup(mrb, self);
+ mrb_str_upcase_bang(mrb, str);
+ return str;
+}
+
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
+
+/*
+ * call-seq:
+ * str.dump -> new_str
+ *
+ * Produces a version of <i>str</i> with all nonprinting characters replaced by
+ * <code>\nnn</code> notation and all special characters escaped.
+ */
+mrb_value
+mrb_str_dump(mrb_state *mrb, mrb_value str)
+{
+ mrb_int len;
+ const char *p, *pend;
+ char *q;
+ struct RString *result;
+
+ len = 2; /* "" */
+ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+ while (p < pend) {
+ unsigned char c = *p++;
+ switch (c) {
+ case '"': case '\\':
+ case '\n': case '\r':
+ case '\t': case '\f':
+ case '\013': case '\010': case '\007': case '\033':
+ len += 2;
+ break;
+
+ case '#':
+ len += IS_EVSTR(p, pend) ? 2 : 1;
+ break;
+
+ default:
+ if (ISPRINT(c)) {
+ len++;
+ }
+ else {
+ len += 4; /* \NNN */
+ }
+ break;
+ }
+ }
+
+ result = str_new(mrb, 0, len);
+ str_with_class(mrb, result, str);
+ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+ q = RSTR_PTR(result);
+ *q++ = '"';
+ while (p < pend) {
+ unsigned char c = *p++;
+
+ switch (c) {
+ case '"':
+ case '\\':
+ *q++ = '\\';
+ *q++ = c;
+ break;
+
+ case '\n':
+ *q++ = '\\';
+ *q++ = 'n';
+ break;
+
+ case '\r':
+ *q++ = '\\';
+ *q++ = 'r';
+ break;
+
+ case '\t':
+ *q++ = '\\';
+ *q++ = 't';
+ break;
+
+ case '\f':
+ *q++ = '\\';
+ *q++ = 'f';
+ break;
+
+ case '\013':
+ *q++ = '\\';
+ *q++ = 'v';
+ break;
+
+ case '\010':
+ *q++ = '\\';
+ *q++ = 'b';
+ break;
+
+ case '\007':
+ *q++ = '\\';
+ *q++ = 'a';
+ break;
+
+ case '\033':
+ *q++ = '\\';
+ *q++ = 'e';
+ break;
+
+ case '#':
+ if (IS_EVSTR(p, pend)) *q++ = '\\';
+ *q++ = '#';
+ break;
+
+ default:
+ if (ISPRINT(c)) {
+ *q++ = c;
+ }
+ else {
+ *q++ = '\\';
+ q[2] = '0' + c % 8; c /= 8;
+ q[1] = '0' + c % 8; c /= 8;
+ q[0] = '0' + c % 8;
+ q += 3;
+ }
+ }
+ }
+ *q = '"';
+ return mrb_obj_value(result);
+}
+
+MRB_API mrb_value
+mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+{
+ struct RString *s = mrb_str_ptr(str);
+ size_t capa;
+ size_t total;
+ ptrdiff_t off = -1;
+
+ if (len == 0) return str;
+ mrb_str_modify(mrb, s);
+ if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
+ off = ptr - RSTR_PTR(s);
+ }
+
+ capa = RSTR_CAPA(s);
+ total = RSTR_LEN(s)+len;
+ if (total >= MRB_INT_MAX) {
+ size_error:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ if (capa <= total) {
+ if (capa == 0) capa = 1;
+ while (capa <= total) {
+ if (capa <= MRB_INT_MAX / 2) {
+ capa *= 2;
+ }
+ else {
+ capa = total+1;
+ }
+ }
+ if (capa <= total || capa > MRB_INT_MAX) {
+ goto size_error;
+ }
+ resize_capa(mrb, s, capa);
+ }
+ if (off != -1) {
+ ptr = RSTR_PTR(s) + off;
+ }
+ memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
+ mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX);
+ RSTR_SET_LEN(s, total);
+ RSTR_PTR(s)[total] = '\0'; /* sentinel */
+ return str;
+}
+
+MRB_API mrb_value
+mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
+{
+ return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+}
+
+MRB_API mrb_value
+mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
+{
+ return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
+}
+
+MRB_API mrb_value
+mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+ str2 = mrb_str_to_str(mrb, str2);
+ return mrb_str_cat_str(mrb, str1, str2);
+}
+
+#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
+
+/*
+ * call-seq:
+ * str.inspect -> string
+ *
+ * Returns a printable version of _str_, surrounded by quote marks,
+ * with special characters escaped.
+ *
+ * str = "hello"
+ * str[3] = "\b"
+ * str.inspect #=> "\"hel\\bo\""
+ */
+mrb_value
+mrb_str_inspect(mrb_state *mrb, mrb_value str)
+{
+ const char *p, *pend;
+ char buf[CHAR_ESC_LEN + 1];
+ mrb_value result = mrb_str_new_lit(mrb, "\"");
+
+ p = RSTRING_PTR(str); pend = RSTRING_END(str);
+ for (;p < pend; p++) {
+ unsigned char c, cc;
+#ifdef MRB_UTF8_STRING
+ mrb_int clen;
+
+ clen = utf8len(p, pend);
+ if (clen > 1) {
+ mrb_int i;
+
+ for (i=0; i<clen; i++) {
+ buf[i] = p[i];
+ }
+ mrb_str_cat(mrb, result, buf, clen);
+ p += clen-1;
+ continue;
+ }
+#endif
+ c = *p;
+ if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
+ buf[0] = '\\'; buf[1] = c;
+ mrb_str_cat(mrb, result, buf, 2);
+ continue;
+ }
+ if (ISPRINT(c)) {
+ buf[0] = c;
+ mrb_str_cat(mrb, result, buf, 1);
+ continue;
+ }
+ switch (c) {
+ case '\n': cc = 'n'; break;
+ case '\r': cc = 'r'; break;
+ case '\t': cc = 't'; break;
+ case '\f': cc = 'f'; break;
+ case '\013': cc = 'v'; break;
+ case '\010': cc = 'b'; break;
+ case '\007': cc = 'a'; break;
+ case 033: cc = 'e'; break;
+ default: cc = 0; break;
+ }
+ if (cc) {
+ buf[0] = '\\';
+ buf[1] = (char)cc;
+ mrb_str_cat(mrb, result, buf, 2);
+ continue;
+ }
+ else {
+ buf[0] = '\\';
+ buf[3] = '0' + c % 8; c /= 8;
+ buf[2] = '0' + c % 8; c /= 8;
+ buf[1] = '0' + c % 8;
+ mrb_str_cat(mrb, result, buf, 4);
+ continue;
+ }
+ }
+ mrb_str_cat_lit(mrb, result, "\"");
+
+ return result;
+}
+
+/*
+ * call-seq:
+ * str.bytes -> array of fixnums
+ *
+ * Returns an array of bytes in _str_.
+ *
+ * str = "hello"
+ * str.bytes #=> [104, 101, 108, 108, 111]
+ */
+static mrb_value
+mrb_str_bytes(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+ mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s));
+ unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s);
+
+ while (p < pend) {
+ mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
+ p++;
+ }
+ return a;
+}
+
+/* ---------------------------*/
+void
+mrb_init_string(mrb_state *mrb)
+{
+ struct RClass *s;
+
+ mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
+
+ mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
+ MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
+
+ mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
+ mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
+ mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
+ mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
+ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
+ mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
+ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
+ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
+ mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
+ mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
+ mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
+ mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
+ mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
+ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
+ mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
+
+ mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
+ mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
+ mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
+ mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
+ mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
+ mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
+ mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
+ mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
+ mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
+ mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
+ mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
+ mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
+ mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
+ mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
+
+ mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
+ mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
+ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
+ mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
+ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
+ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
+ mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
+ mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
+}
+
+/*
+ * Source code for the "strtod" library procedure.
+ *
+ * Copyright (c) 1988-1993 The Regents of the University of California.
+ * Copyright (c) 1994 Sun Microsystems, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software and its documentation for any purpose and without
+ * fee is hereby granted, provided that the above copyright
+ * notice appear in all copies. The University of California
+ * makes no representations about the suitability of this
+ * software for any purpose. It is provided "as is" without
+ * express or implied warranty.
+ *
+ * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $
+ */
+
+#include <ctype.h>
+#include <errno.h>
+
+static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
+ * exponent larger than this will already
+ * produce underflow or overflow, so there's
+ * no need to worry about additional digits.
+ */
+static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */
+ 10., /* is 10^2^i. Used to convert decimal */
+ 100., /* exponents into floating-point numbers. */
+ 1.0e4,
+ 1.0e8,
+ 1.0e16,
+ 1.0e32,
+ 1.0e64,
+ 1.0e128,
+ 1.0e256
+};
+
+MRB_API double
+mrb_float_read(const char *string, char **endPtr)
+/* const char *string; A decimal ASCII floating-point number,
+ * optionally preceded by white space.
+ * Must have form "-I.FE-X", where I is the
+ * integer part of the mantissa, F is the
+ * fractional part of the mantissa, and X
+ * is the exponent. Either of the signs
+ * may be "+", "-", or omitted. Either I
+ * or F may be omitted, or both. The decimal
+ * point isn't necessary unless F is present.
+ * The "E" may actually be an "e". E and X
+ * may both be omitted (but not just one).
+ */
+/* char **endPtr; If non-NULL, store terminating character's
+ * address here. */
+{
+ int sign, expSign = FALSE;
+ double fraction, dblExp;
+ const double *d;
+ register const char *p;
+ register int c;
+ int exp = 0; /* Exponent read from "EX" field. */
+ int fracExp = 0; /* Exponent that derives from the fractional
+ * part. Under normal circumstatnces, it is
+ * the negative of the number of digits in F.
+ * However, if I is very long, the last digits
+ * of I get dropped (otherwise a long I with a
+ * large negative exponent could cause an
+ * unnecessary overflow on I alone). In this
+ * case, fracExp is incremented one for each
+ * dropped digit. */
+ int mantSize; /* Number of digits in mantissa. */
+ int decPt; /* Number of mantissa digits BEFORE decimal
+ * point. */
+ const char *pExp; /* Temporarily holds location of exponent
+ * in string. */
+
+ /*
+ * Strip off leading blanks and check for a sign.
+ */
+
+ p = string;
+ while (isspace(*p)) {
+ p += 1;
+ }
+ if (*p == '-') {
+ sign = TRUE;
+ p += 1;
+ }
+ else {
+ if (*p == '+') {
+ p += 1;
+ }
+ sign = FALSE;
+ }
+
+ /*
+ * Count the number of digits in the mantissa (including the decimal
+ * point), and also locate the decimal point.
+ */
+
+ decPt = -1;
+ for (mantSize = 0; ; mantSize += 1)
+ {
+ c = *p;
+ if (!isdigit(c)) {
+ if ((c != '.') || (decPt >= 0)) {
+ break;
+ }
+ decPt = mantSize;
+ }
+ p += 1;
+ }
+
+ /*
+ * Now suck up the digits in the mantissa. Use two integers to
+ * collect 9 digits each (this is faster than using floating-point).
+ * If the mantissa has more than 18 digits, ignore the extras, since
+ * they can't affect the value anyway.
+ */
+
+ pExp = p;
+ p -= mantSize;
+ if (decPt < 0) {
+ decPt = mantSize;
+ }
+ else {
+ mantSize -= 1; /* One of the digits was the point. */
+ }
+ if (mantSize > 18) {
+ if (decPt - 18 > 29999) {
+ fracExp = 29999;
+ }
+ else {
+ fracExp = decPt - 18;
+ }
+ mantSize = 18;
+ }
+ else {
+ fracExp = decPt - mantSize;
+ }
+ if (mantSize == 0) {
+ fraction = 0.0;
+ p = string;
+ goto done;
+ }
+ else {
+ int frac1, frac2;
+ frac1 = 0;
+ for ( ; mantSize > 9; mantSize -= 1)
+ {
+ c = *p;
+ p += 1;
+ if (c == '.') {
+ c = *p;
+ p += 1;
+ }
+ frac1 = 10*frac1 + (c - '0');
+ }
+ frac2 = 0;
+ for (; mantSize > 0; mantSize -= 1)
+ {
+ c = *p;
+ p += 1;
+ if (c == '.') {
+ c = *p;
+ p += 1;
+ }
+ frac2 = 10*frac2 + (c - '0');
+ }
+ fraction = (1.0e9 * frac1) + frac2;
+ }
+
+ /*
+ * Skim off the exponent.
+ */
+
+ p = pExp;
+ if ((*p == 'E') || (*p == 'e')) {
+ p += 1;
+ if (*p == '-') {
+ expSign = TRUE;
+ p += 1;
+ }
+ else {
+ if (*p == '+') {
+ p += 1;
+ }
+ expSign = FALSE;
+ }
+ while (isdigit(*p)) {
+ exp = exp * 10 + (*p - '0');
+ if (exp > 19999) {
+ exp = 19999;
+ }
+ p += 1;
+ }
+ }
+ if (expSign) {
+ exp = fracExp - exp;
+ }
+ else {
+ exp = fracExp + exp;
+ }
+
+ /*
+ * Generate a floating-point number that represents the exponent.
+ * Do this by processing the exponent one bit at a time to combine
+ * many powers of 2 of 10. Then combine the exponent with the
+ * fraction.
+ */
+
+ if (exp < 0) {
+ expSign = TRUE;
+ exp = -exp;
+ }
+ else {
+ expSign = FALSE;
+ }
+ if (exp > maxExponent) {
+ exp = maxExponent;
+ errno = ERANGE;
+ }
+ dblExp = 1.0;
+ for (d = powersOf10; exp != 0; exp >>= 1, d += 1) {
+ if (exp & 01) {
+ dblExp *= *d;
+ }
+ }
+ if (expSign) {
+ fraction /= dblExp;
+ }
+ else {
+ fraction *= dblExp;
+ }
+
+done:
+ if (endPtr != NULL) {
+ *endPtr = (char *) p;
+ }
+
+ if (sign) {
+ return -fraction;
+ }
+ return fraction;
+}