diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 1999-01-20 04:59:39 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 1999-01-20 04:59:39 +0000 |
commit | 210367ec889f5910e270d6ea2c7ddb8a8d939e61 (patch) | |
tree | feb35473da45947378fbc02defe39bcd79ef600e /string.c | |
parent | 9c5b1986a36c7a700b4c76817e35aa874ba7907c (diff) | |
download | ruby-210367ec889f5910e270d6ea2c7ddb8a8d939e61.tar.gz |
This commit was generated by cvs2svn to compensate for changes in r372,
which included commits to RCS files with non-trunk default branches.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@373 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 2015 |
1 files changed, 1117 insertions, 898 deletions
@@ -6,7 +6,7 @@ $Date$ created at: Mon Aug 9 17:12:58 JST 1993 - Copyright (C) 1993-1996 Yukihiro Matsumoto + Copyright (C) 1993-1998 Yukihiro Matsumoto ************************************************/ @@ -16,32 +16,28 @@ #define BEG(no) regs->beg[no] #define END(no) regs->end[no] -#include <stdio.h> #include <ctype.h> + #ifdef HAVE_UNISTD_H #include <unistd.h> #endif -VALUE cString; - -#define STRLEN(s) RSTRING(s)->len +VALUE rb_cString; #define STR_FREEZE FL_USER1 -#define STR_TAINT FL_USER2 -void reg_prepare_re _((VALUE)); -void kcode_reset_option _((void)); +#define STR_NO_ORIG FL_USER3 + +extern VALUE rb_rs; VALUE -str_new(ptr, len) - UCHAR *ptr; - UINT len; +rb_str_new(ptr, len) + char *ptr; + size_t len; { NEWOBJ(str, struct RString); - OBJSETUP(str, cString, T_STRING); + OBJSETUP(str, rb_cString, T_STRING); - if (rb_safe_level() >= 3) { - FL_SET(str, STR_TAINT); - } + str->ptr = 0; str->len = len; str->orig = 0; str->ptr = ALLOC_N(char,len+1); @@ -53,57 +49,98 @@ str_new(ptr, len) } VALUE -str_new2(ptr) - UCHAR *ptr; +rb_str_new2(ptr) + char *ptr; +{ + return rb_str_new(ptr, strlen(ptr)); +} + +VALUE +rb_tainted_str_new(ptr, len) + char *ptr; + size_t len; { - return str_new(ptr, strlen(ptr)); + return rb_obj_taint(rb_str_new(ptr, len)); } VALUE -str_new3(str) +rb_tainted_str_new2(ptr) + char *ptr; +{ + return rb_obj_taint(rb_str_new2(ptr)); +} + +VALUE +rb_str_new3(str) VALUE str; { NEWOBJ(str2, struct RString); - OBJSETUP(str2, cString, T_STRING); + OBJSETUP(str2, rb_cString, T_STRING); str2->len = RSTRING(str)->len; str2->ptr = RSTRING(str)->ptr; str2->orig = str; - if (rb_safe_level() >= 3) { - FL_SET(str2, STR_TAINT); - } - return (VALUE)str2; } VALUE -str_new4(orig) +rb_str_new4(orig) VALUE orig; { - NEWOBJ(str, struct RString); - OBJSETUP(str, cString, T_STRING); - - str->len = RSTRING(orig)->len; - str->ptr = RSTRING(orig)->ptr; - if (RSTRING(orig)->orig) { - str->orig = RSTRING(orig)->orig; + if (FL_TEST(orig, STR_FREEZE)) { + return orig; + } + else if (RSTRING(orig)->orig && !FL_TEST(orig, STR_NO_ORIG)) { + return rb_str_freeze(RSTRING(orig)->orig); } else { + NEWOBJ(str, struct RString); + OBJSETUP(str, rb_cString, T_STRING); + + str->len = RSTRING(orig)->len; + str->ptr = RSTRING(orig)->ptr; RSTRING(orig)->orig = (VALUE)str; str->orig = 0; + if (rb_safe_level() >= 3) { + FL_SET(str, FL_TAINT); + } + return (VALUE)str; } - if (rb_safe_level() >= 3) { - FL_SET(str, STR_TAINT); - } +} - return (VALUE)str; +VALUE +rb_str_to_str(str) + VALUE str; +{ + return rb_convert_type(str, T_STRING, "String", "to_str"); +} + +static void +rb_str_assign(str, str2) + VALUE str, str2; +{ + if (str == str2) return; + if (NIL_P(str2)) { + RSTRING(str)->ptr = 0; + RSTRING(str)->len = 0; + RSTRING(str)->orig = 0; + return; + } + if ((!RSTRING(str)->orig||FL_TEST(str, STR_NO_ORIG))&&RSTRING(str)->ptr) + free(RSTRING(str)->ptr); + RSTRING(str)->ptr = RSTRING(str2)->ptr; + RSTRING(str)->len = RSTRING(str2)->len; + RSTRING(str)->orig = RSTRING(str2)->orig; + RSTRING(str2)->ptr = 0; /* abandon str2 */ + RSTRING(str2)->len = 0; + if (OBJ_TAINTED(str2)) OBJ_TAINT(str); } -static ID pr_str; +static ID to_str; VALUE -obj_as_string(obj) +rb_obj_as_string(obj) VALUE obj; { VALUE str; @@ -111,111 +148,130 @@ obj_as_string(obj) if (TYPE(obj) == T_STRING) { return obj; } - str = rb_funcall(obj, pr_str, 0); + str = rb_funcall(obj, to_str, 0); if (TYPE(str) != T_STRING) - return any_to_s(obj); + return rb_any_to_s(obj); + if (OBJ_TAINTED(obj)) OBJ_TAINT(str); return str; } static VALUE -str_clone(orig) +rb_str_clone(orig) VALUE orig; { VALUE str; - if (RSTRING(orig)->orig) - str = str_new3(RSTRING(orig)->orig); + if (RSTRING(orig)->orig && !FL_TEST(orig, STR_NO_ORIG)) + str = rb_str_new3(RSTRING(orig)->orig); else - str = str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); + str = rb_str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); + if (RSTRING(orig)->orig && FL_TEST(orig, STR_NO_ORIG)) + RSTRING(str)->orig = RSTRING(orig)->orig; CLONESETUP(str, orig); return str; } VALUE -str_dup(str) +rb_str_dup(str) VALUE str; { - VALUE s = str_new(RSTRING(str)->ptr, RSTRING(str)->len); - if (str_tainted(str)) s = str_taint(s); + VALUE s; + + if (TYPE(str) != T_STRING) str = rb_str_to_str(str); + s = rb_str_new(RSTRING(str)->ptr, RSTRING(str)->len); + if (OBJ_TAINTED(str)) OBJ_TAINT(s); + return s; } static VALUE -str_s_new(class, orig) - VALUE class; +rb_str_s_new(klass, orig) + VALUE klass; VALUE orig; { NEWOBJ(str, struct RString); - OBJSETUP(str, class, T_STRING); + OBJSETUP(str, klass, T_STRING); - orig = obj_as_string(orig); + str->orig = 0; + orig = rb_obj_as_string(orig); str->len = RSTRING(orig)->len; - str->ptr = ALLOC_N(char, RSTRING(orig)->len+1); - if (str->ptr) { + if (RSTRING(orig)->ptr) { + str->ptr = ALLOC_N(char, RSTRING(orig)->len+1); memcpy(str->ptr, RSTRING(orig)->ptr, RSTRING(orig)->len); + str->ptr[RSTRING(orig)->len] = '\0'; } - str->ptr[RSTRING(orig)->len] = '\0'; - str->orig = 0; if (rb_safe_level() >= 3) { - FL_SET(str, STR_TAINT); + FL_SET(str, FL_TAINT); } + rb_obj_call_init((VALUE)str); return (VALUE)str; } static VALUE -str_length(str) +rb_str_length(str) VALUE str; { return INT2FIX(RSTRING(str)->len); } +static VALUE +rb_str_empty(str) + VALUE str; +{ + if (RSTRING(str)->len == 0) + return Qtrue; + return Qfalse; +} + VALUE -str_plus(str1, str2) +rb_str_plus(str1, str2) VALUE str1, str2; { VALUE str3; - str2 = obj_as_string(str2); - str3 = str_new(0, RSTRING(str1)->len+RSTRING(str2)->len); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len); memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len); - memcpy(RSTRING(str3)->ptr+RSTRING(str1)->len, RSTRING(str2)->ptr, RSTRING(str2)->len); + memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len, + RSTRING(str2)->ptr, RSTRING(str2)->len); RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0'; - if (str_tainted(str1) || str_tainted(str2)) - return str_taint(str3); - return (VALUE)str3; + if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) + OBJ_TAINT(str3); + return str3; } VALUE -str_times(str, times) +rb_str_times(str, times) VALUE str; VALUE times; { VALUE str2; - int i, len; + size_t i, len; len = NUM2INT(times); if (len < 0) { - ArgError("negative argument"); + rb_raise(rb_eArgError, "negative argument"); } - str2 = str_new(0, RSTRING(str)->len*len); + str2 = rb_str_new(0, RSTRING(str)->len*len); for (i=0; i<len; i++) { - memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len), RSTRING(str)->ptr, RSTRING(str)->len); + memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len), + RSTRING(str)->ptr, RSTRING(str)->len); } RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0'; - if (str_tainted(str)) { - return str_taint((VALUE)str2); + if (OBJ_TAINTED(str)) { + OBJ_TAINT(str2); } return str2; } -VALUE -str_format(str, arg) +static VALUE +rb_str_format(str, arg) VALUE str, arg; { VALUE *argv; @@ -224,44 +280,47 @@ str_format(str, arg) argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1); argv[0] = str; MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len); - return f_sprintf(RARRAY(arg)->len+1, argv); + return rb_f_sprintf(RARRAY(arg)->len+1, argv); } argv = ALLOCA_N(VALUE, 2); argv[0] = str; argv[1] = arg; - return f_sprintf(2, argv); + return rb_f_sprintf(2, argv); } VALUE -str_substr(str, start, len) +rb_str_substr(str, start, len) VALUE str; - int start, len; + size_t start, len; { - struct RString *str2; + VALUE str2; if (start < 0) { start = RSTRING(str)->len + start; } if (RSTRING(str)->len <= start || len < 0) { - return str_new(0,0); + return rb_str_new(0,0); } if (RSTRING(str)->len < start + len) { len = RSTRING(str)->len - start; } - return str_new(RSTRING(str)->ptr+start, len); + str2 = rb_str_new(RSTRING(str)->ptr+start, len); + if (OBJ_TAINTED(str)) OBJ_TAINT(str2); + + return str2; } static VALUE -str_subseq(str, beg, end) +rb_str_subseq(str, beg, end) VALUE str; - int beg, end; + size_t beg, end; { - int len; + size_t len; if ((beg > 0 && end > 0 || beg < 0 && end < 0) && beg > end) { - IndexError("end smaller than beg [%d..%d]", beg, end); + rb_raise(rb_eIndexError, "end smaller than beg [%d..%d]", beg, end); } if (beg < 0) { @@ -277,7 +336,7 @@ str_subseq(str, beg, end) } if (beg >= RSTRING(str)->len) { - return str_new(0, 0); + return rb_str_new(0, 0); } len = end - beg + 1; @@ -285,24 +344,20 @@ str_subseq(str, beg, end) len = 0; } - return str_substr(str, beg, len); + return rb_str_substr(str, beg, len); } -extern VALUE ignorecase; - void -str_modify(str) +rb_str_modify(str) VALUE str; { - UCHAR *ptr; + char *ptr; - if (rb_safe_level() >= 5) { - extern VALUE eSecurityError; - Raise(eSecurityError, "cannot change string status"); - } if (FL_TEST(str, STR_FREEZE)) - TypeError("can't modify frozen string"); - if (!RSTRING(str)->orig) return; + rb_raise(rb_eTypeError, "can't modify frozen string"); + if (rb_safe_level() >= 4 && !FL_TEST(str, FL_TAINT)) + rb_raise(rb_eSecurityError, "Insecure: can't modify string"); + if (!RSTRING(str)->orig || FL_TEST(str, STR_NO_ORIG)) return; ptr = RSTRING(str)->ptr; RSTRING(str)->ptr = ALLOC_N(char, RSTRING(str)->len+1); if (RSTRING(str)->ptr) { @@ -313,7 +368,7 @@ str_modify(str) } VALUE -str_freeze(str) +rb_str_freeze(str) VALUE str; { FL_SET(str, STR_FREEZE); @@ -321,48 +376,32 @@ str_freeze(str) } static VALUE -str_frozen_p(str) +rb_str_frozen_p(str) VALUE str; { if (FL_TEST(str, STR_FREEZE)) - return TRUE; - return FALSE; -} - -VALUE -str_dup_freezed(str) - VALUE str; -{ - str = str_dup(str); - str_freeze(str); - return str; + return Qtrue; + return Qfalse; } VALUE -str_taint(str) +rb_str_dup_frozen(str) VALUE str; { - if (TYPE(str) == T_STRING) { - FL_SET(str, STR_TAINT); + if (RSTRING(str)->orig && !FL_TEST(str, STR_NO_ORIG)) { + return rb_str_freeze(RSTRING(str)->orig); } - return str; -} - -VALUE -str_tainted(str) - VALUE str; -{ - if (FL_TEST(str, STR_TAINT)) - return TRUE; - return FALSE; + if (FL_TEST(str, STR_FREEZE)) + return str; + return rb_str_freeze(rb_str_dup(str)); } VALUE -str_resize(str, len) +rb_str_resize(str, len) VALUE str; - int len; + size_t len; { - str_modify(str); + rb_str_modify(str); if (len >= 0) { if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) { @@ -371,17 +410,17 @@ str_resize(str, len) RSTRING(str)->len = len; RSTRING(str)->ptr[len] = '\0'; /* sentinel */ } - return (VALUE)str; + return str; } VALUE -str_cat(str, ptr, len) +rb_str_cat(str, ptr, len) VALUE str; - UCHAR *ptr; - UINT len; + char *ptr; + size_t len; { if (len > 0) { - str_modify(str); + rb_str_modify(str); REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len + len + 1); if (ptr) memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len); @@ -391,24 +430,30 @@ str_cat(str, ptr, len) return str; } -static VALUE -str_concat(str1, str2) +VALUE +rb_str_concat(str1, str2) VALUE str1, str2; { - str2 = obj_as_string(str2); - str_cat(str1, RSTRING(str2)->ptr, RSTRING(str2)->len); - return str1; + if (FIXNUM_P(str2)) { + int i = FIX2INT(str2); + if (0 <= i && i <= 0xff) { /* byte */ + char c = i; + return rb_str_cat(str1, &c, 1); + } + } + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + return rb_str_cat(str1, RSTRING(str2)->ptr, RSTRING(str2)->len); } int -str_hash(str) +rb_str_hash(str) VALUE str; { - register int len = RSTRING(str)->len; - register UCHAR *p = RSTRING(str)->ptr; + register size_t len = RSTRING(str)->len; + register char *p = RSTRING(str)->ptr; register int key = 0; - if (RTEST(ignorecase)) { + if (ruby_ignorecase) { while (len--) { key = key*65599 + toupper(*p); p++; @@ -424,75 +469,79 @@ str_hash(str) } static VALUE -str_hash_method(str) +rb_str_hash_method(str) VALUE str; { - int key = str_hash(str); + int key = rb_str_hash(str); return INT2FIX(key); } -#define min(a,b) (((a)>(b))?(b):(a)) +#define lesser(a,b) (((a)>(b))?(b):(a)) int -str_cmp(str1, str2) +rb_str_cmp(str1, str2) VALUE str1, str2; { - UINT len; + size_t len; int retval; - if (RTEST(ignorecase)) { - return str_cicmp(str1, str2); + if (ruby_ignorecase) { + return rb_str_cicmp(str1, str2); } - len = min(RSTRING(str1)->len, RSTRING(str2)->len); + len = lesser(RSTRING(str1)->len, RSTRING(str2)->len); retval = memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len); if (retval == 0) { - return RSTRING(str1)->ptr[len] - RSTRING(str2)->ptr[len]; + if (RSTRING(str1)->len == RSTRING(str2)->len) return 0; + if (RSTRING(str1)->len > RSTRING(str2)->len) return 1; + return -1; } - return retval; + if (retval == 0) return 0; + if (retval > 0) return 1; + return -1; } static VALUE -str_equal(str1, str2) +rb_str_equal(str1, str2) VALUE str1, str2; { if (TYPE(str2) != T_STRING) - return FALSE; + return Qfalse; if (RSTRING(str1)->len == RSTRING(str2)->len - && str_cmp(str1, str2) == 0) { - return TRUE; + && rb_str_cmp(str1, str2) == 0) { + return Qtrue; } - return FALSE; + return Qfalse; } static VALUE -str_cmp_method(str1, str2) +rb_str_cmp_method(str1, str2) VALUE str1, str2; { int result; - str2 = obj_as_string(str2); - result = str_cmp(str1, str2); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + result = rb_str_cmp(str1, str2); return INT2FIX(result); } static VALUE -str_match(x, y) +rb_str_match(x, y) VALUE x, y; { VALUE reg; - int start; + size_t start; switch (TYPE(y)) { case T_REGEXP: - return reg_match(y, x); + return rb_reg_match(y, x); case T_STRING: - reg = reg_regcomp(y); - start = reg_search(reg, x, 0, 0); + reg = rb_reg_regcomp(y); + start = rb_reg_search(reg, x, 0, 0); if (start == -1) { - return FALSE; + return Qfalse; } return INT2FIX(start); @@ -502,19 +551,19 @@ str_match(x, y) } static VALUE -str_match2(str) +rb_str_match2(str) VALUE str; { - return reg_match2(reg_regcomp(str)); + return rb_reg_match2(rb_reg_regcomp(str)); } -static int -str_index(str, sub, offset) +static size_t +rb_str_index(str, sub, offset) VALUE str, sub; - int offset; + size_t offset; { - UCHAR *s, *e, *p; - int len; + char *s, *e, *p; + size_t len; if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1; s = RSTRING(str)->ptr+offset; @@ -527,21 +576,21 @@ str_index(str, sub, offset) } s++; } - return -1; + return (size_t)-1; } static VALUE -str_index_method(argc, argv, str) +rb_str_index_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE sub; VALUE initpos; - int pos; + size_t pos; if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { - pos = NUM2INT(initpos); + pos = NUM2UINT(initpos); } else { pos = 0; @@ -549,17 +598,17 @@ str_index_method(argc, argv, str) switch (TYPE(sub)) { case T_REGEXP: - pos = reg_search(sub, str, pos, (struct re_registers *)-1); + pos = rb_reg_search(sub, str, pos, 0); break; case T_STRING: - pos = str_index(str, sub, pos); + pos = rb_str_index(str, sub, pos); break; case T_FIXNUM: { int c = FIX2INT(sub); - int len = RSTRING(str)->len; + size_t len = RSTRING(str)->len; char *p = RSTRING(str)->ptr; for (;pos<len;pos++) { @@ -569,7 +618,8 @@ str_index_method(argc, argv, str) } default: - TypeError("Type mismatch: %s given", rb_class2name(CLASS_OF(sub))); + rb_raise(rb_eTypeError, "Type mismatch: %s given", + rb_class2name(CLASS_OF(sub))); } if (pos == -1) return Qnil; @@ -577,18 +627,18 @@ str_index_method(argc, argv, str) } static VALUE -str_rindex(argc, argv, str) +rb_str_rindex(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE sub; VALUE initpos; - int pos, len; - UCHAR *s, *sbeg, *t; + size_t pos, len; + char *s, *sbeg, *t; if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { - pos = NUM2INT(initpos); + pos = NUM2UINT(initpos); if (pos >= RSTRING(str)->len) pos = RSTRING(str)->len; } else { @@ -597,11 +647,7 @@ str_rindex(argc, argv, str) switch (TYPE(sub)) { case T_REGEXP: - reg_prepare_re(sub); - pos = re_search(RREGEXP(sub)->ptr, - RSTRING(str)->ptr, RSTRING(str)->len, - pos, -pos, 0); - kcode_reset_option(); + pos = rb_reg_search(sub, str, pos, 1); if (pos >= 0) return INT2FIX(pos); break; @@ -631,14 +677,15 @@ str_rindex(argc, argv, str) } default: - TypeError("Type mismatch: %s given", rb_class2name(CLASS_OF(sub))); + rb_raise(rb_eTypeError, "Type mismatch: %s given", + rb_class2name(CLASS_OF(sub))); } return Qnil; } -static UCHAR +static char succ_char(s) - UCHAR *s; + char *s; { char c = *s; @@ -662,19 +709,19 @@ succ_char(s) } static VALUE -str_succ(orig) +rb_str_succ(orig) VALUE orig; { VALUE str, str2; - UCHAR *sbeg, *s; + char *sbeg, *s; char c = -1; - str = str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); + str = rb_str_new(RSTRING(orig)->ptr, RSTRING(orig)->len); sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1; while (sbeg <= s) { - if (isalnum(*s) && (c = succ_char(s)) == 0) break; + if (ISALNUM(*s) && (c = succ_char(s)) == 0) break; s--; } if (s < sbeg) { @@ -682,35 +729,45 @@ str_succ(orig) RSTRING(str)->ptr[RSTRING(str)->len-1] += 1; } else { - str2 = str_new(0, RSTRING(str)->len+1); + str2 = rb_str_new(0, RSTRING(str)->len+1); RSTRING(str2)->ptr[0] = c; memcpy(RSTRING(str2)->ptr+1, RSTRING(str)->ptr, RSTRING(str)->len); str = str2; } } - if (str_tainted(orig)) { - return str_taint(str); + if (OBJ_TAINTED(orig)) { + OBJ_TAINT(str); } return str; } +static VALUE +rb_str_succ_bang(str) + VALUE str; +{ + rb_str_modify(str); + rb_str_assign(str, rb_str_succ(str)); + + return str; +} + VALUE -str_upto(beg, end) +rb_str_upto(beg, end) VALUE beg, end; { VALUE current; - Check_Type(end, T_STRING); + if (TYPE(end) != T_STRING) end = rb_str_to_str(end); if (RTEST(rb_funcall(beg, '>', 1, end))) return Qnil; current = beg; for (;;) { rb_yield(current); - if (str_equal(current, end)) break; - current = str_succ(current); + if (rb_str_equal(current, end)) break; + current = rb_str_succ(current); if (RSTRING(current)->len > RSTRING(end)->len) break; } @@ -719,11 +776,11 @@ str_upto(beg, end) } static VALUE -str_aref(str, indx) +rb_str_aref(str, indx) VALUE str; VALUE indx; { - int idx; + size_t idx; switch (TYPE(indx)) { case T_FIXNUM: @@ -735,31 +792,32 @@ str_aref(str, indx) if (idx < 0 || RSTRING(str)->len <= idx) { return Qnil; } - return (VALUE)INT2FIX(RSTRING(str)->ptr[idx] & 0xff); + return INT2FIX(RSTRING(str)->ptr[idx] & 0xff); case T_REGEXP: - if (str_match(str, indx)) - return reg_last_match(0); + if (rb_reg_search(indx, str, 0, 0) >= 0) + return rb_reg_last_match(rb_backref_get()); return Qnil; case T_STRING: - if (str_index(str, indx, 0) != -1) return indx; + if (rb_str_index(str, indx, 0) != (size_t)-1) return indx; return Qnil; default: /* check if indx is Range */ { - int beg, end; - if (range_beg_end(indx, &beg, &end)) { - return str_subseq(str, beg, end); + size_t beg, end; + if (rb_range_beg_end(indx, &beg, &end)) { + return rb_str_subseq(str, beg, end); } } - IndexError("Invalid index for string"); + rb_raise(rb_eIndexError, "Invalid index for string"); } + return Qnil; /* not reached */ } static VALUE -str_aref_method(argc, argv, str) +rb_str_aref_method(argc, argv, str) int argc; VALUE *argv; VALUE str; @@ -767,15 +825,15 @@ str_aref_method(argc, argv, str) VALUE arg1, arg2; if (rb_scan_args(argc, argv, "11", &arg1, &arg2) == 2) { - return str_substr(str, NUM2INT(arg1), NUM2INT(arg2)); + return rb_str_substr(str, NUM2INT(arg1), NUM2INT(arg2)); } - return str_aref(str, arg1); + return rb_str_aref(str, arg1); } static void -str_replace(str, beg, len, val) +rb_str_replace(str, beg, len, val) VALUE str, val; - int beg, len; + size_t beg, len; { if (len < RSTRING(val)->len) { /* expand string */ @@ -783,25 +841,28 @@ str_replace(str, beg, len, val) } if (len != RSTRING(val)->len) { - memmove(RSTRING(str)->ptr+beg+RSTRING(val)->len, - RSTRING(str)->ptr+beg+len, - RSTRING(str)->len-(beg+len)); + memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len, + RSTRING(str)->ptr + beg + len, + RSTRING(str)->len - (beg + len)); + } + if (RSTRING(str)->len < beg && len < 0) { + MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len); } memcpy(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len); RSTRING(str)->len += RSTRING(val)->len - len; RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; } -/* str_replace2() understands negatice offset */ +/* rb_str_replace2() understands negatice offset */ static void -str_replace2(str, beg, end, val) - VALUE str, *val; - int beg, end; +rb_str_replace2(str, beg, end, val) + VALUE str, val; + size_t beg, end; { - int len; + size_t len; if ((beg > 0 && end > 0 || beg < 0 && end < 0) && beg > end) { - IndexError("end smaller than beg [%d..%d]", beg, end); + rb_raise(rb_eIndexError, "end smaller than beg [%d..%d]", beg, end); } if (beg < 0) { @@ -827,307 +888,295 @@ str_replace2(str, beg, end, val) len = 0; } - str_replace(str, beg, len, val); + rb_str_replace(str, beg, len, val); } -static VALUE -str_sub_s(str, pat, val, once) - VALUE str, pat, val; - int once; -{ - VALUE result, repl; - int beg, offset, n; - struct re_registers *regs; - - switch (TYPE(pat)) { - case T_REGEXP: - break; - - case T_STRING: - pat = reg_regcomp(pat); - break; - - default: - /* type failed */ - Check_Type(pat, T_REGEXP); - } - - val = obj_as_string(val); - result = str_new(0,0); - offset=0; n=0; - while ((beg=reg_search(pat, str, offset, 0)) >= 0) { - n++; - - regs = RMATCH(backref_get())->regs; - str_cat(result, RSTRING(str)->ptr+offset, beg-offset); - - repl = reg_regsub(val, str, regs); - str_cat(result, RSTRING(repl)->ptr, RSTRING(repl)->len); - if (END(0) == offset) { - /* - * Always consume at least one character of the input string - * in order to prevent infinite loops. - */ - if (RSTRING(str)->len > 0) { - str_cat(result, RSTRING(str)->ptr+END(0), 1); - } - offset = END(0)+1; - } - else { - offset = END(0); - } - - if (once) break; - if (offset >= STRLEN(str)) break; - } - if (n == 0) return Qnil; - if (RSTRING(str)->len > offset) { - str_cat(result, RSTRING(str)->ptr+offset, RSTRING(str)->len-offset); - } - - if (str_tainted(val)) str_taint(result); - return result; -} +static VALUE rb_str_sub_bang _((int, VALUE*, VALUE)); static VALUE -str_sub_f(str, pat, val, once) - VALUE str; - VALUE pat; - VALUE val; - int once; -{ - VALUE result; - - str_modify(str); - result = str_sub_s(str, pat, val, once); - - if (NIL_P(result)) return Qnil; - str_resize(str, RSTRING(result)->len); - memcpy(RSTRING(str)->ptr, RSTRING(result)->ptr, RSTRING(result)->len); - if (str_tainted(result)) str_taint(str); - - return (VALUE)str; -} - -static VALUE -str_sub_iter_s(str, pat, once) - VALUE str; - VALUE pat; - int once; -{ - VALUE val, result; - int beg, offset, n, null; - struct re_registers *regs; - - if (!iterator_p()) { - ArgError("Wrong # of arguments(1 for 2)"); - } - - switch (TYPE(pat)) { - case T_REGEXP: - break; - - case T_STRING: - pat = reg_regcomp(pat); - break; - - default: - /* type failed */ - Check_Type(pat, T_REGEXP); - } - - result = str_new(0,0); - n = 0; offset = 0; - while ((beg=reg_search(pat, str, offset, 0)) >= 0) { - n++; - - null = 0; - str_cat(result, RSTRING(str)->ptr+offset, beg-offset); - - regs = RMATCH(backref_get())->regs; - if (END(0) == offset) { - null = 1; - offset = END(0)+1; - } - else { - offset = END(0); - } - - val = rb_yield(reg_nth_match(0, backref_get())); - val = obj_as_string(val); - str_cat(result, RSTRING(val)->ptr, RSTRING(val)->len); - if (null && RSTRING(str)->len) { - str_cat(result, RSTRING(str)->ptr+offset-1, 1); - } - - if (once) break; - if (offset >= STRLEN(str)) break; - } - if (n == 0) return Qnil; - if (RSTRING(str)->len > offset) { - str_cat(result, RSTRING(str)->ptr+offset, RSTRING(str)->len-offset); - } - - return result; -} - -static VALUE -str_sub_iter_f(str, pat, once) - VALUE str; - VALUE pat; - int once; -{ - VALUE result; - - str_modify(str); - result = str_sub_iter_s(str, pat, once); - - if (NIL_P(result)) return Qnil; - str_resize(str, RSTRING(result)->len); - memcpy(RSTRING(str)->ptr, RSTRING(result)->ptr, RSTRING(result)->len); - - return (VALUE)str; -} - -static VALUE -str_aset(str, indx, val) +rb_str_aset(str, indx, val) VALUE str; VALUE indx, val; { - int idx, beg, end, offset; + size_t idx, beg, end; switch (TYPE(indx)) { case T_FIXNUM: - idx = NUM2INT(indx); + idx = NUM2UINT(indx); if (idx < 0) { idx = RSTRING(str)->len + idx; } if (idx < 0 || RSTRING(str)->len <= idx) { - IndexError("index %d out of range [0..%d]", idx, RSTRING(str)->len-1); + rb_raise(rb_eIndexError, "index %d out of range [0..%d]", idx, + RSTRING(str)->len - 1); + } + if (TYPE(val) == T_STRING) { + rb_str_replace(str, idx, 1, val); + } + else { + RSTRING(str)->ptr[idx] = NUM2INT(val) & 0xff; } - RSTRING(str)->ptr[idx] = FIX2INT(val) & 0xff; return val; case T_REGEXP: - str_sub_f(str, indx, val, 0); + { + VALUE args[2]; + args[0] = indx; + args[1] = val; + rb_str_sub_bang(2, args, str); + } return val; case T_STRING: - for (offset=0; - (beg=str_index(str, indx, offset)) >= 0; - offset=beg+STRLEN(val)) { - end = beg + STRLEN(indx) - 1; - str_replace2(str, beg, end, val); + beg = rb_str_index(str, indx, 0); + if (beg != (size_t)-1) { + end = beg + RSTRING(indx)->len - 1; + rb_str_replace2(str, beg, end, val); } - if (offset == 0) return Qnil; return val; default: /* check if indx is Range */ { - int beg, end; - if (range_beg_end(indx, &beg, &end)) { - str_replace2(str, beg, end, val); + size_t beg, end; + if (rb_range_beg_end(indx, &beg, &end)) { + if (TYPE(val) != T_STRING) val = rb_str_to_str(val); + rb_str_replace2(str, beg, end, val); return val; } } - IndexError("Invalid index for string"); + rb_raise(rb_eIndexError, "Invalid index for string"); } } static VALUE -str_aset_method(argc, argv, str) +rb_str_aset_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE arg1, arg2, arg3; - str_modify(str); + rb_str_modify(str); if (rb_scan_args(argc, argv, "21", &arg1, &arg2, &arg3) == 3) { - int beg, len; - - Check_Type(arg3, T_STRING); + size_t beg, len; - beg = NUM2INT(arg1); + if (TYPE(arg3) != T_STRING) arg3 = rb_str_to_str(arg3); + beg = NUM2UINT(arg1); if (beg < 0) { beg = RSTRING(str)->len + beg; if (beg < 0) beg = 0; } - len = NUM2INT(arg2); - if (len < 0) IndexError("negative length %d", len); + len = NUM2UINT(arg2); + if (len < 0) rb_raise(rb_eIndexError, "negative length %d", len); if (beg + len > RSTRING(str)->len) { len = RSTRING(str)->len - beg; } - str_replace(str, beg, len, arg3); + rb_str_replace(str, beg, len, arg3); return arg3; } - return str_aset(str, arg1, arg2); + return rb_str_aset(str, arg1, arg2); } static VALUE -str_sub_bang(argc, argv, str) - int argc; - VALUE *argv; - VALUE str; +get_pat(pat) + VALUE pat; { - VALUE pat, val; + switch (TYPE(pat)) { + case T_REGEXP: + break; + + case T_STRING: + pat = rb_reg_regcomp(pat); + break; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(str, pat, 1); + default: + /* type failed */ + Check_Type(pat, T_REGEXP); } - return str_sub_f(str, pat, val, 1); + return pat; } static VALUE -str_sub(argc, argv, str) +rb_str_sub_bang(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val, v; + VALUE pat, repl, match; + struct re_registers *regs; + int iter = 0; + size_t plen; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(str, pat, 1); + if (argc == 1 && rb_iterator_p()) { + iter = 1; + } + else if (argc == 2) { + repl = rb_obj_as_string(argv[1]);; } else { - v = str_sub_s(str, pat, val, 1); + rb_raise(rb_eArgError, "Wrong # of arguments(%d for 2)", argc); } - if (NIL_P(v)) return str_dup(str); - return v; + + pat = get_pat(argv[0]); + if (rb_reg_search(pat, str, 0, 0) >= 0) { + rb_str_modify(str); + match = rb_backref_get(); + regs = RMATCH(match)->regs; + + if (iter) { + repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + repl = rb_reg_regsub(repl, str, regs); + } + plen = END(0) - BEG(0); + if (RSTRING(repl)->len > plen) { + REALLOC_N(RSTRING(str)->ptr, char, + RSTRING(str)->len + RSTRING(repl)->len - plen + 1); + } + if (RSTRING(repl)->len != plen) { + memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len, + RSTRING(str)->ptr + BEG(0) + plen, + RSTRING(str)->len - BEG(0) - plen); + } + memcpy(RSTRING(str)->ptr + BEG(0), + RSTRING(repl)->ptr, RSTRING(repl)->len); + RSTRING(str)->len += RSTRING(repl)->len - plen; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + return Qnil; } static VALUE -str_gsub_bang(argc, argv, str) +rb_str_sub(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val; + VALUE val = rb_str_sub_bang(argc, argv, str = rb_str_dup(str)); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(str, pat, 0); - } - return str_sub_f(str, pat, val, 0); + if (NIL_P(val)) return str; + return val; } static VALUE -str_gsub(argc, argv, str) +rb_str_gsub_bang(argc, argv, str) int argc; VALUE *argv; VALUE str; { - VALUE pat, val, v; + VALUE pat, val, repl, match; + struct re_registers *regs; + int beg, offset, n; + int iter = 0; + char *buf, *bp, *cp; + size_t blen, len; - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(str, pat, 0); + if (argc == 1 && rb_iterator_p()) { + iter = 1; + } + else if (argc == 2) { + repl = rb_obj_as_string(argv[1]);; } else { - v = str_sub_s(str, pat, val, 0); + rb_raise(rb_eArgError, "Wrong # of arguments(%d for 2)", argc); + } + + pat = get_pat(argv[0]); + offset=0; n=0; + beg = rb_reg_search(pat, str, 0, 0); + if (beg < 0) return Qnil; /* no match, no substitution */ + + blen = RSTRING(str)->len + 30; /* len + margin */ + buf = ALLOC_N(char, blen); + bp = buf; + cp = RSTRING(str)->ptr; + + while (beg >= 0) { + n++; + match = rb_backref_get(); + regs = RMATCH(match)->regs; + if (iter) { + val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + val = rb_reg_regsub(repl, str, regs); + } + len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3; + if (blen < len) { + while (blen < len) blen *= 2; + len = bp - buf; + REALLOC_N(buf, char, blen); + bp = buf + len; + } + len = beg - offset; /* copy pre-match substr */ + memcpy(bp, cp, len); + bp += len; + memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len); + bp += RSTRING(val)->len; + if (BEG(0) == END(0)) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. + */ + len = mbclen(RSTRING(str)->ptr[END(0)]); + if (RSTRING(str)->len > END(0)) { + memcpy(bp, RSTRING(str)->ptr+END(0), len); + bp += len; + } + offset = END(0) + len; + } + else { + offset = END(0); + } + cp = RSTRING(str)->ptr + offset; + if (offset > RSTRING(str)->len) break; + beg = rb_reg_search(pat, str, offset, 0); + } + if (RSTRING(str)->len > offset) { + len = bp - buf; + if (blen - len < RSTRING(str)->len - offset) { + REALLOC_N(buf, char, len + RSTRING(str)->len - offset + 1); + bp = buf + len; + } + memcpy(bp, cp, RSTRING(str)->len - offset); + bp += RSTRING(str)->len - offset; } - if (NIL_P(v)) return str_dup(str); - return v; + rb_str_modify(str); + free(RSTRING(str)->ptr); + RSTRING(str)->ptr = buf; + RSTRING(str)->len = len = bp - buf; + RSTRING(str)->ptr[len] = '\0'; + + return str; +} + +static VALUE +rb_str_gsub(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE val = rb_str_gsub_bang(argc, argv, str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; +} + +static VALUE +rb_str_replace_method(str, str2) + VALUE str, str2; +{ + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); + rb_str_modify(str); + rb_str_resize(str, RSTRING(str2)->len); + memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len); + if (OBJ_TAINTED(str2)) OBJ_TAINT(str); + + return str; } static VALUE @@ -1135,108 +1184,86 @@ uscore_get() { VALUE line; - line = lastline_get(); + line = rb_lastline_get(); if (TYPE(line) != T_STRING) { - TypeError("$_ value need to be String (%s given)", - rb_class2name(CLASS_OF(line))); + rb_raise(rb_eTypeError, "$_ value need to be String (%s given)", + NIL_P(line)?"nil":rb_class2name(CLASS_OF(line))); } return line; } static VALUE -f_sub_bang(argc, argv) +rb_f_sub_bang(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line; - - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(line, pat, 1); - } - return str_sub_f(line, pat, val, 1); + return rb_str_sub_bang(argc, argv, uscore_get()); } static VALUE -f_sub(argc, argv) +rb_f_sub(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line, v; + VALUE line, v; - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(line, pat, 1); - } - else { - v = str_sub_s(line, pat, val, 1); - } - if (!NIL_P(v)) { - lastline_set(v); - return v; - } - return line; + line = rb_str_dup(uscore_get()); + v = rb_str_sub_bang(argc, argv, line); + if (NIL_P(v)) return line; + rb_lastline_set(v); + return v; } static VALUE -f_gsub_bang(argc, argv) +rb_f_gsub_bang(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line; - - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - return str_sub_iter_f(line, pat, 0); - } - return str_sub_f(line, pat, val, 0); + return rb_str_gsub_bang(argc, argv, uscore_get()); } static VALUE -f_gsub(argc, argv) +rb_f_gsub(argc, argv) int argc; VALUE *argv; { - VALUE pat, val, line, v; - - line = uscore_get(); - if (rb_scan_args(argc, argv, "11", &pat, &val) == 1) { - v = str_sub_iter_s(line, pat, 0); - } - else { - v = str_sub_s(line, pat, val, 0); - } - if (NIL_P(v)) v = str_dup(line); - lastline_set(v); + VALUE line, v; + line = rb_str_dup(uscore_get()); + v = rb_str_gsub_bang(argc, argv, line); + if (NIL_P(v)) return line; + rb_lastline_set(v); return v; } static VALUE -str_reverse_bang(str) +rb_str_reverse_bang(str) VALUE str; { - UCHAR *s, *e, *p; + char *s, *e, *p, *q; s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1; - p = ALLOCA_N(char, RSTRING(str)->len); + p = q = ALLOCA_N(char, RSTRING(str)->len); while (e >= s) { *p++ = *e--; } - MEMCPY(RSTRING(str)->ptr, p, char, RSTRING(str)->len); + MEMCPY(RSTRING(str)->ptr, q, char, RSTRING(str)->len); - return (VALUE)str; + return str; } static VALUE -str_reverse(str) +rb_str_reverse(str) VALUE str; { - VALUE obj = str_new(0, RSTRING(str)->len); - UCHAR *s, *e, *p; + VALUE obj; + char *s, *e, *p; + + if (RSTRING(str)->len <= 1) return str; + obj = rb_str_new(0, RSTRING(str)->len); s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1; p = RSTRING(obj)->ptr; @@ -1248,14 +1275,14 @@ str_reverse(str) } static VALUE -str_include(str, arg) +rb_str_include(str, arg) VALUE str, arg; { - int i; + size_t i; if (FIXNUM_P(arg)) { int c = FIX2INT(arg); - int len = RSTRING(str)->len; + size_t len = RSTRING(str)->len; char *p = RSTRING(str)->ptr; for (i=0; i<len; i++) { @@ -1263,51 +1290,47 @@ str_include(str, arg) return INT2FIX(i); } } - return FALSE; + return Qfalse; } - Check_Type(arg, T_STRING); - i = str_index(str, arg, 0); + if (TYPE(arg) != T_STRING) arg = rb_str_to_str(arg); + i = rb_str_index(str, arg, 0); - if (i == -1) return FALSE; + if (i == (size_t)-1) return Qfalse; return INT2FIX(i); } static VALUE -str_to_i(str) +rb_str_to_i(str) VALUE str; { - return str2inum(RSTRING(str)->ptr, 10); + return rb_str2inum(RSTRING(str)->ptr, 10); } -#ifndef atof -double atof(); -#endif - static VALUE -str_to_f(str) +rb_str_to_f(str) VALUE str; { double f = atof(RSTRING(str)->ptr); - return float_new(f); + return rb_float_new(f); } static VALUE -str_to_s(str) +rb_str_to_s(str) VALUE str; { return str; } VALUE -str_inspect(str) +rb_str_inspect(str) VALUE str; { #define STRMAX 80 - UCHAR buf[STRMAX]; - UCHAR *p, *pend; - UCHAR *b; + char buf[STRMAX]; + char *p, *pend; + char *b; p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; b = buf; @@ -1322,23 +1345,26 @@ str_inspect(str) } while (p < pend) { - UCHAR c = *p++; + char c = *p++; if (ismbchar(c) && p < pend) { - CHECK(2); + int len = mbclen(c)-1; + + CHECK(len); *b++ = c; - *b++ = *p++; + while (len--) { + *b++ = *p++; + } } - else if (c == '"') { - CHECK(2); - *b++ = '\\'; - *b++ = '"'; + else if ((c & 0x80) && rb_kcode() != MBCTYPE_EUC) { + CHECK(1); + *b++ = c; } - else if (c == '\\') { + else if (c == '"'|| c == '\\') { CHECK(2); *b++ = '\\'; - *b++ = '\\'; + *b++ = c; } - else if (isprint(c)) { + else if (ISPRINT(c)) { CHECK(1); *b++ = c; } @@ -1362,7 +1388,7 @@ str_inspect(str) *b++ = '\\'; *b++ = 'f'; } - else if (c == '\13') { + else if (c == '\013') { CHECK(2); *b++ = '\\'; *b++ = 'v'; @@ -1380,123 +1406,248 @@ str_inspect(str) else { CHECK(4); *b++ = '\\'; - sprintf(b, "%03o", c); + sprintf(b, "%03o", c & 0377); b += 3; } } *b++ = '"'; - return str_new(buf, b - buf); + return rb_str_new(buf, b - buf); +} + +static VALUE +rb_str_dump(str) + VALUE str; +{ + size_t len; + char *p, *pend; + char *q, *qend; + VALUE result; + + len = 2; /* "" */ + p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; + while (p < pend) { + char c = *p++; + switch (c) { + case '"': case '\\': + case '\n': case '\r': + case '\t': case '\f': + case '\013': case '\007': case '\033': + len += 2; + break; + + default: + if (ISPRINT(c)) { + len++; + } + else { + len += 4; /* \nnn */ + } + break; + } + } + + result = rb_str_new(0, len); + p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; + q = RSTRING(result)->ptr; qend = q + len; + + *q++ = '"'; + while (p < pend) { + char c = *p++; + + if (c == '"' || c == '\\') { + *q++ = '\\'; + *q++ = c; + } + else if (ISPRINT(c)) { + *q++ = c; + } + else if (c == '\n') { + *q++ = '\\'; + *q++ = 'n'; + } + else if (c == '\r') { + *q++ = '\\'; + *q++ = 'r'; + } + else if (c == '\t') { + *q++ = '\\'; + *q++ = 't'; + } + else if (c == '\f') { + *q++ = '\\'; + *q++ = 'f'; + } + else if (c == '\013') { + *q++ = '\\'; + *q++ = 'v'; + } + else if (c == '\007') { + *q++ = '\\'; + *q++ = 'a'; + } + else if (c == '\033') { + *q++ = '\\'; + *q++ = 'e'; + } + else { + *q++ = '\\'; + sprintf(q, "%03o", c&0xff); + q += 3; + } + } + *q++ = '"'; + + return result; } static VALUE -str_upcase_bang(str) +rb_str_upcase_bang(str) VALUE str; { - UCHAR *s, *send; + char *s, *send; + int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (islower(*s)) { + if (ismbchar(*s)) { + s+=mbclen(*s); + } + else if (islower(*s)) { *s = toupper(*s); + modify = 1; } s++; } - return (VALUE)str; + if (modify) return str; + return Qnil; } static VALUE -str_upcase(str) +rb_str_upcase(str) VALUE str; { - return str_upcase_bang(str_dup(str)); + VALUE val = rb_str_upcase_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -str_downcase_bang(str) +rb_str_downcase_bang(str) VALUE str; { - UCHAR *s, *send; + char *s, *send; + int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s+=mbclen(*s); + } + else if (ISUPPER(*s)) { *s = tolower(*s); + modify = 1; } s++; } - return (VALUE)str; + if (modify) return str; + return Qnil; } static VALUE -str_downcase(str) +rb_str_downcase(str) VALUE str; { - return str_downcase_bang(str_dup(str)); + VALUE val = rb_str_downcase_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -str_capitalize_bang(str) +rb_str_capitalize_bang(str) VALUE str; { - UCHAR *s, *send; + char *s, *send; + int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; - if (islower(*s)) + if (ISLOWER(*s)) { *s = toupper(*s); + modify = 1; + } while (++s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s+=mbclen(*s); + } + else if (ISUPPER(*s)) { *s = tolower(*s); + modify = 1; } } - return (VALUE)str; + if (modify) return str; + return Qnil; } static VALUE -str_capitalize(str) +rb_str_capitalize(str) VALUE str; { - return str_capitalize_bang(str_dup(str)); + VALUE val = rb_str_capitalize_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -str_swapcase_bang(str) +rb_str_swapcase_bang(str) VALUE str; { - UCHAR *s, *send; + char *s, *send; + int modify = 0; - str_modify(str); + rb_str_modify(str); s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; while (s < send) { - if (isupper(*s)) { + if (ismbchar(*s)) { + s+=mbclen(*s); + } + else if (ISUPPER(*s)) { *s = tolower(*s); + modify = 1; } - else if (islower(*s)) { + else if (ISLOWER(*s)) { *s = toupper(*s); + modify = 1; } s++; } - return (VALUE)str; + if (modify) return str; + return Qnil; } static VALUE -str_swapcase(str) +rb_str_swapcase(str) VALUE str; { - return str_swapcase_bang(str_dup(str)); + VALUE val = rb_str_swapcase_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } -typedef UCHAR *USTR; +typedef unsigned char *USTR; -static struct tr { +struct tr { int gen, now, max; - UCHAR *p, *pend; -} trsrc, trrepl; + char *p, *pend; +}; static int trnext(t) @@ -1505,7 +1656,7 @@ trnext(t) for (;;) { if (!t->gen) { if (t->p == t->pend) return -1; - t->now = *t->p++; + t->now = *(USTR)t->p++; if (t->p < t->pend && *t->p == '-') { t->p++; if (t->p < t->pend) { @@ -1529,7 +1680,7 @@ trnext(t) } } -static VALUE str_delete_bang(); +static VALUE rb_str_delete_bang _((VALUE,VALUE)); static VALUE tr_trans(str, src, repl, sflag) @@ -1538,19 +1689,21 @@ tr_trans(str, src, repl, sflag) { struct tr trsrc, trrepl; int cflag = 0; - UCHAR trans[256]; - int i, c, c0; - UCHAR *s, *send, *t; + char trans[256]; + int i, c, c0, modify = 0; + char *s, *send; - Check_Type(src, T_STRING); + rb_str_modify(str); + if (TYPE(src) != T_STRING) src = rb_str_to_str(src); trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len; if (RSTRING(src)->len > 2 && RSTRING(src)->ptr[0] == '^') { cflag++; trsrc.p++; } - Check_Type(repl, T_STRING); - if (RSTRING(repl)->len == 0) return str_delete_bang(str, src); - trrepl.p = RSTRING(repl)->ptr; trrepl.pend = trrepl.p + RSTRING(repl)->len; + if (TYPE(repl) != T_STRING) repl = rb_str_to_str(repl); + if (RSTRING(repl)->len == 0) return rb_str_delete_bang(str, src); + trrepl.p = RSTRING(repl)->ptr; + trrepl.pend = trrepl.p + RSTRING(repl)->len; trsrc.gen = trrepl.gen = 0; trsrc.now = trrepl.now = 0; trsrc.max = trrepl.max = 0; @@ -1590,48 +1743,64 @@ tr_trans(str, src, repl, sflag) } } - str_modify(str); - t = s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; c0 = -1; if (sflag) { + char *t = s; + while (s < send) { c = trans[*s++ & 0xff] & 0xff; if (s[-1] == c || c != c0) { c0 = (s[-1] == c)?-1:c; + if (*t != c) { + *t = c; + modify = 1; + } *t++ = c; } } + if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) { + RSTRING(str)->len = (t - RSTRING(str)->ptr); + modify = 1; + *t = '\0'; + } } else { while (s < send) { - c = trans[*s++ & 0xff] & 0xff; - *t++ = c; + c = trans[*s & 0xff] & 0xff; + if (*s != c) { + *s = c; + modify = 1; + } + s++; } } - *t = '\0'; - if (sflag) RSTRING(str)->len = (t - RSTRING(str)->ptr); - return (VALUE)str; + if (modify) return str; + return Qnil; } static VALUE -str_tr_bang(str, src, repl) +rb_str_tr_bang(str, src, repl) VALUE str, src, repl; { return tr_trans(str, src, repl, 0); } static VALUE -str_tr(str, src, repl) +rb_str_tr(str, src, repl) VALUE str, src, repl; { - return tr_trans(str_dup(str), src, repl, 0); + VALUE val = tr_trans(str = rb_str_dup(str), src, repl, 0); + + if (NIL_P(val)) return str; + return val; } static void tr_setup_table(str, table) VALUE str; - UCHAR table[256]; + char table[256]; { struct tr tr; int i, cflag = 0; @@ -1653,45 +1822,51 @@ tr_setup_table(str, table) } static VALUE -str_delete_bang(str1, str2) - VALUE str1, *str2; +rb_str_delete_bang(str1, str2) + VALUE str1, str2; { - UCHAR *s, *send, *t; - UCHAR squeez[256]; + char *s, *send, *t; + char squeez[256]; + int modify = 0; - Check_Type(str2, T_STRING); + if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); tr_setup_table(str2, squeez); - str_modify(str1); + rb_str_modify(str1); s = t = RSTRING(str1)->ptr; send = s + RSTRING(str1)->len; while (s < send) { - if (!squeez[*s & 0xff]) { + if (squeez[*s & 0xff]) + modify = 1; + else *t++ = *s; - } s++; } *t = '\0'; RSTRING(str1)->len = t - RSTRING(str1)->ptr; - return (VALUE)str1; + if (modify) return str1; + return Qnil; } static VALUE -str_delete(str1, str2) - VALUE str1, *str2; +rb_str_delete(str1, str2) + VALUE str1, str2; { - return str_delete_bang(str_dup(str1), str2); + VALUE val = rb_str_delete_bang(str1 = rb_str_dup(str1), str2); + + if (NIL_P(val)) return str1; + return val; } static VALUE tr_squeeze(str1, str2) VALUE str1, str2; { - UCHAR squeez[256]; - UCHAR *s, *send, *t; - char c, save; + char squeez[256]; + char *s, *send, *t; + char c, save, modify = 0; if (!NIL_P(str2)) { tr_setup_table(str2, squeez); @@ -1704,7 +1879,7 @@ tr_squeeze(str1, str2) } } - str_modify(str1); + rb_str_modify(str1); s = t = RSTRING(str1)->ptr; send = s + RSTRING(str1)->len; @@ -1713,78 +1888,82 @@ tr_squeeze(str1, str2) c = *s++ & 0xff; if (c != save || !squeez[c & 0xff]) { *t++ = save = c; + modify = 1; } } *t = '\0'; RSTRING(str1)->len = t - RSTRING(str1)->ptr; - return (VALUE)str1; + if (modify) return str1; + return Qnil; } static VALUE -str_squeeze_bang(argc, argv, str1) +rb_str_squeeze_bang(argc, argv, str1) int argc; VALUE *argv; VALUE str1; { VALUE str2; - if (rb_scan_args(argc, argv, "01", &str2) == 1) { - Check_Type(str2, T_STRING); + if (rb_scan_args(argc, argv, "01", &str2) == 1 && TYPE(str2) != T_STRING) { + str2 = rb_str_to_str(str2); } return tr_squeeze(str1, str2); } static VALUE -str_squeeze(argc, argv, str) +rb_str_squeeze(argc, argv, str) int argc; VALUE *argv; VALUE str; { - return str_squeeze_bang(argc, argv, str_dup(str)); + VALUE val = rb_str_squeeze_bang(argc, argv, str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -str_tr_s_bang(str, src, repl) +rb_str_tr_s_bang(str, src, repl) VALUE str, src, repl; { - Check_Type(src, T_STRING); - Check_Type(repl, T_STRING); - return tr_trans(str, src, repl, 1); } static VALUE -str_tr_s(str, src, repl) +rb_str_tr_s(str, src, repl) VALUE str, src, repl; { - return str_tr_s_bang(str_dup(str), src, repl); + VALUE val = tr_trans(str = rb_str_dup(str), src, repl, 1); + + if (NIL_P(val)) return str; + return val; } static VALUE -str_split_method(argc, argv, str) +rb_str_split_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { - extern VALUE FS; VALUE spat; VALUE limit; - char char_sep = 0; - int beg, end, lim, i; + int char_sep = -1; + int beg, end, i; + int lim = 0; VALUE result, tmp; - rb_scan_args(argc, argv, "02", &spat, &limit); - if (!NIL_P(limit)) { + if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { lim = NUM2INT(limit); - if (lim == 0) limit = Qnil; - else if (lim == 1) return ary_new3(1, str); + if (lim <= 0) limit = Qnil; + else if (lim == 1) return rb_ary_new3(1, str); i = 1; } - if (NIL_P(spat)) { - if (!NIL_P(FS)) { - spat = FS; + if (argc == 0) { + if (!NIL_P(rb_fs)) { + spat = rb_fs; goto fs_set; } char_sep = ' '; @@ -1793,33 +1972,33 @@ str_split_method(argc, argv, str) switch (TYPE(spat)) { case T_STRING: fs_set: - if (STRLEN(spat) == 1) { - char_sep = RSTRING(spat)->ptr[0]; + if (RSTRING(spat)->len == 1) { + char_sep = (unsigned char)RSTRING(spat)->ptr[0]; } else { - spat = reg_regcomp(spat); + spat = rb_reg_regcomp(spat); } break; case T_REGEXP: break; default: - ArgError("split(): bad separator"); + rb_raise(rb_eArgError, "bad separator"); } } - result = ary_new(); + result = rb_ary_new(); beg = 0; - if (char_sep != 0) { - UCHAR *ptr = RSTRING(str)->ptr; - int len = RSTRING(str)->len; - UCHAR *eptr = ptr + len; + if (char_sep >= 0) { + char *ptr = RSTRING(str)->ptr; + size_t len = RSTRING(str)->len; + char *eptr = ptr + len; if (char_sep == ' ') { /* AWK emulation */ int skip = 1; for (end = beg = 0; ptr<eptr; ptr++) { if (skip) { - if (isspace(*ptr)) { + if (ISSPACE(*ptr)) { beg++; } else { @@ -1828,8 +2007,8 @@ str_split_method(argc, argv, str) } } else { - if (isspace(*ptr)) { - ary_push(result, str_substr(str, beg, end-beg)); + if (ISSPACE(*ptr)) { + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); skip = 1; beg = end + 1; if (!NIL_P(limit) && lim <= ++i) break; @@ -1842,8 +2021,8 @@ str_split_method(argc, argv, str) } else { for (end = beg = 0; ptr<eptr; ptr++) { - if (*ptr == char_sep) { - ary_push(result, str_substr(str, beg, end-beg)); + if (*ptr == (char)char_sep) { + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); beg = end + 1; if (!NIL_P(limit) && lim <= ++i) break; } @@ -1852,29 +2031,26 @@ str_split_method(argc, argv, str) } } else { - int start = beg; + size_t start = beg; int last_null = 0; - int idx; + size_t idx; struct re_registers *regs; - while ((end = reg_search(spat, str, start, 0)) >= 0) { - regs = RMATCH(backref_get())->regs; + while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { + regs = RMATCH(rb_backref_get())->regs; if (start == end && BEG(0) == END(0)) { if (last_null == 1) { - if (ismbchar(RSTRING(str)->ptr[beg])) - ary_push(result, str_substr(str, beg, 2)); - else - ary_push(result, str_substr(str, beg, 1)); + rb_ary_push(result, rb_str_substr(str, beg, mbclen(RSTRING(str)->ptr[beg]))); beg = start; } else { - start += ismbchar(RSTRING(str)->ptr[start])?2:1; + start += mbclen(RSTRING(str)->ptr[start]); last_null = 1; continue; } } else { - ary_push(result, str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); beg = start = END(0); } last_null = 0; @@ -1882,67 +2058,69 @@ str_split_method(argc, argv, str) for (idx=1; idx < regs->num_regs; idx++) { if (BEG(idx) == -1) continue; if (BEG(idx) == END(idx)) - tmp = str_new(0, 0); + tmp = rb_str_new(0, 0); else - tmp = str_subseq(str, BEG(idx), END(idx)-1); - ary_push(result, tmp); + tmp = rb_str_subseq(str, BEG(idx), END(idx)-1); + rb_ary_push(result, tmp); } if (!NIL_P(limit) && lim <= ++i) break; } } - if (RSTRING(str)->len > beg) { - ary_push(result, str_subseq(str, beg, -1)); + if (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0) { + rb_ary_push(result, rb_str_subseq(str, beg, -1)); + } + if (NIL_P(limit) && lim == 0) { + while (RARRAY(result)->len > 0 && + RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0) + rb_ary_pop(result); } return result; } VALUE -str_split(str, sep0) +rb_str_split(str, sep0) VALUE str; char *sep0; { VALUE sep; - Check_Type(str, T_STRING); - sep = str_new2(sep0); - return str_split_method(1, &sep, str); + if (TYPE(str) != T_STRING) str = rb_str_to_str(str); + sep = rb_str_new2(sep0); + return rb_str_split_method(1, &sep, str); } static VALUE -f_split(argc, argv) +rb_f_split(argc, argv) int argc; VALUE *argv; { - return str_split_method(argc, argv, uscore_get()); + return rb_str_split_method(argc, argv, uscore_get()); } static VALUE -str_each_line(argc, argv, str) +rb_str_each_line(argc, argv, str) int argc; VALUE *argv; VALUE str; { - extern VALUE RS; VALUE rs; int newline; int rslen; - UCHAR *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s; - UCHAR *ptr = p; - int len = RSTRING(str)->len; + char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s; + char *ptr = p; + size_t len = RSTRING(str)->len; VALUE line; - if (rb_scan_args(argc, argv, "01", &rs) == 1) { - if (!NIL_P(rs)) Check_Type(rs, T_STRING); - } - else { - rs = RS; + if (rb_scan_args(argc, argv, "01", &rs) == 0) { + rs = rb_rs; } if (NIL_P(rs)) { rb_yield(str); return Qnil; } + if (TYPE(rs) != T_STRING) rs = rb_str_to_str(rs); rslen = RSTRING(rs)->len; if (rslen == 0) { @@ -1954,25 +2132,22 @@ str_each_line(argc, argv, str) for (s = p, p += rslen; p < pend; p++) { if (rslen == 0 && *p == '\n') { - if (*(p+1) != '\n') continue; + if (p[1] != '\n') continue; while (*p == '\n') p++; - p--; } - if (*p == newline && + if (p[-1] == newline && (rslen <= 1 || - memcmp(RSTRING(rs)->ptr, p-rslen+1, rslen) == 0)) { - line = str_new(s, p - s + 1); - lastline_set(line); + memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) { + line = rb_str_new(s, p - s); rb_yield(line); if (RSTRING(str)->ptr != ptr || RSTRING(str)->len != len) - Fail("string modified"); - s = p + 1; + rb_raise(rb_eArgError, "string modified"); + s = p; } } if (s != pend) { - line = str_new(s, p - s); - lastline_set(line); + line = rb_str_new(s, p - s); rb_yield(line); } @@ -1980,10 +2155,10 @@ str_each_line(argc, argv, str) } static VALUE -str_each_byte(str) +rb_str_each_byte(str) struct RString* str; { - int i; + size_t i; for (i=0; i<RSTRING(str)->len; i++) { rb_yield(INT2FIX(RSTRING(str)->ptr[i] & 0xff)); @@ -1992,12 +2167,11 @@ str_each_byte(str) } static VALUE -str_chop_bang(str) +rb_str_chop_bang(str) VALUE str; { - str_modify(str); - if (RSTRING(str)->len > 0) { + rb_str_modify(str); RSTRING(str)->len--; if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') { if (RSTRING(str)->len > 0 && @@ -2006,90 +2180,132 @@ str_chop_bang(str) } } RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; } - - return str; + return Qnil; } static VALUE -str_chop(str) +rb_str_chop(str) VALUE str; { - return str_chop_bang(str_dup(str)); + VALUE val = rb_str_chop_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -f_chop_bang(str) +rb_f_chop_bang(str) VALUE str; { - return str_chop_bang(uscore_get()); + return rb_str_chop_bang(uscore_get()); } static VALUE -f_chop() +rb_f_chop() { - return str_chop_bang(str_dup(uscore_get())); + VALUE str = rb_str_dup(uscore_get()); + VALUE val = rb_str_chop_bang(str); + + if (NIL_P(str)) return str; + rb_lastline_set(val); + return val; } static VALUE -str_chomp_bang(str) +rb_str_chomp_bang(argc, argv, str) + int argc; + VALUE *argv; VALUE str; { - str_modify(str); + VALUE rs; + int newline; + int rslen; + char *p = RSTRING(str)->ptr; + size_t len = RSTRING(str)->len; - if (RSTRING(str)->len > 0 && - RSTRING(str)->ptr[RSTRING(str)->len-1] == '\n') { - RSTRING(str)->len--; - if (RSTRING(str)->len > 0 && - RSTRING(str)->ptr[RSTRING(str)->len] == '\r') { - RSTRING(str)->len--; + if (rb_scan_args(argc, argv, "01", &rs) == 0) { + rs = rb_rs; + } + if (NIL_P(rs)) return Qnil; + + if (TYPE(rs) != T_STRING) rs = rb_str_to_str(rs); + rslen = RSTRING(rs)->len; + if (rslen == 0) { + while (len>0 && p[len-1] == '\n') { + len--; + } + if (len < RSTRING(str)->len) { + RSTRING(str)->len = len; + RSTRING(str)->ptr[len] = '\0'; + return str; } + return Qnil; + } + if (rslen > len) return Qnil; + newline = RSTRING(rs)->ptr[rslen-1]; + + if (p[len-1] == newline && + (rslen <= 1 || + memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) { + RSTRING(str)->len -= rslen; RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; } - return str; + return Qnil; } static VALUE -str_chomp(str) +rb_str_chomp(argc, argv, str) + int argc; + VALUE *argv; VALUE str; { - return str_chomp_bang(str_dup(str)); + VALUE val = rb_str_chomp_bang(argc, argv, str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE -f_chomp_bang(str) - VALUE str; +rb_f_chomp_bang(argc, argv) + int argc; + VALUE *argv; { - return str_chomp_bang(uscore_get()); + return rb_str_chomp_bang(argc, argv, uscore_get()); } static VALUE -f_chomp() +rb_f_chomp(argc, argv) + int argc; + VALUE *argv; { - return str_chomp_bang(str_dup(uscore_get())); + VALUE str = rb_str_dup(uscore_get()); + VALUE val = rb_str_chomp_bang(argc, argv, str); + return val; } static VALUE -str_strip_bang(str) +rb_str_strip_bang(str) VALUE str; { - UCHAR *s, *t, *e; - - str_modify(str); + char *s, *t, *e; + rb_str_modify(str); s = RSTRING(str)->ptr; e = t = s + RSTRING(str)->len; /* remove spaces at head */ - while (s < t && isspace(*s)) s++; + while (s < t && ISSPACE(*s)) s++; /* remove trailing spaces */ t--; - while (s <= t && isspace(*t)) t--; + while (s <= t && ISSPACE(*t)) t--; t++; RSTRING(str)->len = t-s; if (s > RSTRING(str)->ptr) { - UCHAR *p = RSTRING(str)->ptr; + char *p = RSTRING(str)->ptr; RSTRING(str)->ptr = ALLOC_N(char, RSTRING(str)->len+1); memcpy(RSTRING(str)->ptr, s, RSTRING(str)->len); @@ -2099,38 +2315,50 @@ str_strip_bang(str) else if (t < e) { RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; } + else { + return Qnil; + } - return (VALUE)str; + return str; +} + +static VALUE +rb_str_strip(str) + VALUE str; +{ + VALUE val = rb_str_strip_bang(str = rb_str_dup(str)); + + if (NIL_P(val)) return str; + return val; } static VALUE scan_once(str, pat, start) VALUE str, pat; - int *start; + size_t *start; { - VALUE result; + VALUE result, match; struct re_registers *regs; - int i; + size_t i; - if (reg_search(pat, str, *start, 0) >= 0) { - regs = RMATCH(backref_get())->regs; - if (END(0) == *start) { - *start = END(0)+1; + if (rb_reg_search(pat, str, *start, 0) >= 0) { + match = rb_backref_get(); + regs = RMATCH(match)->regs; + if (BEG(0) == END(0)) { + /* + * Always consume at least one character of the input string + */ + *start = END(0)+mbclen(RSTRING(str)->ptr[END(0)]); } else { *start = END(0); } if (regs->num_regs == 1) { - return str_substr(str, BEG(0), END(0)-BEG(0)); + return rb_reg_nth_match(0, match); } - result = ary_new2(regs->num_regs); + result = rb_ary_new2(regs->num_regs); for (i=1; i < regs->num_regs; i++) { - if (BEG(i) == -1) { - ary_push(result, Qnil); - } - else { - ary_push(result, str_substr(str, BEG(i), END(i)-BEG(i))); - } + rb_ary_push(result, rb_reg_nth_match(i, match)); } return result; @@ -2139,27 +2367,18 @@ scan_once(str, pat, start) } static VALUE -str_scan(str, pat) +rb_str_scan(str, pat) VALUE str, pat; { VALUE result; - int start = 0; - - switch (TYPE(pat)) { - case T_STRING: - pat = reg_regcomp(pat); - break; - case T_REGEXP: - break; - default: - Check_Type(pat, T_REGEXP); - } + size_t start = 0; - if (!iterator_p()) { - VALUE ary = ary_new(); + pat = get_pat(pat); + if (!rb_iterator_p()) { + VALUE ary = rb_ary_new(); while (!NIL_P(result = scan_once(str, pat, &start))) { - ary_push(ary, result); + rb_ary_push(ary, result); } return ary; } @@ -2171,65 +2390,66 @@ str_scan(str, pat) } static VALUE -str_strip(str) +rb_str_hex(str) VALUE str; { - return str_strip_bang(str_dup(str)); + return rb_str2inum(RSTRING(str)->ptr, 16); } static VALUE -str_hex(str) +rb_str_oct(str) VALUE str; { - return str2inum(RSTRING(str)->ptr, 16); -} + int base = 8; -static VALUE -str_oct(str) - VALUE str; -{ - return str2inum(RSTRING(str)->ptr, 8); + if (RSTRING(str)->len > 2 && RSTRING(str)->ptr[0] == '0' && + (RSTRING(str)->ptr[1] == 'x' || RSTRING(str)->ptr[1] == 'X')) { + base = 16; + } + return rb_str2inum(RSTRING(str)->ptr, base); } static VALUE -str_crypt(str, salt) +rb_str_crypt(str, salt) VALUE str, salt; { extern char *crypt(); - salt = obj_as_string(salt); + + if (TYPE(salt) != T_STRING) salt = rb_str_to_str(salt); if (RSTRING(salt)->len < 2) - ArgError("salt too short(need >2 bytes)"); - return str_new2(crypt(RSTRING(str)->ptr, RSTRING(salt)->ptr)); + rb_raise(rb_eArgError, "salt too short(need >2 bytes)"); + return rb_str_new2(crypt(RSTRING(str)->ptr, RSTRING(salt)->ptr)); } static VALUE -str_intern(str) +rb_str_intern(str) VALUE str; { ID id; if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len) - ArgError("string contains `\0'"); + rb_raise(rb_eArgError, "string contains `\\0'"); id = rb_intern(RSTRING(str)->ptr); return INT2FIX(id); } static VALUE -str_sum(argc, argv, str) +rb_str_sum(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE vbits; int bits; - UCHAR *p, *pend; + char *p, *pend; - rb_scan_args(argc, argv, "01", &vbits); - if (NIL_P(vbits)) bits = 16; + if (rb_scan_args(argc, argv, "01", &vbits) == 0) { + bits = 16; + } else bits = NUM2INT(vbits); p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; - if (bits > 32) { + if (bits > sizeof(long)*CHAR_BIT) { VALUE res = INT2FIX(0); VALUE mod; @@ -2237,36 +2457,39 @@ str_sum(argc, argv, str) mod = rb_funcall(mod, '-', 1, INT2FIX(1)); while (p < pend) { - res = rb_funcall(res, '+', 1, INT2FIX((UINT)*p)); - res = rb_funcall(res, '%', 1, mod); + res = rb_funcall(res, '+', 1, INT2FIX((unsigned int)*p)); p++; } + res = rb_funcall(res, '&', 1, mod); return res; } else { - UINT res = 0; - UINT mod = (1<<bits)-1; + unsigned int res = 0; + unsigned int mod = (1<<bits)-1; + if (mod == 0) { + mod = -1; + } while (p < pend) { - res += (UINT)*p; - res %= mod; + res += (unsigned int)*p; p++; } - return int2inum(res); + res &= mod; + return rb_int2inum(res); } } static VALUE -str_ljust(str, w) +rb_str_ljust(str, w) VALUE str; VALUE w; { - int width = NUM2INT(w); + size_t width = NUM2UINT(w); VALUE res; - UCHAR *p, *pend; + char *p, *pend; - if (RSTRING(str)->len >= width) return (VALUE)str; - res = str_new(0, width); + if (width < 0 || RSTRING(str)->len >= width) return str; + res = rb_str_new(0, width); memcpy(RSTRING(res)->ptr, RSTRING(str)->ptr, RSTRING(str)->len); p = RSTRING(res)->ptr + RSTRING(str)->len; pend = RSTRING(res)->ptr + width; while (p < pend) { @@ -2276,16 +2499,16 @@ str_ljust(str, w) } static VALUE -str_rjust(str, w) +rb_str_rjust(str, w) VALUE str; VALUE w; { - int width = NUM2INT(w); + size_t width = NUM2UINT(w); VALUE res; - UCHAR *p, *pend; + char *p, *pend; - if (RSTRING(str)->len >= width) return (VALUE)str; - res = str_new(0, width); + if (width < 0 || RSTRING(str)->len >= width) return str; + res = rb_str_new(0, width); p = RSTRING(res)->ptr; pend = p + width - RSTRING(str)->len; while (p < pend) { *p++ = ' '; @@ -2295,17 +2518,17 @@ str_rjust(str, w) } static VALUE -str_center(str, w) +rb_str_center(str, w) VALUE str; VALUE w; { - int width = NUM2INT(w); + size_t width = NUM2UINT(w); VALUE res; - UCHAR *p, *pend; - int n; + char *p, *pend; + size_t n; - if (RSTRING(str)->len >= width) return (VALUE)str; - res = str_new(0, width); + if (width < 0 || RSTRING(str)->len >= width) return str; + res = rb_str_new(0, width); n = (width - RSTRING(str)->len)/2; p = RSTRING(res)->ptr; pend = p + n; while (p < pend) { @@ -2319,122 +2542,118 @@ str_center(str, w) return res; } -extern VALUE mKernel; -extern VALUE mComparable; -extern VALUE mEnumerable; -extern VALUE eGlobalExit; - void Init_String() { - cString = rb_define_class("String", cObject); - rb_include_module(cString, mComparable); - rb_include_module(cString, mEnumerable); - rb_define_singleton_method(cString, "new", str_s_new, 1); - rb_define_method(cString, "clone", str_clone, 0); - rb_define_method(cString, "dup", str_dup, 0); - rb_define_method(cString, "<=>", str_cmp_method, 1); - rb_define_method(cString, "==", str_equal, 1); - rb_define_method(cString, "===", str_equal, 1); - rb_define_method(cString, "eql?", str_equal, 1); - rb_define_method(cString, "hash", str_hash_method, 0); - rb_define_method(cString, "+", str_plus, 1); - rb_define_method(cString, "*", str_times, 1); - rb_define_method(cString, "%", str_format, 1); - rb_define_method(cString, "[]", str_aref_method, -1); - rb_define_method(cString, "[]=", str_aset_method, -1); - rb_define_method(cString, "length", str_length, 0); - rb_define_alias(cString, "size", "length"); - rb_define_method(cString, "=~", str_match, 1); - rb_define_method(cString, "~", str_match2, 0); - rb_define_method(cString, "succ", str_succ, 0); - rb_define_method(cString, "upto", str_upto, 1); - rb_define_method(cString, "index", str_index_method, -1); - rb_define_method(cString, "rindex", str_rindex, -1); - - rb_define_method(cString, "freeze", str_freeze, 0); - rb_define_method(cString, "frozen?", str_frozen_p, 0); - - rb_define_method(cString, "taint", str_taint, 0); - rb_define_method(cString, "tainted?", str_tainted, 0); - - rb_define_method(cString, "to_i", str_to_i, 0); - rb_define_method(cString, "to_f", str_to_f, 0); - rb_define_method(cString, "to_s", str_to_s, 0); - rb_define_method(cString, "inspect", str_inspect, 0); - - rb_define_method(cString, "upcase", str_upcase, 0); - rb_define_method(cString, "downcase", str_downcase, 0); - rb_define_method(cString, "capitalize", str_capitalize, 0); - rb_define_method(cString, "swapcase", str_swapcase, 0); - - rb_define_method(cString, "upcase!", str_upcase_bang, 0); - rb_define_method(cString, "downcase!", str_downcase_bang, 0); - rb_define_method(cString, "capitalize!", str_capitalize_bang, 0); - rb_define_method(cString, "swapcase!", str_swapcase_bang, 0); - - rb_define_method(cString, "hex", str_hex, 0); - rb_define_method(cString, "oct", str_oct, 0); - rb_define_method(cString, "split", str_split_method, -1); - rb_define_method(cString, "reverse", str_reverse, 0); - rb_define_method(cString, "reverse!", str_reverse_bang, 0); - rb_define_method(cString, "concat", str_concat, 1); - rb_define_method(cString, "<<", str_concat, 1); - rb_define_method(cString, "crypt", str_crypt, 1); - rb_define_method(cString, "intern", str_intern, 0); - - rb_define_method(cString, "include?", str_include, 1); - - rb_define_method(cString, "scan", str_scan, 1); - - rb_define_method(cString, "ljust", str_ljust, 1); - rb_define_method(cString, "rjust", str_rjust, 1); - rb_define_method(cString, "center", str_center, 1); - - rb_define_method(cString, "sub", str_sub, -1); - rb_define_method(cString, "gsub", str_gsub, -1); - rb_define_method(cString, "chop", str_chop, 0); - rb_define_method(cString, "chomp", str_chomp, 0); - rb_define_method(cString, "strip", str_strip, 0); - - rb_define_method(cString, "sub!", str_sub_bang, -1); - rb_define_method(cString, "gsub!", str_gsub_bang, -1); - rb_define_method(cString, "strip!", str_strip_bang, 0); - rb_define_method(cString, "chop!", str_chop_bang, 0); - rb_define_method(cString, "chomp!", str_chomp_bang, 0); - - rb_define_method(cString, "tr", str_tr, 2); - rb_define_method(cString, "tr_s", str_tr_s, 2); - rb_define_method(cString, "delete", str_delete, 1); - rb_define_method(cString, "squeeze", str_squeeze, -1); - - rb_define_method(cString, "tr!", str_tr_bang, 2); - rb_define_method(cString, "tr_s!", str_tr_s_bang, 2); - rb_define_method(cString, "delete!", str_delete_bang, 1); - rb_define_method(cString, "squeeze!", str_squeeze_bang, -1); - - rb_define_method(cString, "each_line", str_each_line, -1); - rb_define_method(cString, "each", str_each_line, -1); - rb_define_method(cString, "each_byte", str_each_byte, 0); - - rb_define_method(cString, "sum", str_sum, -1); - - rb_define_global_function("sub", f_sub, -1); - rb_define_global_function("gsub", f_gsub, -1); - - rb_define_global_function("sub!", f_sub_bang, -1); - rb_define_global_function("gsub!", f_gsub_bang, -1); - - rb_define_global_function("chop", f_chop, 0); - rb_define_global_function("chop!", f_chop_bang, 0); - - rb_define_global_function("chomp", f_chomp, 0); - rb_define_global_function("chomp!", f_chomp_bang, 0); - - rb_define_global_function("split", f_split, -1); - - pr_str = rb_intern("to_s"); - - /* Fix-up initialize ordering */ - RCLASS(eGlobalExit)->super = cString; + rb_cString = rb_define_class("String", rb_cObject); + rb_include_module(rb_cString, rb_mComparable); + rb_include_module(rb_cString, rb_mEnumerable); + rb_define_singleton_method(rb_cString, "new", rb_str_s_new, 1); + rb_define_method(rb_cString, "clone", rb_str_clone, 0); + rb_define_method(rb_cString, "dup", rb_str_dup, 0); + rb_define_method(rb_cString, "<=>", rb_str_cmp_method, 1); + rb_define_method(rb_cString, "==", rb_str_equal, 1); + rb_define_method(rb_cString, "===", rb_str_equal, 1); + rb_define_method(rb_cString, "eql?", rb_str_equal, 1); + rb_define_method(rb_cString, "hash", rb_str_hash_method, 0); + rb_define_method(rb_cString, "+", rb_str_plus, 1); + rb_define_method(rb_cString, "*", rb_str_times, 1); + rb_define_method(rb_cString, "%", rb_str_format, 1); + rb_define_method(rb_cString, "[]", rb_str_aref_method, -1); + rb_define_method(rb_cString, "[]=", rb_str_aset_method, -1); + rb_define_method(rb_cString, "length", rb_str_length, 0); + rb_define_alias(rb_cString, "size", "length"); + rb_define_method(rb_cString, "empty?", rb_str_empty, 0); + rb_define_method(rb_cString, "=~", rb_str_match, 1); + rb_define_method(rb_cString, "~", rb_str_match2, 0); + rb_define_method(rb_cString, "succ", rb_str_succ, 0); + rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "next", rb_str_succ, 0); + rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "upto", rb_str_upto, 1); + rb_define_method(rb_cString, "index", rb_str_index_method, -1); + rb_define_method(rb_cString, "rindex", rb_str_rindex, -1); + rb_define_method(rb_cString, "replace", rb_str_replace_method, 1); + + rb_define_method(rb_cString, "freeze", rb_str_freeze, 0); + rb_define_method(rb_cString, "frozen?", rb_str_frozen_p, 0); + + rb_define_method(rb_cString, "to_i", rb_str_to_i, 0); + rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); + rb_define_method(rb_cString, "to_s", rb_str_to_s, 0); + rb_define_method(rb_cString, "to_str", rb_str_to_s, 0); + rb_define_method(rb_cString, "inspect", rb_str_inspect, 0); + rb_define_method(rb_cString, "dump", rb_str_dump, 0); + + rb_define_method(rb_cString, "upcase", rb_str_upcase, 0); + rb_define_method(rb_cString, "downcase", rb_str_downcase, 0); + rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0); + rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0); + + rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0); + rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0); + rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0); + rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0); + + rb_define_method(rb_cString, "hex", rb_str_hex, 0); + rb_define_method(rb_cString, "oct", rb_str_oct, 0); + rb_define_method(rb_cString, "split", rb_str_split_method, -1); + rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); + rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); + rb_define_method(rb_cString, "concat", rb_str_concat, 1); + rb_define_method(rb_cString, "<<", rb_str_concat, 1); + rb_define_method(rb_cString, "crypt", rb_str_crypt, 1); + rb_define_method(rb_cString, "intern", rb_str_intern, 0); + + rb_define_method(rb_cString, "include?", rb_str_include, 1); + + rb_define_method(rb_cString, "scan", rb_str_scan, 1); + + rb_define_method(rb_cString, "ljust", rb_str_ljust, 1); + rb_define_method(rb_cString, "rjust", rb_str_rjust, 1); + rb_define_method(rb_cString, "center", rb_str_center, 1); + + rb_define_method(rb_cString, "sub", rb_str_sub, -1); + rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); + rb_define_method(rb_cString, "chop", rb_str_chop, 0); + rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); + rb_define_method(rb_cString, "strip", rb_str_strip, 0); + + rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1); + rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); + rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); + rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); + + rb_define_method(rb_cString, "tr", rb_str_tr, 2); + rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2); + rb_define_method(rb_cString, "delete", rb_str_delete, 1); + rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1); + + rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2); + rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2); + rb_define_method(rb_cString, "delete!", rb_str_delete_bang, 1); + rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1); + + rb_define_method(rb_cString, "each_line", rb_str_each_line, -1); + rb_define_method(rb_cString, "each", rb_str_each_line, -1); + rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0); + + rb_define_method(rb_cString, "sum", rb_str_sum, -1); + + rb_define_global_function("sub", rb_f_sub, -1); + rb_define_global_function("gsub", rb_f_gsub, -1); + + rb_define_global_function("sub!", rb_f_sub_bang, -1); + rb_define_global_function("gsub!", rb_f_gsub_bang, -1); + + rb_define_global_function("chop", rb_f_chop, 0); + rb_define_global_function("chop!", rb_f_chop_bang, 0); + + rb_define_global_function("chomp", rb_f_chomp, -1); + rb_define_global_function("chomp!", rb_f_chomp_bang, -1); + + rb_define_global_function("split", rb_f_split, -1); + + to_str = rb_intern("to_s"); } |