Skip to content

Commit 62ede28

Browse files
committed
string.c: reduce memory copy
* string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by copying necessary part only.
* string.c (rb_str_strip_bang, rb_str_strip): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48277 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 8615857 commit 62ede28

File tree

2 files changed

+115
-50
lines changed

2 files changed

+115
-50
lines changed

ChangeLog

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,17 @@
1+
Wed Nov 5 12:13:54 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
2+
3+
* string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by
4+
copying necessary part only.
5+
6+
* string.c (rb_str_strip_bang, rb_str_strip): ditto.
7+
8+
Wed Nov 5 12:13:48 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
9+
10+
* string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by
11+
copying necessary part only.
12+
13+
* string.c (rb_str_strip_bang, rb_str_strip): ditto.
14+
115
Wed Nov 5 10:54:19 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
216

317
* string.c (rb_str_lstrip_bang, rb_str_rstrip_bang): terminate

string.c

Lines changed: 101 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -7182,6 +7182,23 @@ rb_str_chomp(int argc, VALUE *argv, VALUE str)
71827182
return str;
71837183
}
71847184

7185+
static long
7186+
lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
7187+
{
7188+
const char *const start = s;
7189+
7190+
if (!s || s >= e) return 0;
7191+
/* remove spaces at head */
7192+
while (s < e) {
7193+
int n;
7194+
unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
7195+
7196+
if (!rb_isspace(cc)) break;
7197+
s += n;
7198+
}
7199+
return s - start;
7200+
}
7201+
71857202
/*
71867203
* call-seq:
71877204
* str.lstrip! -> self or nil
@@ -7198,24 +7215,16 @@ static VALUE
71987215
rb_str_lstrip_bang(VALUE str)
71997216
{
72007217
rb_encoding *enc;
7201-
char *start, *s, *t, *e;
7218+
char *start, *s;
7219+
long olen, loffset;
72027220

72037221
str_modify_keep_cr(str);
72047222
enc = STR_ENC_GET(str);
7205-
start = s = RSTRING_PTR(str);
7206-
if (!s || RSTRING_LEN(str) == 0) return Qnil;
7207-
e = t = RSTRING_END(str);
7208-
/* remove spaces at head */
7209-
while (s < e) {
7210-
int n;
7211-
unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
7212-
7213-
if (!rb_isspace(cc)) break;
7214-
s += n;
7215-
}
7216-
7217-
if (s > RSTRING_PTR(str)) {
7218-
long len = t - s;
7223+
RSTRING_GETMEM(str, start, olen);
7224+
loffset = lstrip_offset(str, start, start+olen, enc);
7225+
if (loffset > 0) {
7226+
long len = olen-loffset;
7227+
s = start + loffset;
72197228
memmove(start, s, len);
72207229
STR_SET_LEN(str, len);
72217230
TERM_FILL(start+len, rb_enc_mbminlen(enc));
@@ -7239,11 +7248,39 @@ rb_str_lstrip_bang(VALUE str)
72397248
static VALUE
72407249
rb_str_lstrip(VALUE str)
72417250
{
7242-
str = rb_str_dup(str);
7243-
rb_str_lstrip_bang(str);
7244-
return str;
7251+
char *start;
7252+
long len, loffset;
7253+
RSTRING_GETMEM(str, start, len);
7254+
loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
7255+
if (loffset <= 0) return rb_str_dup(str);
7256+
return rb_str_subseq(str, loffset, len - loffset);
72457257
}
72467258

7259+
static long
7260+
rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
7261+
{
7262+
const char *t;
7263+
7264+
rb_str_check_dummy_enc(enc);
7265+
if (!s || s >= e) return 0;
7266+
t = e;
7267+
7268+
/* remove trailing spaces or '\0's */
7269+
if (single_byte_optimizable(str)) {
7270+
unsigned char c;
7271+
while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
7272+
}
7273+
else {
7274+
char *tp;
7275+
7276+
while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
7277+
unsigned int c = rb_enc_codepoint(tp, e, enc);
7278+
if (c && !rb_isspace(c)) break;
7279+
t = tp;
7280+
}
7281+
}
7282+
return e - t;
7283+
}
72477284

72487285
/*
72497286
* call-seq:
@@ -7261,31 +7298,15 @@ static VALUE
72617298
rb_str_rstrip_bang(VALUE str)
72627299
{
72637300
rb_encoding *enc;
7264-
char *start, *s, *t, *e;
7301+
char *start;
7302+
long olen, roffset;
72657303

72667304
str_modify_keep_cr(str);
72677305
enc = STR_ENC_GET(str);
7268-
rb_str_check_dummy_enc(enc);
7269-
start = s = RSTRING_PTR(str);
7270-
if (!s || RSTRING_LEN(str) == 0) return Qnil;
7271-
t = e = RSTRING_END(str);
7272-
7273-
/* remove trailing spaces or '\0's */
7274-
if (single_byte_optimizable(str)) {
7275-
unsigned char c;
7276-
while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
7277-
}
7278-
else {
7279-
char *tp;
7280-
7281-
while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
7282-
unsigned int c = rb_enc_codepoint(tp, e, enc);
7283-
if (c && !rb_isspace(c)) break;
7284-
t = tp;
7285-
}
7286-
}
7287-
if (t < e) {
7288-
long len = t-start;
7306+
RSTRING_GETMEM(str, start, olen);
7307+
roffset = rstrip_offset(str, start, start+olen, enc);
7308+
if (roffset > 0) {
7309+
long len = olen - roffset;
72897310

72907311
STR_SET_LEN(str, len);
72917312
TERM_FILL(start+len, rb_enc_mbminlen(enc));
@@ -7309,9 +7330,16 @@ rb_str_rstrip_bang(VALUE str)
73097330
static VALUE
73107331
rb_str_rstrip(VALUE str)
73117332
{
7312-
str = rb_str_dup(str);
7313-
rb_str_rstrip_bang(str);
7314-
return str;
7333+
rb_encoding *enc;
7334+
char *start;
7335+
long olen, roffset;
7336+
7337+
enc = STR_ENC_GET(str);
7338+
RSTRING_GETMEM(str, start, olen);
7339+
roffset = rstrip_offset(str, start, start+olen, enc);
7340+
7341+
if (roffset <= 0) return rb_str_dup(str);
7342+
return rb_str_subseq(str, 0, olen-roffset);
73157343
}
73167344

73177345

@@ -7326,11 +7354,27 @@ rb_str_rstrip(VALUE str)
73267354
static VALUE
73277355
rb_str_strip_bang(VALUE str)
73287356
{
7329-
VALUE l = rb_str_lstrip_bang(str);
7330-
VALUE r = rb_str_rstrip_bang(str);
7357+
char *start;
7358+
long olen, loffset, roffset;
7359+
rb_encoding *enc;
73317360

7332-
if (NIL_P(l) && NIL_P(r)) return Qnil;
7333-
return str;
7361+
str_modify_keep_cr(str);
7362+
enc = STR_ENC_GET(str);
7363+
RSTRING_GETMEM(str, start, olen);
7364+
loffset = lstrip_offset(str, start, start+olen, enc);
7365+
roffset = rstrip_offset(str, start+loffset, start+olen, enc);
7366+
7367+
if (loffset > 0 || roffset > 0) {
7368+
long len = olen-roffset;
7369+
if (loffset > 0) {
7370+
len -= loffset;
7371+
memmove(start, start + loffset, len);
7372+
}
7373+
STR_SET_LEN(str, len);
7374+
TERM_FILL(start+len, rb_enc_mbminlen(enc));
7375+
return str;
7376+
}
7377+
return Qnil;
73347378
}
73357379

73367380

@@ -7347,9 +7391,16 @@ rb_str_strip_bang(VALUE str)
73477391
static VALUE
73487392
rb_str_strip(VALUE str)
73497393
{
7350-
str = rb_str_dup(str);
7351-
rb_str_strip_bang(str);
7352-
return str;
7394+
char *start;
7395+
long olen, loffset, roffset;
7396+
rb_encoding *enc = STR_ENC_GET(str);
7397+
7398+
RSTRING_GETMEM(str, start, olen);
7399+
loffset = lstrip_offset(str, start, start+olen, enc);
7400+
roffset = rstrip_offset(str, start+loffset, start+olen, enc);
7401+
7402+
if (loffset <= 0 && roffset <= 0) return rb_str_dup(str);
7403+
return rb_str_subseq(str, loffset, olen-loffset-roffset);
73537404
}
73547405

73557406
static VALUE

0 commit comments

Comments
 (0)