Skip to content

Commit f852af0

Browse files
committed
symbol.c: non-ASCII constant names
* symbol.c (rb_sym_constant_char_p): support for non-ASCII constant names. [Feature ruby#13770] * object.c (rb_mod_const_get, rb_mod_const_defined): support for non-ASCII constant names. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63130 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 44c01a5 commit f852af0

File tree

4 files changed

+80
-3
lines changed

4 files changed

+80
-3
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ with all sufficient information, see the ChangeLog file or Redmine
2020

2121
* `else` without `rescue` now causes a syntax error. [EXPERIMENTAL]
2222

23+
* constant names may start with a non-ASCII capital letter. [Feature #13770]
24+
2325
=== Core classes updates (outstanding ones only)
2426

2527
* Array

object.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,7 +2480,7 @@ rb_mod_const_get(int argc, VALUE *argv, VALUE mod)
24802480
if (!id) {
24812481
part = rb_str_subseq(name, beglen, len);
24822482
OBJ_FREEZE(part);
2483-
if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
2483+
if (!rb_is_const_name(part)) {
24842484
name = part;
24852485
goto wrong_name;
24862486
}
@@ -2633,7 +2633,7 @@ rb_mod_const_defined(int argc, VALUE *argv, VALUE mod)
26332633
if (!id) {
26342634
part = rb_str_subseq(name, beglen, len);
26352635
OBJ_FREEZE(part);
2636-
if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
2636+
if (!rb_is_const_name(part)) {
26372637
name = part;
26382638
goto wrong_name;
26392639
}

symbol.c

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,42 @@ rb_enc_symname_p(const char *name, rb_encoding *enc)
199199
return rb_enc_symname2_p(name, strlen(name), enc);
200200
}
201201

202+
/*
 * Decide whether the leading character of `name` (`nlen` bytes, in
 * encoding `enc`) qualifies as the first character of a constant name.
 *
 * Returns FALSE for an empty name or when no complete character is found
 * at the start.  An ASCII first byte is decided by ISUPPER alone.
 * For Unicode encodings: TRUE if the code point is uppercase or matches
 * the "titlecaseletter" property, FALSE if it is lowercase.
 * For all other encodings: TRUE when case-folding the leading character
 * produces bytes that differ from the original ones.
 */
static int
203+
rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
204+
{
205+
int c, len;
206+
const char *end = name + nlen;
207+
208+
if (nlen < 1) return FALSE; /* empty name: nothing to classify */
209+
if (ISASCII(*name)) return ISUPPER(*name); /* ASCII first byte: plain uppercase test */
210+
c = rb_enc_precise_mbclen(name, end, enc);
211+
if (!MBCLEN_CHARFOUND_P(c)) return FALSE; /* no character found at the start */
212+
len = MBCLEN_CHARFOUND_LEN(c); /* kept for the comparison against the folded length below */
213+
c = rb_enc_mbc_to_codepoint(name, end, enc); /* `c` is reused: now holds the code point */
214+
if (ONIGENC_IS_UNICODE(enc)) {
215+
static int ctype_titlecase = 0; /* cached across calls; 0 means "not looked up yet" */
216+
if (rb_enc_isupper(c, enc)) return TRUE;
217+
if (rb_enc_islower(c, enc)) return FALSE;
218+
if (!ctype_titlecase) {
219+
static const UChar cname[] = "titlecaseletter";
220+
static const UChar *const end = cname + sizeof(cname) - 1; /* shadows the outer `end`; points at cname's terminating NUL */
221+
/* NOTE(review): the lookup result is stored and used without an error check -- confirm the property name always resolves for Unicode encodings */
ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
222+
}
223+
if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
224+
}
225+
else {
226+
/* fallback to case-folding */
227+
OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
228+
const OnigUChar *beg = (const OnigUChar *)name;
229+
int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
230+
&beg, (const OnigUChar *)end,
231+
fold, enc);
232+
/* a positive result whose length or bytes differ from the input means folding changed the character */
if (r > 0 && (r != len || memcmp(fold, name, r)))
233+
return TRUE;
234+
}
235+
return FALSE;
236+
}
237+
202238
#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
203239
#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
204240

@@ -279,7 +315,7 @@ rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int a
279315
break;
280316

281317
default:
282-
type = ISUPPER(*m) ? ID_CONST : ID_LOCAL;
318+
type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL;
283319
id:
284320
if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
285321
if (len > 1 && *(e-1) == '=') {

test/ruby/test_parse.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,6 +1157,45 @@ def test_command_def_cmdarg
11571157
end;
11581158
end
11591159

1160+
# Names consisting of a single non-ASCII character, iterated over by
# assert_nonascii_const. U+00DE and U+00C0 are listed both as written and
# re-encoded to ISO-8859-15; U+0391 and U+FF21 are listed as written and
# re-encoded to CP932 and EUC-JP; the remaining entries appear only as written.
NONASCII_CONSTANTS = [
1161+
*%W"\u{00de} \u{00C0}".flat_map {|c| [c, c.encode("iso-8859-15")]},
1162+
"\u{1c4}", "\u{1f2}", "\u{1f88}", "\u{370}",
1163+
*%W"\u{391} \u{ff21}".flat_map {|c| [c, c.encode("cp932"), c.encode("euc-jp")]},
1164+
]
1165+
1166+
# Shared driver for the non-ASCII constant tests.
#
# For every name +n+ in NONASCII_CONSTANTS it creates a fresh module +m+,
# checks that +n+ is not yet defined in it, yields +m+ and +n+ to the
# caller's block (which is expected to define the constant and return its
# value), checks that the constant is then visible and is the very object
# the block returned, and finally removes it and checks that it is gone.
def assert_nonascii_const
1167+
assert_all_assertions_foreach("NONASCII_CONSTANTS", *NONASCII_CONSTANTS) do |n|
1168+
m = Module.new
1169+
# Before definition: const_defined?, const_get and defined? must all agree.
assert_not_operator(m, :const_defined?, n)
1170+
assert_raise_with_message(NameError, /uninitialized/) do
1171+
m.const_get(n)
1172+
end
1173+
assert_nil(eval("defined?(m::#{n})"))
1174+
1175+
# The block's return value is kept for the identity check below.
v = yield m, n
1176+
1177+
# After definition: the constant is reachable through every lookup path.
assert_operator(m, :const_defined?, n)
1178+
assert_equal("constant", eval("defined?(m::#{n})"))
1179+
assert_same(v, m.const_get(n))
1180+
1181+
# After removal (remove_const is invoked through __send__): undefined again.
m.__send__(:remove_const, n)
1182+
assert_not_operator(m, :const_defined?, n)
1183+
assert_nil(eval("defined?(m::#{n})"))
1184+
end
1185+
end
1186+
1187+
# Runs the assert_nonascii_const checks with the constant defined
# through Module#const_set (value 42).
def test_nonascii_const_set
1188+
assert_nonascii_const do |m, n|
1189+
m.const_set(n, 42)
1190+
end
1191+
end
1192+
1193+
# Runs the assert_nonascii_const checks with the constant defined by
# evaluating a `class` definition in source form, so the name goes through
# the parser; the block returns the newly defined class itself.
def test_nonascii_constant
1194+
assert_nonascii_const do |m, n|
1195+
m.module_eval("class #{n}; self; end")
1196+
end
1197+
end
1198+
11601199
=begin
11611200
def test_past_scope_variable
11621201
assert_warning(/past scope/) {catch {|tag| eval("BEGIN{throw tag}; tap {a = 1}; a")}}

0 commit comments

Comments
 (0)