From f7e4d73b2663046568c4df5c499f3a3a9fdf63db Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Fri, 13 Jul 2018 06:34:59 +0900 Subject: [PATCH 1/6] add transient_heap --- array.c | 300 ++++++++++++++++++++--- common.mk | 2 + encoding.c | 7 +- gc.c | 69 ++++-- include/ruby/encoding.h | 3 + inits.c | 1 + internal.h | 3 + spec/ruby/optional/capi/encoding_spec.rb | 35 +++ test/drb/drbtest.rb | 7 + test/lib/leakchecker.rb | 1 + 10 files changed, 377 insertions(+), 51 deletions(-) diff --git a/array.c b/array.c index 9061919bf81da8..486e73bcc58fe1 100644 --- a/array.c +++ b/array.c @@ -18,6 +18,10 @@ #include "probes.h" #include "id.h" #include "debug_counter.h" +#include "gc.h" +#include "transient_heap.h" + +// #define ARRAY_DEBUG #ifndef ARRAY_DEBUG # define NDEBUG @@ -53,6 +57,8 @@ VALUE rb_cArray; #define FL_SET_EMBED(a) do { \ assert(!ARY_SHARED_P(a)); \ FL_SET((a), RARRAY_EMBED_FLAG); \ + FL_UNSET_RAW((a), RARRAY_TRANSIENT_FLAG); \ + ary_verify(a); \ } while (0) #define FL_UNSET_EMBED(ary) FL_UNSET((ary), RARRAY_EMBED_FLAG|RARRAY_EMBED_LEN_MASK) #define FL_SET_SHARED(ary) do { \ @@ -130,11 +136,65 @@ VALUE rb_cArray; } while (0) #define FL_SET_SHARED_ROOT(ary) do { \ assert(!ARY_EMBED_P(ary)); \ + assert(!ARY_TRANSIENT_P(ary)); \ FL_SET((ary), RARRAY_SHARED_ROOT_FLAG); \ } while (0) #define ARY_SET(a, i, v) RARRAY_ASET((assert(!ARY_SHARED_P(a)), (a)), (i), (v)) + +#ifdef ARRAY_DEBUG +#define ary_verify(ary) ary_verify_(ary, __FILE__, __LINE__) + +static VALUE +ary_verify_(VALUE ary, const char *file, int line) +{ + assert(RB_TYPE_P(ary, T_ARRAY)); + + if (FL_TEST(ary, ELTS_SHARED)) { + VALUE root = RARRAY(ary)->as.heap.aux.shared; + const VALUE *ptr = RARRAY_CONST_PTR(ary); + const VALUE *root_ptr = RARRAY_CONST_PTR(root); + long len = RARRAY_LEN(ary), root_len = RARRAY_LEN(root); + assert(FL_TEST(root, RARRAY_SHARED_ROOT_FLAG)); + assert(root_ptr <= ptr && ptr + len <= root_ptr + root_len); + ary_verify(root); + } + else if (ARY_EMBED_P(ary)) { + 
assert(!ARY_TRANSIENT_P(ary)); + assert(!ARY_SHARED_P(ary)); + assert(RARRAY_LEN(ary) <= RARRAY_EMBED_LEN_MAX); + } + else { +#if 1 + const VALUE *ptr = RARRAY_CONST_PTR(ary); + long i, len = RARRAY_LEN(ary); + volatile VALUE v; + if (len > 1) len = 1; // check only HEAD + for (i=0; ias.heap.aux.capa; + + if (ARY_TRANSIENT_P(ary)) { + if (new_capa <= old_capa) { + /* do nothing */ + } + else { + VALUE *new_ptr = rb_transient_heap_alloc(ary, sizeof(VALUE) * new_capa); + + if (new_ptr == NULL) { + new_ptr = ALLOC_N(VALUE, new_capa); + FL_UNSET_RAW(ary, RARRAY_TRANSIENT_FLAG); + } + + MEMCPY(new_ptr, ARY_HEAP_PTR(ary), VALUE, old_capa); + ARY_SET_PTR(ary, new_ptr); + } + } + else { + SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, new_capa, old_capa); + } + ary_verify(ary); +} + +void +rb_ary_transient_heap_promote(VALUE ary, int promote) +{ + if (ARY_TRANSIENT_P(ary)) { + VALUE *new_ptr; + const VALUE *old_ptr = RARRAY_CONST_PTR(ary); + long capa = RARRAY(ary)->as.heap.aux.capa; + long len = RARRAY(ary)->as.heap.len; + if (ARY_SHARED_ROOT_P(ary)) { + capa = len; + } + + assert(ARY_OWNS_HEAP_P(ary)); + assert(ARY_TRANSIENT_P(ary)); + + if (promote) { + new_ptr = ALLOC_N(VALUE, capa); + FL_UNSET_RAW(ary, RARRAY_TRANSIENT_FLAG); + } + else { + new_ptr = ary_heap_alloc(ary, capa); + } + + MEMCPY(new_ptr, old_ptr, VALUE, capa); + /* do not use ARY_SET_PTR() because they assert !frozen */ + RARRAY(ary)->as.heap.ptr = new_ptr; + } + + ary_verify(ary); +} + static void ary_resize_capa(VALUE ary, long capacity) { assert(RARRAY_LEN(ary) <= capacity); assert(!OBJ_FROZEN(ary)); assert(!ARY_SHARED_P(ary)); + + // fprintf(stderr, "ary_resize_capa (%ld): %s\n", capacity, rb_obj_info(ary)); + if (capacity > RARRAY_EMBED_LEN_MAX) { if (ARY_EMBED_P(ary)) { long len = ARY_EMBED_LEN(ary); - VALUE *ptr = ALLOC_N(VALUE, (capacity)); + VALUE *ptr = ary_heap_alloc(ary, capacity); + MEMCPY(ptr, ARY_EMBED_PTR(ary), VALUE, len); FL_UNSET_EMBED(ary); ARY_SET_PTR(ary, ptr); 
ARY_SET_HEAP_LEN(ary, len); } else { - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, capacity, RARRAY(ary)->as.heap.aux.capa); + // fprintf(stderr, "ary_resize_capa %s\n", rb_obj_info(ary)); + ary_heap_realloc(ary, capacity); } - ARY_SET_CAPA(ary, (capacity)); + ARY_SET_CAPA(ary, capacity); + // fprintf(stderr, "-> ary_resize_capa: %s\n", rb_obj_info(ary)); + + // fprintf(stderr, "ary_resize_capa %p len:%ld capa:%ld - %s\n", (void *)ary, RARRAY_LEN(ary), capacity, rb_obj_info(ary)); } else { if (!ARY_EMBED_P(ary)) { long len = RARRAY_LEN(ary); + long old_capa = RARRAY(ary)->as.heap.aux.capa; const VALUE *ptr = RARRAY_CONST_PTR(ary); - if (len > capacity) len = capacity; MEMCPY((VALUE *)RARRAY(ary)->as.ary, ptr, VALUE, len); + ary_heap_free_ptr(ary, ptr, old_capa); + FL_SET_EMBED(ary); ARY_SET_LEN(ary, len); - ruby_sized_xfree((VALUE *)ptr, RARRAY(ary)->as.heap.aux.capa); + + // fprintf(stderr, "ary_resize_capa: heap->embed %p len:%ld\n", (void *)ary, len); } } + + ary_verify(ary); } static inline void @@ -242,8 +416,9 @@ ary_shrink_capa(VALUE ary) long old_capa = RARRAY(ary)->as.heap.aux.capa; assert(!ARY_SHARED_P(ary)); assert(old_capa >= capacity); - if (old_capa > capacity) - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, capacity, old_capa); + if (old_capa > capacity) ary_heap_realloc(ary, capacity); + + ary_verify(ary); } static void @@ -258,7 +433,10 @@ ary_double_capa(VALUE ary, long min) new_capa = (ARY_MAX_SIZE - min) / 2; } new_capa += min; + // fprintf(stderr, "ary_double_capa: %p %d\n", (void *)ary, FL_TEST(ary, RARRAY_TRANSIENT_FLAG) ? 
1 : 0); ary_resize_capa(ary, new_capa); + + ary_verify(ary); } static void @@ -272,6 +450,7 @@ rb_ary_decrement_share(VALUE shared) } else if (num > 0) { ARY_SET_SHARED_NUM(shared, num); + // ary_verify(shared); } } } @@ -313,7 +492,9 @@ rb_ary_set_shared(VALUE ary, VALUE shared) static inline void rb_ary_modify_check(VALUE ary) { + rb_transient_heap_verify(); rb_check_frozen(ary); + ary_verify(ary); } void @@ -323,6 +504,9 @@ rb_ary_modify(VALUE ary) if (ARY_SHARED_P(ary)) { long shared_len, len = RARRAY_LEN(ary); VALUE shared = ARY_SHARED(ary); + + ary_verify(shared); + if (len <= RARRAY_EMBED_LEN_MAX) { const VALUE *ptr = ARY_HEAP_PTR(ary); FL_UNSET_SHARED(ary); @@ -343,7 +527,7 @@ rb_ary_modify(VALUE ary) rb_ary_decrement_share(shared); } else { - VALUE *ptr = ALLOC_N(VALUE, len); + VALUE *ptr = ary_heap_alloc(ary, len); MEMCPY(ptr, RARRAY_CONST_PTR(ary), VALUE, len); rb_ary_unshare(ary); ARY_SET_CAPA(ary, len); @@ -352,6 +536,7 @@ rb_ary_modify(VALUE ary) rb_gc_writebarrier_remember(ary); } + ary_verify(ary); } static VALUE @@ -370,7 +555,10 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (ARY_SHARED_OCCUPIED(shared)) { if (RARRAY_CONST_PTR(ary) - RARRAY_CONST_PTR(shared) + new_len <= RARRAY_LEN(shared)) { rb_ary_modify_check(ary); - return shared; + + ary_verify(ary); + ary_verify(shared); + return shared; } else { /* if array is shared, then it is likely it participate in push/shift pattern */ @@ -379,11 +567,13 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (new_len > capa - (capa >> 6)) { ary_double_capa(ary, new_len); } + ary_verify(ary); return ary; } } } - rb_ary_modify(ary); + ary_verify(ary); + rb_ary_modify(ary); } else { rb_ary_modify_check(ary); @@ -393,6 +583,7 @@ ary_ensure_room_for_push(VALUE ary, long add_len) ary_double_capa(ary, new_len); } + ary_verify(ary); return ary; } @@ -465,7 +656,7 @@ ary_new(VALUE klass, long capa) ary = ary_alloc(klass); if (capa > RARRAY_EMBED_LEN_MAX) { - ptr = ALLOC_N(VALUE, capa); + ptr = 
ary_heap_alloc(ary, capa); FL_UNSET_EMBED(ary); ARY_SET_PTR(ary, ptr); ARY_SET_CAPA(ary, capa); @@ -529,7 +720,9 @@ rb_ary_new_from_values(long n, const VALUE *elts) VALUE rb_ary_tmp_new(long capa) { - return ary_new(0, capa); + VALUE ary = ary_new(0, capa); + rb_ary_transient_heap_promote(ary, TRUE); + return ary; } VALUE @@ -546,7 +739,7 @@ rb_ary_free(VALUE ary) { if (ARY_OWNS_HEAP_P(ary)) { RB_DEBUG_COUNTER_INC(obj_ary_ptr); - ruby_sized_xfree((void *)ARY_HEAP_PTR(ary), ARY_HEAP_SIZE(ary)); + ary_heap_free(ary); } else { RB_DEBUG_COUNTER_INC(obj_ary_embed); @@ -569,13 +762,15 @@ ary_discard(VALUE ary) { rb_ary_free(ary); RBASIC(ary)->flags |= RARRAY_EMBED_FLAG; - RBASIC(ary)->flags &= ~RARRAY_EMBED_LEN_MASK; + RBASIC(ary)->flags &= ~(RARRAY_EMBED_LEN_MASK | RARRAY_TRANSIENT_FLAG); } static VALUE ary_make_shared(VALUE ary) { assert(!ARY_EMBED_P(ary)); + ary_verify(ary); + if (ARY_SHARED_P(ary)) { return ARY_SHARED(ary); } @@ -583,6 +778,7 @@ ary_make_shared(VALUE ary) return ary; } else if (OBJ_FROZEN(ary)) { + rb_ary_transient_heap_promote(ary, TRUE); ary_shrink_capa(ary); FL_SET_SHARED_ROOT(ary); ARY_SET_SHARED_NUM(ary, 1); @@ -590,18 +786,25 @@ ary_make_shared(VALUE ary) } else { long capa = ARY_CAPA(ary), len = RARRAY_LEN(ary); + const VALUE *ptr; NEWOBJ_OF(shared, struct RArray, 0, T_ARRAY | (RGENGC_WB_PROTECTED_ARRAY ? 
FL_WB_PROTECTED : 0)); - FL_UNSET_EMBED(shared); + rb_ary_transient_heap_promote(ary, TRUE); + ptr = ARY_HEAP_PTR(ary); + + FL_UNSET_EMBED(shared); ARY_SET_LEN((VALUE)shared, capa); - ARY_SET_PTR((VALUE)shared, RARRAY_CONST_PTR(ary)); - ary_mem_clear((VALUE)shared, len, capa - len); + ARY_SET_PTR((VALUE)shared, ptr); + ary_mem_clear((VALUE)shared, len, capa - len); FL_SET_SHARED_ROOT(shared); ARY_SET_SHARED_NUM((VALUE)shared, 1); FL_SET_SHARED(ary); ARY_SET_SHARED(ary, (VALUE)shared); OBJ_FREEZE(shared); - return (VALUE)shared; + + ary_verify((VALUE)shared); + ary_verify(ary); + return (VALUE)shared; } } @@ -736,7 +939,7 @@ rb_ary_initialize(int argc, VALUE *argv, VALUE ary) rb_ary_modify(ary); if (argc == 0) { if (ARY_OWNS_HEAP_P(ary) && RARRAY_CONST_PTR(ary) != 0) { - ruby_sized_xfree((void *)RARRAY_CONST_PTR(ary), ARY_HEAP_SIZE(ary)); + ary_heap_free(ary); } rb_ary_unshare_safe(ary); FL_SET_EMBED(ary); @@ -858,6 +1061,9 @@ ary_make_partial(VALUE ary, VALUE klass, long offset, long len) ARY_INCREASE_PTR(result, offset); ARY_SET_LEN(result, len); + + ary_verify(shared); + ary_verify(result); return result; } } @@ -916,12 +1122,13 @@ ary_take_first_or_last(int argc, const VALUE *argv, VALUE ary, enum ary_take_pos VALUE rb_ary_push(VALUE ary, VALUE item) { - long idx = RARRAY_LEN(ary); + long idx = RARRAY_LEN((ary_verify(ary), ary)); VALUE target_ary = ary_ensure_room_for_push(ary, 1); RARRAY_PTR_USE(ary, ptr, { RB_OBJ_WRITE(target_ary, &ptr[idx], item); }); ARY_SET_LEN(ary, idx + 1); + ary_verify(ary); return ary; } @@ -973,6 +1180,7 @@ rb_ary_pop(VALUE ary) } --n; ARY_SET_LEN(ary, n); + ary_verify(ary); return RARRAY_AREF(ary, n); } @@ -1006,6 +1214,7 @@ rb_ary_pop_m(int argc, VALUE *argv, VALUE ary) rb_ary_modify_check(ary); result = ary_take_first_or_last(argc, argv, ary, ARY_TAKE_LAST); ARY_INCREASE_LEN(ary, -RARRAY_LEN(result)); + ary_verify(ary); return result; } @@ -1024,6 +1233,7 @@ rb_ary_shift(VALUE ary) MEMMOVE(ptr, ptr+1, VALUE, len-1); }); /* WB: no 
new reference */ ARY_INCREASE_LEN(ary, -1); + ary_verify(ary); return top; } assert(!ARY_EMBED_P(ary)); /* ARY_EMBED_LEN_MAX < ARY_DEFAULT_SIZE */ @@ -1037,6 +1247,8 @@ rb_ary_shift(VALUE ary) ARY_INCREASE_PTR(ary, 1); /* shift ptr */ ARY_INCREASE_LEN(ary, -1); + ary_verify(ary); + return top; } @@ -1096,6 +1308,7 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) } ARY_INCREASE_LEN(ary, -n); + ary_verify(ary); return result; } @@ -1129,7 +1342,9 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) /* use shared array for big "queues" */ if (new_len > ARY_DEFAULT_SIZE * 4) { - /* make a room for unshifted items */ + ary_verify(ary); + + /* make a room for unshifted items */ capa = ARY_CAPA(ary); ary_make_shared(ary); @@ -1146,6 +1361,8 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) } ARY_SET_PTR(ary, head - argc); assert(ARY_SHARED_OCCUPIED(ARY_SHARED(ary))); + + ary_verify(ary); return ARY_SHARED(ary); } else { @@ -1154,6 +1371,7 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) MEMMOVE(ptr + argc, ptr, VALUE, len); }); + ary_verify(ary); return ary; } } @@ -1667,11 +1885,12 @@ rb_ary_resize(VALUE ary, long len) } else { if (olen > len + ARY_DEFAULT_SIZE) { - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, len, RARRAY(ary)->as.heap.aux.capa); + ary_heap_realloc(ary, len); ARY_SET_CAPA(ary, len); } ARY_SET_HEAP_LEN(ary, len); } + ary_verify(ary); return ary; } @@ -1814,7 +2033,7 @@ VALUE rb_ary_each(VALUE ary) { long i; - + ary_verify(ary); RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); for (i=0; i #include "ruby_assert.h" #include "debug_counter.h" +#include "transient_heap.h" #include "mjit.h" #undef rb_data_object_wrap @@ -1188,6 +1189,7 @@ RVALUE_PAGE_OLD_UNCOLLECTIBLE_SET(rb_objspace_t *objspace, struct heap_page *pag { MARK_IN_BITMAP(&page->uncollectible_bits[0], obj); objspace->rgengc.old_objects++; + rb_transient_heap_promote(obj); #if RGENGC_PROFILE >= 2 objspace->profile.total_promoted_count++; @@ -2246,7 +2248,7 @@ obj_free(rb_objspace_t 
*objspace, VALUE obj) rb_str_free(obj); break; case T_ARRAY: - rb_ary_free(obj); + rb_ary_free(obj); break; case T_HASH: if (RANY(obj)->as.hash.ntbl) { @@ -4602,16 +4604,24 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) break; case T_ARRAY: - if (FL_TEST(obj, ELTS_SHARED)) { - gc_mark(objspace, any->as.array.as.heap.aux.shared); + if (FL_TEST(obj, ELTS_SHARED)) { + VALUE root = any->as.array.as.heap.aux.shared; + gc_mark(objspace, root); } else { long i, len = RARRAY_LEN(obj); const VALUE *ptr = RARRAY_CONST_PTR(obj); for (i=0; i < len; i++) { - gc_mark(objspace, *ptr++); + gc_mark(objspace, ptr[i]); } - } + + if (objspace->mark_func_data == NULL) { + if (!FL_TEST_RAW(obj, RARRAY_EMBED_FLAG) && + ARY_TRANSIENT_P(obj)) { + rb_transient_heap_mark(obj, ptr); + } + } + } break; case T_HASH: @@ -5602,6 +5612,8 @@ gc_marks_finish(rb_objspace_t *objspace) #endif } + rb_transient_heap_finish_marking(); + gc_event_hook(objspace, RUBY_INTERNAL_EVENT_GC_END_MARK, 0); return TRUE; @@ -6471,6 +6483,7 @@ gc_start(rb_objspace_t *objspace, int reason) objspace->profile.heap_used_at_gc_start = heap_allocated_pages; gc_prof_setup_new_record(objspace, reason); gc_reset_malloc_info(objspace); + rb_transient_heap_start_marking(do_full_mark); gc_event_hook(objspace, RUBY_INTERNAL_EVENT_GC_START, 0 /* TODO: pass minor/immediate flag? 
*/); GC_ASSERT(during_gc); @@ -9454,13 +9467,21 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) #if USE_RGENGC const int age = RVALUE_FLAGS_AGE(RBASIC(obj)->flags); - snprintf(buff, buff_size, "%p [%d%s%s%s%s] %s", - (void *)obj, age, - C(RVALUE_UNCOLLECTIBLE_BITMAP(obj), "L"), - C(RVALUE_MARK_BITMAP(obj), "M"), - C(RVALUE_MARKING_BITMAP(obj), "R"), - C(RVALUE_WB_UNPROTECTED_BITMAP(obj), "U"), - obj_type_name(obj)); + if (is_pointer_to_heap(&rb_objspace, (void *)obj)) { + snprintf(buff, buff_size, "%p [%d%s%s%s%s] %s", + (void *)obj, age, + C(RVALUE_UNCOLLECTIBLE_BITMAP(obj), "L"), + C(RVALUE_MARK_BITMAP(obj), "M"), + C(RVALUE_MARKING_BITMAP(obj), "R"), + C(RVALUE_WB_UNPROTECTED_BITMAP(obj), "U"), + obj_type_name(obj)); + } + else { + /* fake */ + snprintf(buff, buff_size, "%p [%dXXXX] %s", + (void *)obj, age, + obj_type_name(obj)); + } #else snprintf(buff, buff_size, "%p [%s] %s", (void *)obj, @@ -9490,10 +9511,25 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) UNEXPECTED_NODE(rb_raw_obj_info); break; case T_ARRAY: - snprintf(buff, buff_size, "%s [%s%s] len: %d", buff, - C(ARY_EMBED_P(obj), "E"), - C(ARY_SHARED_P(obj), "S"), - (int)RARRAY_LEN(obj)); + if (FL_TEST(obj, ELTS_SHARED)) { + snprintf(buff, buff_size, "%s shared -> %s", buff, + rb_obj_info(RARRAY(obj)->as.heap.aux.shared)); + } + else if (FL_TEST(obj, RARRAY_EMBED_FLAG)) { + snprintf(buff, buff_size, "%s [%s%s] len: %d (embed)", buff, + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + (int)RARRAY_LEN(obj)); + } + else { + snprintf(buff, buff_size, "%s [%s%s%s] len: %d, capa:%d ptr:%p", buff, + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + C(ARY_TRANSIENT_P(obj), "T"), + (int)RARRAY_LEN(obj), + ARY_EMBED_P(obj) ? 
-1 : (int)RARRAY(obj)->as.heap.aux.capa, + RARRAY_CONST_PTR(obj)); + } break; case T_STRING: { snprintf(buff, buff_size, "%s %s", buff, RSTRING_PTR(obj)); @@ -9855,6 +9891,7 @@ Init_GC(void) /* internal methods */ rb_define_singleton_method(rb_mGC, "verify_internal_consistency", gc_verify_internal_consistency, 0); + rb_define_singleton_method(rb_mGC, "verify_transient_heap_internal_consistency", rb_transient_heap_verify, 0); #if MALLOC_ALLOCATED_SIZE rb_define_singleton_method(rb_mGC, "malloc_allocated_size", gc_malloc_allocated_size, 0); rb_define_singleton_method(rb_mGC, "malloc_allocations", gc_malloc_allocations, 0); diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 93939ee7db0164..a2b352a01a399a 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -41,7 +41,10 @@ enum ruby_encoding_consts { #define ENCODING_SHIFT RUBY_ENCODING_SHIFT #define ENCODING_MASK RUBY_ENCODING_MASK +int rb_enc_capable(VALUE obj); + #define RB_ENCODING_SET_INLINED(obj,i) do {\ + if (!rb_enc_capable(obj)) rb_bug("RB_ENCODING_SET_INLINED: not capable"); \ RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ } while (0) diff --git a/inits.c b/inits.c index 13fa0692363562..f85b90da070ab3 100644 --- a/inits.c +++ b/inits.c @@ -16,6 +16,7 @@ void rb_call_inits(void) { + CALL(TransientHeap); CALL(Method); CALL(RandomSeedCore); CALL(sym); diff --git a/internal.h b/internal.h index 45c499d8690542..221b7db41e7c33 100644 --- a/internal.h +++ b/internal.h @@ -1073,6 +1073,9 @@ VALUE rb_gvar_defined(struct rb_global_entry *); struct vtm; /* defined by timev.h */ /* array.c */ +#define RARRAY_TRANSIENT_FLAG FL_USER13 +#define ARY_TRANSIENT_P(ary) FL_TEST_RAW((ary), RARRAY_TRANSIENT_FLAG) + VALUE rb_ary_last(int, const VALUE *, VALUE); void rb_ary_set_len(VALUE, long); void rb_ary_delete_same(VALUE, VALUE); diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index 
dc1019e8aeb86b..50e959678639f4 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -1,3 +1,6 @@ + +return + # -*- encoding: utf-8 -*- require_relative 'spec_helper' require_relative 'fixtures/encoding' @@ -12,7 +15,29 @@ it "returns the index of the encoding of a Regexp" do @s.send(@method, /regexp/).should >= 0 end +<<<<<<< Updated upstream end +======= + + it "returns the index of the encoding of an Object" do + obj = mock("rb_enc_get_index string") + @s.rb_enc_set_index(obj, 1) + @s.send(@method, obj).should == 1 + end + + it "returns the index of the dummy encoding of an Object" do + obj = mock("rb_enc_get_index string") + index = Encoding.list.index(Encoding::UTF_16) + @s.rb_enc_set_index(obj, index) + @s.send(@method, obj).should == index + end + + it "returns 0 for an object without an encoding" do + obj = mock("rb_enc_get_index string") + @s.send(@method, obj).should == 0 + end +end if false +>>>>>>> Stashed changes describe :rb_enc_set_index, shared: true do it "sets the object's encoding to the Encoding specified by the index" do @@ -30,7 +55,17 @@ result = @s.send(@method, str, 1) result.first.should == result.last end +<<<<<<< Updated upstream end +======= + + it "associates an encoding with an object" do + obj = mock("rb_enc_set_index string") + result = @s.send(@method, obj, 1) + result.first.should == result.last + end +end if false +>>>>>>> Stashed changes describe "C-API Encoding function" do before :each do diff --git a/test/drb/drbtest.rb b/test/drb/drbtest.rb index 2796100280b43f..6a3700bcb89ff3 100644 --- a/test/drb/drbtest.rb +++ b/test/drb/drbtest.rb @@ -276,16 +276,23 @@ def test_10_yield_undumped end def test_11_remote_no_method_error + #tp = TracePoint.new(:line){ GC.verify_transient_heap_internal_consistency } + #tp.enable do assert_raise(DRb::DRbRemoteError) do + GC.verify_transient_heap_internal_consistency @there.remote_no_method_error end begin + 
GC.verify_transient_heap_internal_consistency @there.remote_no_method_error rescue + GC.verify_transient_heap_internal_consistency error = $! assert_match(/^undefined method .*\(NoMethodError\)/, error.message) assert_equal('NoMethodError', error.reason) + GC.verify_transient_heap_internal_consistency end + #end end end diff --git a/test/lib/leakchecker.rb b/test/lib/leakchecker.rb index af9200bf77a284..75ff5da1b0279b 100644 --- a/test/lib/leakchecker.rb +++ b/test/lib/leakchecker.rb @@ -20,6 +20,7 @@ def check(test_name) check_verbose(test_name), ] GC.start if leaks.any? + # GC.verify_internal_consistency end def check_safe test_name From bfaccaf766acc252532f11a970c76f8c88691017 Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Tue, 17 Jul 2018 17:01:24 +0900 Subject: [PATCH 2/6] revert merge failure --- spec/ruby/optional/capi/encoding_spec.rb | 35 ------------------------ 1 file changed, 35 deletions(-) diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index 50e959678639f4..dc1019e8aeb86b 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -1,6 +1,3 @@ - -return - # -*- encoding: utf-8 -*- require_relative 'spec_helper' require_relative 'fixtures/encoding' @@ -15,29 +12,7 @@ it "returns the index of the encoding of a Regexp" do @s.send(@method, /regexp/).should >= 0 end -<<<<<<< Updated upstream end -======= - - it "returns the index of the encoding of an Object" do - obj = mock("rb_enc_get_index string") - @s.rb_enc_set_index(obj, 1) - @s.send(@method, obj).should == 1 - end - - it "returns the index of the dummy encoding of an Object" do - obj = mock("rb_enc_get_index string") - index = Encoding.list.index(Encoding::UTF_16) - @s.rb_enc_set_index(obj, index) - @s.send(@method, obj).should == index - end - - it "returns 0 for an object without an encoding" do - obj = mock("rb_enc_get_index string") - @s.send(@method, obj).should == 0 - end -end if false ->>>>>>> 
Stashed changes describe :rb_enc_set_index, shared: true do it "sets the object's encoding to the Encoding specified by the index" do @@ -55,17 +30,7 @@ result = @s.send(@method, str, 1) result.first.should == result.last end -<<<<<<< Updated upstream end -======= - - it "associates an encoding with an object" do - obj = mock("rb_enc_set_index string") - result = @s.send(@method, obj, 1) - result.first.should == result.last - end -end if false ->>>>>>> Stashed changes describe "C-API Encoding function" do before :each do From 2dff2652da54bb5dec9d088bfb7841f2cde81056 Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Tue, 24 Jul 2018 15:00:16 +0900 Subject: [PATCH 3/6] add transient_heap.ch --- transient_heap.c | 827 +++++++++++++++++++++++++++++++++++++++++++++++ transient_heap.h | 13 + 2 files changed, 840 insertions(+) create mode 100644 transient_heap.c create mode 100644 transient_heap.h diff --git a/transient_heap.c b/transient_heap.c new file mode 100644 index 00000000000000..6ce9cd670e5d6f --- /dev/null +++ b/transient_heap.c @@ -0,0 +1,827 @@ +#include "ruby/ruby.h" +#include "ruby/debug.h" +#include "vm_debug.h" +#include "gc.h" +#include "internal.h" +#include +#include +#include "ruby_assert.h" +#include "transient_heap.h" +#include + +/* + * 1: enable assertions + * 2: enable verify + */ +#ifndef TRANSIENT_HEAP_CHECK_MODE +#define TRANSIENT_HEAP_CHECK_MODE 0 +#endif +#define TH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(TRANSIENT_HEAP_CHECK_MODE > 0, expr, #expr) + +/* + * 1: show events + * 2: show dump at events + * 3: show all operations + */ +#define TRANSIENT_HEAP_DEBUG 0 + +/* For Debug: Provide blocks infinitely. + * This mode generates blocks unlimitedly + * and prohibit access free'ed blocks to check invalid access. + */ +#define TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK 0 + +/* For Debug: Prohibit promoting to malloc space. 
+ */ +#define TRANSIENT_HEAP_DEBUG_DONT_PROMOTE 0 + +/* size configuration */ +#define TRANSIENT_HEAP_PROMOTED_DEFAULT_SIZE 1024 + + /* K M */ +#define TRANSIENT_HEAP_BLOCK_SIZE (1024 * 32 ) /* int16_t */ +#define TRANSIENT_HEAP_TOTAL_SIZE (1024 * 1024 * 16) +//#define TRANSIENT_HEAP_TOTAL_SIZE (TRANSIENT_HEAP_BLOCK_SIZE * 2) // (1024 * 1024 * 16) +#define TRANSIENT_HEAP_ALLOC_MAX (1024 * 2 ) +#define TRANSIENT_HEAP_BLOCK_NUM (TRANSIENT_HEAP_TOTAL_SIZE / TRANSIENT_HEAP_BLOCK_SIZE) + +#define TRANSIENT_HEAP_ALLOC_MAGIC 0xfeab +#define TRANSIENT_HEAP_ALLOC_ALIGN RUBY_ALIGNOF(void *) + +#define TRANSIENT_HEAP_ALLOC_MARKING_LAST -1 +#define TRANSIENT_HEAP_ALLOC_MARKING_FREE -2 + + +enum transient_heap_status { + transient_heap_none, + transient_heap_marking, + transient_heap_escaping +}; + +struct transient_heap_block { + struct transient_heap_block_header { + int16_t size; /* sizeof(block) = TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header) */ + int16_t index; + int16_t last_marked_index; + int16_t objects; + struct transient_heap_block *next_block; + } info; + char buff[TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header)]; +}; + +struct transient_heap { + struct transient_heap_block *using_blocks; + struct transient_heap_block *marked_blocks; + struct transient_heap_block *free_blocks; + int total_objects; + int total_marked_objects; + int total_blocks; + enum transient_heap_status status; + + VALUE *promoted_objects; + int promoted_objects_size; + int promoted_objects_index; + + struct transient_heap_block *arena; + int arena_index; /* increment only */ +}; + +struct transient_alloc_header { + uint16_t magic; + uint16_t size; + int16_t next_marked_index; + int16_t dummy; + VALUE obj; +}; + +static struct transient_heap global_transient_heap; + +static void transient_heap_promote_add(struct transient_heap* theap, VALUE obj); +static void *transient_heap_ptr(VALUE obj, int error); +static int 
transient_header_managed_ptr_p(struct transient_heap* theap, const void *ptr); + + +#define ROUND_UP(v, a) (((size_t)(v) + (a) - 1) & ~((a) - 1)) + +static void +transient_heap_block_dump(struct transient_heap* theap, struct transient_heap_block *block) +{ + int i=0, n=0; + struct transient_alloc_header *header = NULL; + + while (iinfo.index) { + header = (void *)&block->buff[i]; + fprintf(stderr, "%4d %8d %p size:%4d next:%4d %s\n", n, i, header, header->size, header->next_marked_index, rb_obj_info(header->obj)); + i += header->size; + n++; + } +} + +static void +transient_heap_blocks_dump(struct transient_heap* theap, struct transient_heap_block *block, const char *type_str) +{ + while (block) { + fprintf(stderr, "- transient_heap_dump: %s:%p index:%d objects:%d last_marked_index:%d next:%p\n", + type_str, block, block->info.index, block->info.objects, block->info.last_marked_index, block->info.next_block); + + transient_heap_block_dump(theap, block); + block = block->info.next_block; + } +} + +static void +transient_heap_dump(struct transient_heap* theap) +{ + fprintf(stderr, "transient_heap_dump objects:%d marked_objects:%d blocks:%d\n", theap->total_objects, theap->total_marked_objects, theap->total_blocks); + transient_heap_blocks_dump(theap, theap->using_blocks, "using_blocks"); + transient_heap_blocks_dump(theap, theap->marked_blocks, "marked_blocks"); + transient_heap_blocks_dump(theap, theap->free_blocks, "free_blocks"); +} + +void +rb_transient_heap_dump(void) +{ + transient_heap_dump(&global_transient_heap); +} + +#if TRANSIENT_HEAP_CHECK_MODE >= 2 +static void +transient_heap_ptr_check(struct transient_heap *theap, VALUE obj) +{ + if (obj != Qundef) { + void *ptr = transient_heap_ptr(obj, FALSE); + TH_ASSERT(ptr == NULL || transient_header_managed_ptr_p(theap, ptr)); + } +} + +static int +transient_heap_block_verify(struct transient_heap *theap, struct transient_heap_block *block) +{ + int i=0, n=0; + struct transient_alloc_header *header; + + while 
(iinfo.index) { + header = (void *)&block->buff[i]; + TH_ASSERT(header->magic == TRANSIENT_HEAP_ALLOC_MAGIC); + transient_heap_ptr_check(theap, header->obj); + n ++; + i += header->size; + } + TH_ASSERT(block->info.objects == n); + + return n; +} +#endif + +static void +transient_heap_verify(struct transient_heap *theap) +{ +#if TRANSIENT_HEAP_CHECK_MODE >= 2 + struct transient_heap_block *block; + int n=0, block_num=0; + + // using_blocks + block = theap->using_blocks; + while (block) { + n += transient_heap_block_verify(theap, block); + block_num++; + block = block->info.next_block; + } + + // marked_blocks + block = theap->marked_blocks; + while (block) { + n += transient_heap_block_verify(theap, block); + block_num++; + TH_ASSERT(block->info.index > 0); + block = block->info.next_block; + } + + TH_ASSERT(n == theap->total_objects); + TH_ASSERT(n >= theap->total_marked_objects); + TH_ASSERT(block_num == theap->total_blocks); +#endif +} + +void +rb_transient_heap_verify(void) +{ + transient_heap_verify(&global_transient_heap); +} + +static struct transient_heap* +transient_heap_get(void) +{ + struct transient_heap* theap = &global_transient_heap; + transient_heap_verify(theap); + return theap; +} + +static void +reset_block(struct transient_heap_block *block) +{ + block->info.size = TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header); + block->info.index = 0; + block->info.objects = 0; + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; + block->info.next_block = NULL; +} + +static void +connect_to_free_blocks(struct transient_heap *theap, struct transient_heap_block *block) +{ + block->info.next_block = theap->free_blocks; + theap->free_blocks = block; +} + +static void +connect_to_using_blocks(struct transient_heap *theap, struct transient_heap_block *block) +{ + block->info.next_block = theap->using_blocks; + theap->using_blocks = block; +} + +#if 0 +static void +connect_to_marked_blocks(struct transient_heap *theap, struct 
transient_heap_block *block) +{ + block->info.next_block = theap->marked_blocks; + theap->marked_blocks = block; +} +#endif + +static void +append_to_marked_blocks(struct transient_heap *theap, struct transient_heap_block *append_blocks) +{ + if (theap->marked_blocks) { + struct transient_heap_block *block = theap->marked_blocks, *last_block = NULL; + while (block) { + last_block = block; + block = block->info.next_block; + } + + TH_ASSERT(last_block->info.next_block == NULL); + last_block->info.next_block = append_blocks; + } + else { + theap->marked_blocks = append_blocks; + } +} + +static struct transient_heap_block * +transient_heap_block_alloc(struct transient_heap* theap) +{ + struct transient_heap_block *block; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = mmap(NULL, TRANSIENT_HEAP_BLOCK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (block == MAP_FAILED) rb_bug("transient_heap_block_alloc: err:%d\n", errno); +#else + if (theap->arena == NULL) { + int err = posix_memalign((void **)&theap->arena, TRANSIENT_HEAP_BLOCK_SIZE, TRANSIENT_HEAP_TOTAL_SIZE); + if (err != 0) rb_bug("transient_heap_block_alloc: posix_memalign error: %d", err); + } + TH_ASSERT(theap->arena_index < TRANSIENT_HEAP_BLOCK_NUM); + block = &theap->arena[theap->arena_index++]; + TH_ASSERT(((intptr_t)block & (TRANSIENT_HEAP_BLOCK_SIZE - 1)) == 0); +#endif + reset_block(block); + + TH_ASSERT(((intptr_t)block->buff & (TRANSIENT_HEAP_ALLOC_ALIGN-1)) == 0); + // fprintf(stderr, "transient_heap_block_alloc: %4d %p\n", theap->total_blocks, block); + return block; +} + + +static struct transient_heap_block * +transient_heap_allocatable_block(struct transient_heap* theap) +{ + struct transient_heap_block *block; + +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = transient_heap_block_alloc(theap); + theap->total_blocks++; +#else + // get one block from free_blocks + block = theap->free_blocks; + if (block) { + theap->free_blocks = block->info.next_block; + 
block->info.next_block = NULL; + theap->total_blocks++; + } +#endif + + return block; +} + +static struct transient_alloc_header * +transient_heap_allocatable_header(struct transient_heap* theap, size_t size) +{ + struct transient_heap_block *block = theap->using_blocks; + + while (block) { + TH_ASSERT(block->info.size >= block->info.index); + + if (block->info.size - block->info.index >= (int32_t)size) { + struct transient_alloc_header *header = (void *)&block->buff[block->info.index]; + block->info.index += size; + block->info.objects++; + return header; + } + else { + block = transient_heap_allocatable_block(theap); + if (block) connect_to_using_blocks(theap, block); + } + } + + return NULL; +} + +void * +rb_transient_heap_alloc(VALUE obj, size_t req_size) +{ + struct transient_heap* theap = transient_heap_get(); + size_t size = ROUND_UP(req_size + sizeof(struct transient_alloc_header), TRANSIENT_HEAP_ALLOC_ALIGN); + + TH_ASSERT(RB_TYPE_P(obj, T_ARRAY)); /* supported types */ + + if (size > TRANSIENT_HEAP_ALLOC_MAX) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [too big: %ld] %s\n", (long)size, rb_obj_info(obj)); + return NULL; + } +#if TRANSIENT_HEAP_DEBUG_DONT_PROMOTE == 0 + else if (RB_OBJ_PROMOTED_RAW(obj)) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [promoted object] %s\n", rb_obj_info(obj)); + return NULL; + } +#else + else if (RBASIC_CLASS(obj) == 0) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [hidden object] %s\n", rb_obj_info(obj)); + return NULL; + } +#endif + else { + struct transient_alloc_header *header = transient_heap_allocatable_header(theap, size); + if (header) { + void *ptr; + + header->size = size; + header->magic = TRANSIENT_HEAP_ALLOC_MAGIC; + header->next_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_FREE; + header->obj = obj; // TODO: for verify + + // stat info + theap->total_objects++; + ptr = header + 1; + +#if 
TRANSIENT_HEAP_DEBUG_DONT_PROMOTE + if (RB_OBJ_PROMOTED_RAW(obj)) { + transient_heap_promote_add(theap, obj); + } +#endif + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: header:%p ptr:%p size:%d obj:%s\n", header, ptr, (int)size, rb_obj_info(obj)); + return ptr; + } + else { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [no enough space: %ld] %s\n", (long)size, rb_obj_info(obj)); + return NULL; + } + } +} + +void +Init_TransientHeap(void) +{ + int i, block_num; + struct transient_heap* theap = transient_heap_get(); + +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block_num = 0; +#else + TH_ASSERT(TRANSIENT_HEAP_BLOCK_SIZE * TRANSIENT_HEAP_BLOCK_NUM == TRANSIENT_HEAP_TOTAL_SIZE); + block_num = TRANSIENT_HEAP_BLOCK_NUM; +#endif + for (i=0; iusing_blocks = transient_heap_allocatable_block(theap); + + theap->promoted_objects_size = TRANSIENT_HEAP_PROMOTED_DEFAULT_SIZE; + theap->promoted_objects_index = 0; + /* should not use ALLOC_N to be free from GC */ + theap->promoted_objects = malloc(sizeof(VALUE) * theap->promoted_objects_size); + if (theap->promoted_objects == NULL) rb_bug("Init_TransientHeap: malloc failed."); +} + +static struct transient_heap_block * +blocks_alloc_header_to_block(struct transient_heap *theap, struct transient_heap_block *blocks, struct transient_alloc_header *header) +{ + struct transient_heap_block *block = blocks; + + while (block) { + if (block->buff <= (char *)header && (char *)header < block->buff + block->info.size) { + return block; + } + block = block->info.next_block; + } + + return NULL; +} + +static struct transient_heap_block * +alloc_header_to_block_verbose(struct transient_heap *theap, struct transient_alloc_header *header) +{ + struct transient_heap_block *block; + + if ((block = blocks_alloc_header_to_block(theap, theap->marked_blocks, header)) != NULL) { + // if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "alloc_header_to_block: found in marked_blocks\n"); + return block; + 
} + else if ((block = blocks_alloc_header_to_block(theap, theap->using_blocks, header)) != NULL) { + // if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "alloc_header_to_block: found in using_blocks\n"); + return block; + } + else { + return NULL; + } + return block; +} + +static struct transient_alloc_header * +ptr_to_alloc_header(const void *ptr) +{ + struct transient_alloc_header *header = (void *)ptr; + header -= 1; + return header; +} + +static int +transient_header_managed_ptr_p(struct transient_heap* theap, const void *ptr) +{ + if (alloc_header_to_block_verbose(theap, ptr_to_alloc_header(ptr))) { + return TRUE; + } + else { + return FALSE; + } +} + + +int +rb_transient_heap_managed_ptr_p(const void *ptr) +{ + return transient_header_managed_ptr_p(transient_heap_get(), ptr); +} + +static struct transient_heap_block * +alloc_header_to_block(struct transient_heap *theap, struct transient_alloc_header *header) +{ + struct transient_heap_block *block; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = alloc_header_to_block_verbose(theap, header); + if (block == NULL) { + transient_heap_dump(theap); + rb_bug("alloc_header_to_block: not found in mark_blocks (%p)\n", header); + } +#else + block = (void *)((intptr_t)header & ~(TRANSIENT_HEAP_BLOCK_SIZE-1)); + TH_ASSERT(block == alloc_header_to_block_verbose(theap, header)); +#endif + return block; +} + +void +rb_transient_heap_mark(VALUE obj, const void *ptr) +{ + struct transient_alloc_header *header = ptr_to_alloc_header(ptr); + + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_mark: %s (%p)\n", rb_obj_info(obj), ptr); + +#if TRANSIENT_HEAP_CHECK_MODE > 0 + { + struct transient_heap* theap = transient_heap_get(); + TH_ASSERT(theap->status == transient_heap_marking); + TH_ASSERT(transient_header_managed_ptr_p(theap, ptr)); + + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) { + transient_heap_dump(theap); + rb_bug("rb_transient_heap_mark: magic is broken"); + } + else if (header->obj != obj) { + 
transient_heap_dump(theap); + rb_bug("rb_transient_heap_mark: unmatch (%s is stored, but %s is given)\n", + rb_obj_info(header->obj), rb_obj_info(obj)); + } + } +#endif + + if (header->next_marked_index != TRANSIENT_HEAP_ALLOC_MARKING_FREE) { + // already marked + return; + } + else { + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block *block = alloc_header_to_block(theap, header); + header->next_marked_index = block->info.last_marked_index; + block->info.last_marked_index = (int)((char *)header - block->buff); + theap->total_marked_objects++; + + transient_heap_verify(theap); + } +} + +static void * +transient_heap_ptr(VALUE obj, int error) +{ + void *ptr; + + switch (BUILTIN_TYPE(obj)) { + case T_ARRAY: + if (ARY_TRANSIENT_P(obj)) { + ptr = (VALUE *)RARRAY_CONST_PTR(obj); + } + else { + ptr = NULL; + } + break; + default: + if (error) { + rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj)); + } + else { + ptr = NULL; + } + } + + return ptr; +} + +static void +transient_heap_promote_add(struct transient_heap* theap, VALUE obj) +{ + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_promote: %s\n", rb_obj_info(obj)); + + if (TRANSIENT_HEAP_DEBUG_DONT_PROMOTE) { + /* duplicate check */ + int i; + for (i=0; ipromoted_objects_index; i++) { + if (theap->promoted_objects[i] == obj) return; + } + } + + if (theap->promoted_objects_size <= theap->promoted_objects_index) { + theap->promoted_objects_size *= 2; + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "rb_transient_heap_promote: expand table to %d\n", theap->promoted_objects_size); + theap->promoted_objects = realloc(theap->promoted_objects, theap->promoted_objects_size * sizeof(VALUE)); + if (theap->promoted_objects == NULL) rb_bug("rb_transient_heap_promote: realloc failed"); + } + theap->promoted_objects[theap->promoted_objects_index++] = obj; +} + +void +rb_transient_heap_promote(VALUE obj) +{ + + if (transient_heap_ptr(obj, FALSE)) { + struct 
transient_heap* theap = transient_heap_get(); + transient_heap_promote_add(theap, obj); + } + else { + /* ignore */ + } +} + +static struct transient_alloc_header * +alloc_header(struct transient_heap_block* block, int index) +{ + return (void *)&block->buff[index]; +} + +void rb_ary_transient_heap_promote(VALUE ary, int promote); + +static void +transient_heap_reset(void) +{ + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! transient_heap_reset\n"); + + block = theap->marked_blocks; + while (block) { + struct transient_heap_block *next_block = block->info.next_block; + theap->total_objects -= block->info.objects; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + // debug mode + if (madvise(block, TRANSIENT_HEAP_BLOCK_SIZE, MADV_DONTNEED) != 0) { + rb_bug("madvise err:%d", errno); + } + if (mprotect(block, TRANSIENT_HEAP_BLOCK_SIZE, PROT_NONE) != 0) { + rb_bug("mprotect err:%d", errno); + } +#else + reset_block(block); + connect_to_free_blocks(theap, block); +#endif + theap->total_blocks--; + block = next_block; + } + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! 
transient_heap_reset block_num:%d\n", theap->total_blocks); + + theap->marked_blocks = NULL; + theap->total_marked_objects = 0; +} + +static void +transient_heap_block_escape(struct transient_heap* theap, struct transient_heap_block* block) +{ + int marked_index = block->info.last_marked_index; + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; + + while (marked_index >= 0) { + struct transient_alloc_header *header = alloc_header(block, marked_index); + VALUE obj = header->obj; + + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, " * transient_heap_block_escape %p %s\n", header, rb_obj_info(obj)); + + if (obj != Qnil) { + switch (BUILTIN_TYPE(obj)) { + case T_ARRAY: +#if TRANSIENT_HEAP_DEBUG_DONT_PROMOTE + rb_ary_transient_heap_promote(obj, FALSE); +#else + rb_ary_transient_heap_promote(obj, TRUE); +#endif + break; + default: + rb_bug("unsupported"); + } + header->obj = Qundef; // to verify + } + marked_index = header->next_marked_index; + } +} + +static void +transient_heap_update_status(struct transient_heap* theap, enum transient_heap_status status) +{ + TH_ASSERT(theap->status != status); + theap->status = status; +} + +static void +transient_heap_escape(void *dmy) +{ + struct transient_heap* theap = transient_heap_get(); + + if (theap->status == transient_heap_marking) { + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! transient_heap_escape: skip while transient_heap_marking\n"); + } + else { + VALUE gc_disabled = rb_gc_disable(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) { + int i; + fprintf(stderr, "!! 
transient_heap_escape start total_blocks:%d\n", theap->total_blocks); + if (TRANSIENT_HEAP_DEBUG >= 4) { + for (i=0; ipromoted_objects_index; i++) fprintf(stderr, "%4d %s\n", i, rb_obj_info(theap->promoted_objects[i])); + } + } + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + TH_ASSERT(theap->status == transient_heap_none); + transient_heap_update_status(theap, transient_heap_escaping); + + // escape marked blocks + block = theap->marked_blocks; + while (block) { + transient_heap_block_escape(theap, block); + block = block->info.next_block; + } + + // escape using blocks + // only affect incremental marking + block = theap->using_blocks; + while (block) { + transient_heap_block_escape(theap, block); + block = block->info.next_block; + } + + // all objects in marked_objects are escaped. + transient_heap_reset(); + + if (TRANSIENT_HEAP_DEBUG > 0) { + fprintf(stderr, "!! transient_heap_escape end total_blocks:%d\n", theap->total_blocks); + // transient_heap_dump(theap); + } + + transient_heap_verify(theap); + transient_heap_update_status(theap, transient_heap_none); + if (gc_disabled != Qtrue) rb_gc_enable(); + } +} + +static void +clear_marked_index(struct transient_heap_block* block) +{ + int marked_index = block->info.last_marked_index; + + while (marked_index != TRANSIENT_HEAP_ALLOC_MARKING_LAST) { + struct transient_alloc_header *header = alloc_header(block, marked_index); + TH_ASSERT(marked_index != TRANSIENT_HEAP_ALLOC_MARKING_FREE); + // fprintf(stderr, "clear_marked_index - block:%p mark_index:%d\n", block, marked_index); + + marked_index = header->next_marked_index; + header->next_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_FREE; + } + + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; +} + +void +rb_transient_heap_start_marking(int full_marking) +{ + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! 
rb_transient_heap_start_marking objects:%d blocks:%d promoted:%d full_marking:%d\n", + theap->total_objects, theap->total_blocks, theap->promoted_objects_index, full_marking); + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + // clear marking info + block = theap->marked_blocks; + while (block) { + clear_marked_index(block); + block = block->info.next_block; + } + + block = theap->using_blocks; + while (block) { + clear_marked_index(block); + block = block->info.next_block; + } + + if (theap->using_blocks) { + if (theap->using_blocks->info.objects > 0) { + append_to_marked_blocks(theap, theap->using_blocks); + theap->using_blocks = NULL; + } + else { + append_to_marked_blocks(theap, theap->using_blocks->info.next_block); + theap->using_blocks->info.next_block = NULL; + } + } + + if (theap->using_blocks == NULL) { + theap->using_blocks = transient_heap_allocatable_block(theap); + } + + TH_ASSERT(theap->status == transient_heap_none); + transient_heap_update_status(theap, transient_heap_marking); + theap->total_marked_objects = 0; + + if (full_marking) { + theap->promoted_objects_index = 0; + } + else { /* mark promoted objects */ + int i; + for (i=0; i<theap->promoted_objects_index; i++) { + VALUE obj = theap->promoted_objects[i]; + void *ptr = transient_heap_ptr(obj, TRUE); + if (ptr) { + rb_transient_heap_mark(obj, ptr); + } + } + } + + transient_heap_verify(theap); +} + +void +rb_transient_heap_finish_marking(void) +{ + struct transient_heap* theap = transient_heap_get(); + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! 
rb_transient_heap_finish_marking objects:%d, marked:%d\n", + theap->total_objects, + theap->total_marked_objects); + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + TH_ASSERT(theap->total_objects >= theap->total_marked_objects); + + TH_ASSERT(theap->status == transient_heap_marking); + transient_heap_update_status(theap, transient_heap_none); + + if (theap->total_marked_objects > 0) { + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "-> rb_transient_heap_finish_marking register escape func.\n"); + rb_postponed_job_register_one(0, transient_heap_escape, NULL); + } + else { + transient_heap_reset(); + } + + transient_heap_verify(theap); +} diff --git a/transient_heap.h b/transient_heap.h new file mode 100644 index 00000000000000..22860413531bb0 --- /dev/null +++ b/transient_heap.h @@ -0,0 +1,13 @@ +#ifndef RUBY_TRANSIENT_HEAP_H +#define RUBY_TRANSIENT_HEAP_H + +void rb_transient_heap_promote(VALUE obj); +void rb_transient_heap_dump(void); +void *rb_transient_heap_alloc(VALUE obj, size_t req_size); +void rb_transient_heap_mark(VALUE obj, const void *ptr); +void rb_transient_heap_start_marking(int full_marking); +void rb_transient_heap_finish_marking(void); +int rb_transient_heap_managed_ptr_p(const void *ptr); +void rb_transient_heap_verify(void); + +#endif From fe5820ab41dc600e635e56dc3ca5ee5a910ce602 Mon Sep 17 00:00:00 2001 From: tacinight Date: Thu, 26 Jul 2018 16:53:37 +0800 Subject: [PATCH 4/6] introduce LinearTable for small hash Signed-off-by: tacinight --- gc.c | 14 + hash.c | 924 ++++++++++++++++++++++++++++++++++++----- include/ruby/st.h | 2 +- internal.h | 22 +- st.c | 20 +- test/ruby/test_time.rb | 1 + thread.c | 8 +- 7 files changed, 868 insertions(+), 123 deletions(-) diff --git a/gc.c b/gc.c index 0bc49467e73357..bdf5be6c41c713 100644 --- a/gc.c +++ b/gc.c @@ -2251,6 +2251,9 @@ obj_free(rb_objspace_t *objspace, VALUE obj) rb_ary_free(obj); break; case T_HASH: + if (RANY(obj)->as.hash.ltbl) { + free(RANY(obj)->as.hash.ltbl); + } if 
(RANY(obj)->as.hash.ntbl) { st_free_table(RANY(obj)->as.hash.ntbl); } @@ -3264,6 +3267,9 @@ obj_memsize_of(VALUE obj, int use_all_types) size += rb_ary_memsize(obj); break; case T_HASH: + if (RHASH(obj)->ltbl) { + size += sizeof(li_table); + } if (RHASH(obj)->ntbl) { size += st_memsize(RHASH(obj)->ntbl); } @@ -4158,6 +4164,13 @@ mark_hash(rb_objspace_t *objspace, st_table *tbl) st_foreach(tbl, mark_keyvalue, (st_data_t)objspace); } +static void +mark_hash_linear(rb_objspace_t *objspace, li_table *tbl) +{ + if (!tbl) return; + linear_foreach(tbl, mark_keyvalue, (st_data_t)objspace); +} + void rb_mark_hash(st_table *tbl) { @@ -4625,6 +4638,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) break; case T_HASH: + mark_hash_linear(objspace, any->as.hash.ltbl); mark_hash(objspace, any->as.hash.ntbl); gc_mark(objspace, any->as.hash.ifnone); break; diff --git a/hash.c b/hash.c index c9d60c7a56912c..f9f8780aa3f80a 100644 --- a/hash.c +++ b/hash.c @@ -21,6 +21,7 @@ #include "symbol.h" #include "gc.h" +#include #ifdef __APPLE__ # ifdef HAVE_CRT_EXTERNS_H # include @@ -299,6 +300,449 @@ static const struct st_hash_type identhash = { rb_ident_hash, }; +#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) +#define PTR_EQUAL(tab, ptr, hash_val, key_) \ + ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) + +#define RESERVED_HASH_VAL ((st_hash_t) 0) +#define RESERVED_HASH_SUBSTITUTION_VAL (~(st_hash_t) 0) + +#define HASH_HAS_NO_TABLE(hash) !(RHASH(hash)->ltbl) && !(RHASH(hash)->ntbl) +#define HASH_HAS_TABLE(hash) RHASH(hash)->ltbl || RHASH(hash)->ntbl + +#define SET_KEY(entry, _key) (entry)->key = (_key) +#define SET_HASH(entry, _hash) (entry)->hash = (_hash) +#define SET_RECORD(entry, _value) (entry)->record = (_value) + +#define RHASH_TYPE(hash) (RHASH(hash)->ltbl ? 
RHASH(hash)->ltbl->type : RHASH(hash)->ntbl->type) + +typedef st_data_t st_hash_t; + +static inline st_hash_t +do_hash(st_data_t key, li_table *tab) +{ + st_hash_t hash = (st_hash_t)(tab->type->hash)(key); + + return hash == ((st_hash_t) 0) ? (~(st_hash_t) 0) : hash; +} + +static inline void +set_entry(li_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash) +{ + SET_HASH(entry, hash); + SET_KEY(entry, key); + SET_RECORD(entry, val); +} + +static inline void +clear_entry(li_table_entry* entry) +{ + SET_KEY(entry, Qundef); + SET_RECORD(entry, Qundef); + SET_HASH(entry, 0); +} + +static inline int +empty_entry(li_table_entry *entry) +{ + return entry->hash == 0; +} + +static li_table* +linear_init_table(const struct st_hash_type *type) +{ + li_table *tab; + uint8_t i; + tab = (li_table*)malloc(sizeof(li_table)); + if (tab == NULL) rb_bug("linear_init_table: malloc failed"); + tab->type = type; + tab->num_entries = 0; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) + clear_entry(tab->entries + i); + return tab; +} + +static li_table* +linear_init_identtable(void) +{ + return linear_init_table(&identhash); +} + +static li_table* +linear_init_objtable(void) +{ + return linear_init_table(&objhash); +} + +static st_index_t +find_entry(li_table *tab, st_hash_t hash_value, st_data_t key) +{ + uint8_t i; + li_table_entry *entries; + + if (tab->num_entries == 0) return LINEAR_TABLE_BOUND; + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + if (PTR_EQUAL(tab, &entries[i], hash_value, key)) + return i; + } + return LINEAR_TABLE_BOUND; +} + +static void +try_convert_table(VALUE hash) +{ + st_table *new_tab; + li_table *tab; + li_table_entry *entries; + st_index_t i; + + tab = RHASH(hash)->ltbl; + if (!tab || tab->num_entries < LINEAR_TABLE_MAX_SIZE) + return; + + new_tab = st_init_table_with_size(tab->type, tab->num_entries * 2); + + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + assert(entries[i].hash != 0); + 
st_add_direct(new_tab, entries[i].key, entries[i].record); + } + free(tab); + RHASH(hash)->ltbl = NULL; + RHASH(hash)->ntbl = new_tab; + return; +} + +static void +force_convert_table(VALUE hash) +{ + st_table *new_tab; + li_table *tab; + li_table_entry *cur_entry, *entries; + uint8_t i; + + if (RHASH(hash)->ntbl) + return; + + tab = RHASH(hash)->ltbl; + if (tab) { + new_tab = st_init_table_with_size(tab->type, tab->num_entries); + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) continue; + st_add_direct(new_tab, cur_entry->key, cur_entry->record); + } + free(tab); + } + else if (!RHASH(hash)->ntbl) { + new_tab = st_init_table(&objhash); + } + RHASH(hash)->ltbl = NULL; + RHASH(hash)->ntbl = new_tab; + return; +} + +static int +compact_table(li_table *tab) +{ + li_table_entry *entries; + uint8_t empty = 0, non_empty = 1; + + entries = tab->entries; + for (; non_empty < LINEAR_TABLE_BOUND; empty++, non_empty++) { + while (!empty_entry(&entries[empty])) { + empty++; + if (empty == LINEAR_TABLE_BOUND - 1) goto done; + } + + if (non_empty <= empty) non_empty = empty + 1; + while (empty_entry(&entries[non_empty])) { + non_empty++; + if (non_empty == LINEAR_TABLE_BOUND) goto done; + } + + entries[empty] = entries[non_empty]; + clear_entry(&entries[non_empty]); + } +done: + assert(empty < LINEAR_TABLE_BOUND); + return empty; +} + +static int +add_direct_with_hash(li_table *tab, st_data_t key, st_data_t val, st_hash_t hash) +{ + uint8_t bin; + li_table_entry *entry; + + if (tab->num_entries >= LINEAR_TABLE_MAX_SIZE) + return 1; + + bin = compact_table(tab); + assert(bin < LINEAR_TABLE_BOUND); + entry = &tab->entries[bin]; + set_entry(entry, key, val, hash); + tab->num_entries++; + return 0; +} + +int +linear_foreach(li_table *tab, int (*func)(ANYARGS), st_data_t arg) +{ + uint8_t i; + li_table_entry *entries, *cur_entry; + enum st_retval retval; + + entries = tab->entries; + for (i = 0; i < 
LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) + continue; + retval = (*func)(cur_entry->key, cur_entry->record, arg, 0); + switch (retval) { + case ST_CONTINUE: + break; + case ST_CHECK: + case ST_STOP: + return 0; + case ST_DELETE: + clear_entry(cur_entry); + tab->num_entries--; + break; + } + } + return 0; +} + +static int +linear_foreach_check(li_table *tab, int (*func)(ANYARGS), st_data_t arg, + st_data_t never) +{ + uint8_t i, ret = 0; + li_table_entry *entries, *cur_entry; + enum st_retval retval; + st_data_t key; + st_hash_t hash; + + if (tab->num_entries == 0) return 0; + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) + continue; + key = cur_entry->key; + hash = cur_entry->hash; + retval = (*func)(key, cur_entry->record, arg, 0); + switch (retval) { + case ST_CHECK: { + if (entries[i].key == never && entries[i].hash == 0) + break; + ret = find_entry(tab, hash, key); + if (ret == LINEAR_TABLE_BOUND) { + retval = (*func)(0, 0, arg, 1); + return 2; + } + } + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: { + clear_entry(cur_entry); + tab->num_entries--; + break; + } + } + } + return 0; +} + +static int +linear_update(li_table *tab, st_data_t key, + st_update_callback_func *func, st_data_t arg) +{ + li_table_entry *entry; + int retval, existing; + uint8_t bin; + st_data_t value = 0, old_key; + st_hash_t hash = do_hash(key, tab); + + bin = find_entry(tab, hash, key); + existing = bin != LINEAR_TABLE_BOUND; + entry = &tab->entries[bin]; + if (existing) { + key = entry->key; + value = entry->record; + } + old_key = key; + retval = (*func)(&key, &value, arg, existing); + + switch (retval) { + case ST_CONTINUE: + if (!existing) { + if (add_direct_with_hash(tab, key, value, hash)) + return -1; + break; + } + if (old_key != key) { + entry->key = key; + } + entry->record = value; + break; + case ST_DELETE: + if (existing) { + 
clear_entry(entry); + tab->num_entries--; + } + break; + } + return existing; +} + +static int +linear_insert(li_table *tab, st_data_t key, st_data_t value) +{ + st_index_t bin; + st_hash_t hash_value; + + hash_value = do_hash(key, tab); + bin = find_entry(tab, hash_value, key); + if (bin == LINEAR_TABLE_BOUND) { + if (tab->num_entries >= LINEAR_TABLE_MAX_SIZE) + return -1; + bin = compact_table(tab); + assert(bin < LINEAR_TABLE_BOUND); + set_entry(&tab->entries[bin], key, value, hash_value); + tab->num_entries++; + return 0; + } + tab->entries[bin].record = value; + return 1; +} + +int +linear_lookup(li_table *tab, st_data_t key, st_data_t *value) +{ + st_index_t bin; + st_hash_t hash_value; + + hash_value = do_hash(key, tab); + bin = find_entry(tab, hash_value, key); + if (bin == LINEAR_TABLE_BOUND) { + return 0; + } + assert(bin < LINEAR_TABLE_BOUND); + if (value != 0) + *value = tab->entries[bin].record; + return 1; +} + +static int +linear_delete(li_table *tab, st_data_t *key, st_data_t *value) +{ + st_index_t bin; + st_hash_t hash_value; + li_table_entry *entry; + + hash_value = do_hash(*key, tab); + bin = find_entry(tab, hash_value, *key); + if (bin == LINEAR_TABLE_BOUND) { + if (value != 0) *value = 0; + return 0; + } + entry = &tab->entries[bin]; + if (value != 0) *value = entry->record; + clear_entry(entry); + tab->num_entries--; + return 1; +} + +static int +linear_shift(li_table *tab, st_data_t *key, st_data_t *value) +{ + uint8_t i; + li_table_entry *entry, *entries; + + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + entry = &entries[i]; + if (!empty_entry(entry)) { + if (value != 0) *value = entry->record; + *key = entry->key; + clear_entry(entry); + tab->num_entries--; + return 1; + } + } + if (value != 0) *value = 0; + return 0; +} + +static int +linear_keys(li_table *tab, st_data_t *keys, st_index_t size) +{ + uint8_t i; + st_data_t *keys_start, *keys_end; + li_table_entry *cur_entry, *entries = tab->entries; + + keys_start 
= keys; + keys_end = keys + size; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + if (keys == keys_end) + break; + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) + *keys++ = cur_entry->key; + } + + return keys - keys_start; +} + +static int +linear_values(li_table *tab, st_data_t *values, st_index_t size) +{ + uint8_t i; + st_data_t *values_start, *values_end; + li_table_entry *cur_entry, *entries = tab->entries; + + values_start = values; + values_end = values + size; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + if (values == values_end) + break; + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) + *values++ = cur_entry->record; + } + + return values - values_start; +} + +static li_table* +linear_copy(li_table *old_tab) +{ + li_table *new_tab; + new_tab = (li_table*) malloc(sizeof(li_table)); + if (new_tab == NULL) rb_bug("linear_copy: malloc failed"); + *new_tab = *old_tab; + return new_tab; +} + +static void +linear_clear(li_table *tab) +{ + tab->num_entries = 0; + memset(tab->entries, 0, 8 * sizeof(li_table_entry)); +} + +static inline void +linear_free_table(li_table *tab) +{ + free(tab); +} + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -330,7 +774,7 @@ st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a) arg.func = (st_foreach_func *)func; arg.arg = a; if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) { - rb_raise(rb_eRuntimeError, "hash modified during iteration"); + rb_raise(rb_eRuntimeError, "hash modified during iteration"); } } @@ -342,6 +786,30 @@ struct hash_foreach_arg { VALUE arg; }; +static int +hash_linear_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) +{ + struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp; + int status; + li_table *tbl; + + if (error) return ST_STOP; + tbl = RHASH(arg->hash)->ltbl; + status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); + if (RHASH(arg->hash)->ltbl != tbl) { + 
rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + } + switch (status) { + case ST_DELETE: + return ST_DELETE; + case ST_CONTINUE: + break; + case ST_STOP: + return ST_STOP; + } + return ST_CHECK; +} + static int hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) { @@ -384,8 +852,15 @@ static VALUE hash_foreach_call(VALUE arg) { VALUE hash = ((struct hash_foreach_arg *)arg)->hash; - if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) { - rb_raise(rb_eRuntimeError, "hash modified during iteration"); + int ret = 0; + if (RHASH(hash)->ltbl) + ret = linear_foreach_check(RHASH(hash)->ltbl, hash_linear_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + else if (RHASH(hash)->ntbl) + ret = st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + if (ret) { + rb_raise(rb_eRuntimeError, "ret: %d, hash modified during iteration", ret); } return Qnil; } @@ -395,7 +870,7 @@ rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) { struct hash_foreach_arg arg; - if (!RHASH(hash)->ntbl) + if (!RHASH(hash)->ntbl && !RHASH(hash)->ltbl) return; RHASH_ITER_LEV(hash)++; arg.hash = hash; @@ -447,8 +922,12 @@ MJIT_FUNC_EXPORTED VALUE rb_hash_new_with_size(st_index_t size) { VALUE ret = rb_hash_new(); - if (size) - RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); + if (size) { + if (size <= LINEAR_TABLE_MAX_SIZE) + RHASH(ret)->ltbl = linear_init_objtable(); + else + RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); + } return ret; } @@ -457,8 +936,12 @@ hash_dup(VALUE hash, VALUE klass, VALUE flags) { VALUE ret = hash_alloc_flags(klass, flags, RHASH_IFNONE(hash)); - if (!RHASH_EMPTY_P(hash)) - RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); + if (!RHASH_EMPTY_P(hash)) { + if (RHASH(hash)->ltbl) + RHASH(ret)->ltbl = linear_copy(RHASH(hash)->ltbl); + else + RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); + } return ret; } @@ -479,6 +962,15 @@ 
rb_hash_modify_check(VALUE hash) rb_check_frozen(hash); } +static li_table * +hash_ltbl(VALUE hash) +{ + if (!RHASH(hash)->ltbl) { + RHASH(hash)->ltbl = linear_init_objtable(); + } + return RHASH(hash)->ltbl; +} + static struct st_table * hash_tbl(VALUE hash) { @@ -492,12 +984,14 @@ struct st_table * rb_hash_tbl(VALUE hash) { OBJ_WB_UNPROTECT(hash); + force_convert_table(hash); return hash_tbl(hash); } MJIT_FUNC_EXPORTED struct st_table * rb_hash_tbl_raw(VALUE hash) { + force_convert_table(hash); return hash_tbl(hash); } @@ -505,7 +999,8 @@ static void rb_hash_modify(VALUE hash) { rb_hash_modify_check(hash); - hash_tbl(hash); + if (HASH_HAS_NO_TABLE(hash)) + hash_ltbl(hash); } NORETURN(static void no_new_key(void)); @@ -558,7 +1053,15 @@ tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg) arg.new_value = 0; arg.old_value = Qundef; - result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + if (RHASH(hash)->ltbl) { + result = linear_update(RHASH(hash)->ltbl, (st_data_t)key, func, (st_data_t)&arg); + if (result == -1) { + try_convert_table(hash); + result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + } + } + else + result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); /* write barrier */ if (arg.new_key) RB_OBJ_WRITTEN(hash, arg.old_key, arg.new_key); @@ -673,10 +1176,13 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) VALUE hash, tmp; if (argc == 1) { - tmp = rb_hash_s_try_convert(Qnil, argv[0]); + tmp = rb_hash_s_try_convert(Qnil, argv[0]); //TODO try_convert if (!NIL_P(tmp)) { hash = hash_alloc(klass); - if (RHASH(tmp)->ntbl) { + if (RHASH(tmp)->ltbl) { + RHASH(hash)->ltbl = linear_copy(RHASH(tmp)->ltbl); + } + else if (RHASH(tmp)->ntbl) { RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); } return hash; @@ -764,6 +1270,15 @@ struct rehash_arg { st_table *tbl; }; +static int +rb_hash_rehash_opt_i(VALUE key, VALUE value, VALUE arg) +{ + li_table *tbl = (li_table 
*)arg; + + linear_insert(tbl, (st_data_t)key, (st_data_t)value); + return ST_CONTINUE; +} + static int rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg) { @@ -798,22 +1313,32 @@ rb_hash_rehash(VALUE hash) { VALUE tmp; st_table *tbl; + li_table *ltbl; if (RHASH_ITER_LEV(hash) > 0) { rb_raise(rb_eRuntimeError, "rehash during iteration"); } rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - tmp = hash_alloc(0); - tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); - RHASH(tmp)->ntbl = tbl; - - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = tbl; - RHASH(tmp)->ntbl = 0; - + if (RHASH(hash)->ltbl) { + tmp = hash_alloc(0); + ltbl = linear_init_table(RHASH(hash)->ltbl->type); + RHASH(tmp)->ltbl = ltbl; + + rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)ltbl); + linear_free_table(RHASH(hash)->ltbl); + RHASH(hash)->ltbl = ltbl; + RHASH(tmp)->ltbl = 0; + } + else if (RHASH(hash)->ntbl) { + tmp = hash_alloc(0); + tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); + RHASH(tmp)->ntbl = tbl; + + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); + st_free_table(RHASH(hash)->ntbl); + RHASH(hash)->ntbl = tbl; + RHASH(tmp)->ntbl = 0; + } return hash; } @@ -850,10 +1375,13 @@ rb_hash_aref(VALUE hash, VALUE key) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return rb_hash_default_value(hash, key); + if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + return (VALUE)val; } - return (VALUE)val; + else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + return (VALUE)val; + } + return rb_hash_default_value(hash, key); } VALUE @@ -861,10 +1389,13 @@ rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return def; /* without Hash#default */ + if 
(RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + return (VALUE)val; + } + else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + return (VALUE)val; } - return (VALUE)val; + return def; /* without Hash#default */ } VALUE @@ -916,19 +1447,22 @@ rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) if (block_given && argc == 2) { rb_warn("block supersedes default value argument"); } - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - if (block_given) return rb_yield(key); - if (argc == 1) { - VALUE desc = rb_protect(rb_inspect, key, 0); - if (NIL_P(desc)) { - desc = rb_any_to_s(key); - } - desc = rb_str_ellipsize(desc, 65); - rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); + if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + return (VALUE)val; + } + else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + return (VALUE)val; + } + if (block_given) return rb_yield(key); + if (argc == 1) { + VALUE desc = rb_protect(rb_inspect, key, 0); + if (NIL_P(desc)) { + desc = rb_any_to_s(key); } - return argv[1]; + desc = rb_str_ellipsize(desc, 65); + rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); } - return (VALUE)val; + return argv[1]; } VALUE @@ -1117,10 +1651,10 @@ rb_hash_delete_entry(VALUE hash, VALUE key) { st_data_t ktmp = (st_data_t)key, val; - if (!RHASH(hash)->ntbl) { - return Qundef; + if (RHASH(hash)->ltbl && linear_delete(RHASH(hash)->ltbl, &ktmp, &val)) { + return (VALUE)val; } - else if (st_delete(RHASH(hash)->ntbl, &ktmp, &val)) { + else if (RHASH(hash)->ntbl && st_delete(RHASH(hash)->ntbl, &ktmp, &val)) { return (VALUE)val; } else { @@ -1219,6 +1753,21 @@ rb_hash_shift(VALUE hash) struct shift_var var; rb_hash_modify_check(hash); + if (RHASH(hash)->ltbl) { + var.key = Qundef; + if (RHASH_ITER_LEV(hash) == 0) { + if (linear_shift(RHASH(hash)->ltbl, &var.key, &var.val)) { + return rb_assoc_new(var.key, 
var.val); + } + } + else { + rb_hash_foreach(hash, shift_i_safe, (VALUE)&var); + if (var.key != Qundef) { + rb_hash_delete_entry(hash, var.key); + return rb_assoc_new(var.key, var.val); + } + } + } if (RHASH(hash)->ntbl) { var.key = Qundef; if (RHASH_ITER_LEV(hash) == 0) { @@ -1272,7 +1821,7 @@ rb_hash_delete_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (HASH_HAS_TABLE(hash)) rb_hash_foreach(hash, delete_if_i, hash); return hash; } @@ -1296,7 +1845,7 @@ rb_hash_reject_bang(VALUE hash) n = RHASH_SIZE(hash); if (!n) return Qnil; rb_hash_foreach(hash, delete_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1486,11 +2035,10 @@ rb_hash_select_bang(VALUE hash) RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return Qnil; - n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (!n) return Qnil; rb_hash_foreach(hash, keep_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1511,7 +2059,7 @@ rb_hash_keep_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (HASH_HAS_TABLE(hash)) rb_hash_foreach(hash, keep_if_i, hash); return hash; } @@ -1537,9 +2085,13 @@ VALUE rb_hash_clear(VALUE hash) { rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - if (RHASH(hash)->ntbl->num_entries > 0) { + if (RHASH(hash)->ltbl && RHASH(hash)->ltbl->num_entries > 0) { + if (RHASH_ITER_LEV(hash) > 0) + rb_hash_foreach(hash, clear_i, 0); + else + linear_clear(RHASH(hash)->ltbl); + } + else if (RHASH(hash)->ntbl && RHASH(hash)->ntbl->num_entries > 0) { if (RHASH_ITER_LEV(hash) > 0) rb_hash_foreach(hash, clear_i, 0); else @@ -1640,14 +2192,15 @@ VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val) { 
int iter_lev = RHASH_ITER_LEV(hash); - st_table *tbl = RHASH(hash)->ntbl; + const struct st_hash_type *type; rb_hash_modify(hash); - if (!tbl) { + if (HASH_HAS_NO_TABLE(hash)) { if (iter_lev > 0) no_new_key(); - tbl = hash_tbl(hash); + RHASH(hash)->ltbl = linear_init_objtable(); } - if (tbl->type == &identhash || rb_obj_class(key) != rb_cString) { + type = RHASH_TYPE(hash); + if (type == &identhash || rb_obj_class(key) != rb_cString) { RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset, val); } else { @@ -1669,6 +2222,7 @@ static VALUE rb_hash_initialize_copy(VALUE hash, VALUE hash2) { st_table *ntbl; + li_table *ltbl; rb_hash_modify_check(hash); hash2 = to_hash(hash2); @@ -1677,13 +2231,23 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) if (hash == hash2) return hash; + ltbl = RHASH(hash)->ltbl; ntbl = RHASH(hash)->ntbl; - if (RHASH(hash2)->ntbl) { + if (RHASH(hash2)->ltbl) { + if (ltbl) linear_free_table(ltbl); + RHASH(hash)->ltbl = linear_copy(RHASH(hash2)->ltbl); + if (RHASH(hash)->ltbl->num_entries) + rb_hash_rehash(hash); + } + else if (RHASH(hash2)->ntbl) { if (ntbl) st_free_table(ntbl); RHASH(hash)->ntbl = st_copy(RHASH(hash2)->ntbl); if (RHASH(hash)->ntbl->num_entries) rb_hash_rehash(hash); } + else if (ltbl) { + linear_clear(ltbl); + } else if (ntbl) { st_clear(ntbl); } @@ -1708,18 +2272,20 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) static VALUE rb_hash_replace(VALUE hash, VALUE hash2) { - st_table *table2; - rb_hash_modify_check(hash); if (hash == hash2) return hash; hash2 = to_hash(hash2); COPY_DEFAULT(hash, hash2); - table2 = RHASH(hash2)->ntbl; - rb_hash_clear(hash); - if (table2) hash_tbl(hash)->type = table2->type; + if (RHASH(hash2)->ltbl) { + hash_ltbl(hash)->type = RHASH(hash2)->ltbl->type; + } + else if (RHASH(hash2)->ntbl) { + hash_tbl(hash)->type = RHASH(hash2)->ntbl->type; + } + rb_hash_foreach(hash2, replace_i, hash); return hash; @@ -1941,7 +2507,7 @@ rb_hash_transform_keys_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, 
hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { + if (HASH_HAS_TABLE(hash)) { long i; VALUE pairs = rb_hash_flatten(0, NULL, hash); rb_hash_clear(hash); @@ -2015,7 +2581,7 @@ rb_hash_transform_values_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (HASH_HAS_TABLE(hash)) rb_hash_foreach(hash, transform_values_i, hash); return hash; } @@ -2165,12 +2731,22 @@ rb_hash_keys(VALUE hash) if (size == 0) return keys; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH(hash)->ltbl) { + li_table *table = RHASH(hash)->ltbl; + + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = linear_keys(table, ptr, size); + }); + } + else { + st_table *table = RHASH(hash)->ntbl; - rb_gc_writebarrier_remember(keys); - RARRAY_PTR_USE(keys, ptr, { - size = st_keys(table, ptr, size); - }); + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = st_keys(table, ptr, size); + }); + } rb_ary_set_len(keys, size); } else { @@ -2209,12 +2785,22 @@ rb_hash_values(VALUE hash) if (size == 0) return values; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH(hash)->ltbl) { + li_table *table = RHASH(hash)->ltbl; + + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = linear_values(table, ptr, size); + }); + } + else { + st_table *table = RHASH(hash)->ntbl; - rb_gc_writebarrier_remember(values); - RARRAY_PTR_USE(values, ptr, { - size = st_values(table, ptr, size); - }); + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = st_values(table, ptr, size); + }); + } rb_ary_set_len(values, size); } else { @@ -2246,9 +2832,10 @@ rb_hash_values(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (st_lookup(RHASH(hash)->ntbl, key, 0)) { + if (RHASH(hash)->ltbl && 
linear_lookup(RHASH(hash)->ltbl, key, 0)) { + return Qtrue; + } + else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, 0)) { return Qtrue; } return Qfalse; @@ -2313,6 +2900,42 @@ eql_i(VALUE key, VALUE val1, VALUE arg) return ST_CONTINUE; } +struct equal_data_opt { + VALUE result; + li_table *tbl; + int eql; +}; + +static int +eql_opt_i(VALUE key, VALUE val1, VALUE arg) +{ + struct equal_data_opt *data = (struct equal_data_opt *)arg; + st_data_t val2; + + if (!linear_lookup(data->tbl, key, &val2)) { + data->result = Qfalse; + return ST_STOP; + } + if (!(data->eql ? rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) { + data->result = Qfalse; + return ST_STOP; + } + return ST_CONTINUE; +} + +static VALUE +recursive_eql_opt(VALUE hash, VALUE dt, int recur) +{ + struct equal_data_opt *data; + + if (recur) return Qtrue; + data = (struct equal_data_opt*)dt; + data->result = Qtrue; + rb_hash_foreach(hash, eql_opt_i, dt); + + return data->result; +} + static VALUE recursive_eql(VALUE hash, VALUE dt, int recur) { @@ -2330,6 +2953,7 @@ static VALUE hash_equal(VALUE hash1, VALUE hash2, int eql) { struct equal_data data; + struct equal_data_opt data_opt; if (hash1 == hash2) return Qtrue; if (!RB_TYPE_P(hash2, T_HASH)) { @@ -2350,19 +2974,28 @@ hash_equal(VALUE hash1, VALUE hash2, int eql) } if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return Qfalse; - if (!RHASH(hash1)->ntbl || !RHASH(hash2)->ntbl) - return Qtrue; - if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) - return Qfalse; + if (RHASH(hash1)->ltbl && RHASH(hash2)->ltbl) { + if (RHASH(hash1)->ltbl->type != RHASH(hash2)->ltbl->type) + return Qfalse; + + data_opt.tbl = RHASH(hash2)->ltbl; + data_opt.eql = eql; + return rb_exec_recursive_paired(recursive_eql_opt, hash1, hash2, (VALUE)&data_opt); + } + else if (RHASH(hash1)->ntbl && RHASH(hash2)->ntbl) { + if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) + return Qfalse; + + data.tbl = RHASH(hash2)->ntbl; + data.eql = eql; + return 
rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + } #if 0 if (!(rb_equal(RHASH_IFNONE(hash1), RHASH_IFNONE(hash2)) && FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT))) return Qfalse; #endif - - data.tbl = RHASH(hash2)->ntbl; - data.eql = eql; - return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + return Qtrue; } /* @@ -2683,7 +3316,10 @@ static VALUE reset_hash_type(VALUE arg) { struct reset_hash_type_arg *p = (struct reset_hash_type_arg *)arg; - RHASH(p->hash)->ntbl->type = p->orighash; + if (RHASH(p->hash)->ltbl) + RHASH(p->hash)->ltbl->type = p->orighash; + else + RHASH(p->hash)->ntbl->type = p->orighash; return Qundef; } @@ -2717,12 +3353,19 @@ VALUE rb_hash_assoc(VALUE hash, VALUE key) { st_table *table; + li_table *ltable; const struct st_hash_type *orighash; VALUE args[2]; if (RHASH_EMPTY_P(hash)) return Qnil; - table = RHASH(hash)->ntbl; - orighash = table->type; + if (RHASH(hash)->ltbl) { + ltable = RHASH(hash)->ltbl; + orighash = ltable->type; + } + else { + table = RHASH(hash)->ntbl; + orighash = table->type; + } if (orighash != &identhash) { VALUE value; @@ -2731,7 +3374,10 @@ rb_hash_assoc(VALUE hash, VALUE key) assochash.compare = assoc_cmp; assochash.hash = orighash->hash; - table->type = &assochash; + if (ltable) + ltable->type = &assochash; + else + table->type = &assochash; args[0] = hash; args[1] = key; ensure_arg.hash = hash; @@ -2898,11 +3544,12 @@ rb_hash_compact(VALUE hash) static VALUE rb_hash_compact_bang(VALUE hash) { + st_index_t n; rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { - st_index_t n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (n) { rb_hash_foreach(hash, delete_if_nil, hash); - if (n != RHASH(hash)->ntbl->num_entries) + if (n != RHASH_SIZE(hash)) return hash; } return Qnil; @@ -2927,15 +3574,24 @@ rb_hash_compact_bang(VALUE hash) static VALUE rb_hash_compare_by_id(VALUE hash) { - st_table *identtable; if 
(rb_hash_compare_by_id_p(hash)) return hash; rb_hash_modify_check(hash); - identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)identtable); - if (RHASH(hash)->ntbl) + if (!RHASH(hash)->ntbl) { + li_table *identtable; + identtable = linear_init_identtable(); + rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)identtable); + if (RHASH(hash)->ltbl) + linear_free_table(RHASH(hash)->ltbl); + RHASH(hash)->ltbl = identtable; + } + else { + st_table *identtable; + identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)identtable); st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = identtable; + RHASH(hash)->ntbl = identtable; + } return hash; } @@ -2952,9 +3608,10 @@ rb_hash_compare_by_id(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_compare_by_id_p(VALUE hash) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (RHASH(hash)->ntbl->type == &identhash) { + if (RHASH(hash)->ltbl && RHASH(hash)->ltbl->type == &identhash) { + return Qtrue; + } + if (RHASH(hash)->ntbl && RHASH(hash)->ntbl->type == &identhash) { return Qtrue; } return Qfalse; @@ -3221,13 +3878,74 @@ add_new_i(st_data_t *key, st_data_t *val, st_data_t arg, int existing) * returns non-zero if +key+ was contained. */ int -rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) +rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) //TODO { - st_table *tbl = rb_hash_tbl_raw(hash); + li_table *ltbl; + st_table *tbl; + int ret = 0; VALUE args[2]; args[0] = hash; args[1] = val; + if (!RHASH(hash)->ntbl) { + ltbl = hash_ltbl(hash); + ret = linear_update(ltbl, (st_data_t)key, add_new_i, (st_data_t)args); + if (ret != -1) + return ret; + try_convert_table(hash); + } + tbl = rb_hash_tbl_raw(hash); return st_update(tbl, (st_data_t)key, add_new_i, (st_data_t)args); + +} + +static st_data_t +linear_stringify(VALUE key) +{ + return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ? 
+ rb_hash_key_str(key) : key; +} + +static void +linear_bulk_insert(li_table *tab, long argc, const VALUE *argv, VALUE hash) +{ + long i; + for (i = 0; i < argc; ) { + st_data_t k = linear_stringify(argv[i++]); + st_data_t v = argv[i++]; + linear_insert(tab, k, v); + RB_OBJ_WRITTEN(hash, Qundef, k); + RB_OBJ_WRITTEN(hash, Qundef, v); + } +} + +MJIT_FUNC_EXPORTED void +rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +{ + st_index_t size; + li_table *ltbl = RHASH(hash)->ltbl; + + assert(argc % 2 == 0); + if (! argc) + return; + size = argc / 2; + if (HASH_HAS_NO_TABLE(hash)) { + VALUE tmp = rb_hash_new_with_size(size); + RBASIC_CLEAR_CLASS(tmp); + if (size <= LINEAR_TABLE_MAX_SIZE) { + RHASH(hash)->ltbl = ltbl = RHASH(tmp)->ltbl; + RHASH(tmp)->ltbl = NULL; + } + else { + RHASH(hash)->ntbl = RHASH(tmp)->ntbl; + RHASH(tmp)->ntbl = NULL; + } + } + if (ltbl && (ltbl->num_entries + size <= LINEAR_TABLE_MAX_SIZE)) { + linear_bulk_insert(ltbl, argc, argv, hash); + return; + } + + rb_hash_bulk_insert_into_st_table(argc, argv, hash); } static int path_tainted = -1; diff --git a/include/ruby/st.h b/include/ruby/st.h index ede3ff44567fc0..149e0ebaef3945 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -143,7 +143,7 @@ CONSTFUNC(st_index_t st_hash_end(st_index_t h)); CONSTFUNC(st_index_t st_hash_start(st_index_t h)); #define st_hash_start(h) ((st_index_t)(h)) -void rb_hash_bulk_insert(long, const VALUE *, VALUE); +void rb_hash_bulk_insert_into_st_table(long, const VALUE *, VALUE); RUBY_SYMBOL_EXPORT_END diff --git a/internal.h b/internal.h index 221b7db41e7c33..179bd21d265fdc 100644 --- a/internal.h +++ b/internal.h @@ -670,9 +670,25 @@ struct RComplex { #define RCOMPLEX_SET_IMAG(cmp, i) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->imag,(i)) #endif +#define LINEAR_TABLE_MAX_SIZE 8 +#define LINEAR_TABLE_BOUND LINEAR_TABLE_MAX_SIZE + +typedef struct li_table_entry { + VALUE hash; + VALUE key; + VALUE record; +} li_table_entry; + +typedef struct 
LinearTable { + const struct st_hash_type *type; + st_index_t num_entries; + li_table_entry entries[LINEAR_TABLE_MAX_SIZE]; +} li_table; + struct RHash { struct RBasic basic; struct st_table *ntbl; /* possibly 0 */ + struct LinearTable *ltbl; int iter_lev; const VALUE ifnone; }; @@ -685,7 +701,8 @@ struct RHash { #undef RHASH_SIZE #define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) #define RHASH_IFNONE(h) (RHASH(h)->ifnone) -#define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : (st_index_t)0) +#define RHASH_SIZE_NTBL(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : (st_index_t)0) +#define RHASH_SIZE(h) (RHASH(h)->ltbl ? RHASH(h)->ltbl->num_entries : RHASH_SIZE_NTBL(h)) #endif /* missing/setproctitle.c */ @@ -1359,6 +1376,9 @@ VALUE rb_hash_keys(VALUE hash); VALUE rb_hash_values(VALUE hash); VALUE rb_hash_rehash(VALUE hash); int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val); +int linear_foreach(li_table *, int (*)(ANYARGS), st_data_t); +int linear_lookup(li_table *, st_data_t, st_data_t *); +void rb_hash_bulk_insert(long, const VALUE *, VALUE); #define HASH_PROC_DEFAULT FL_USER2 /* inits.c */ diff --git a/st.c b/st.c index 1a47525707a239..0ed2d2b95db41d 100644 --- a/st.c +++ b/st.c @@ -2281,24 +2281,16 @@ st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash) st_rehash(tab); } -/* Mimics ruby's { foo => bar } syntax. This function is placed here - because it touches table internals and write barriers at once. */ +/* Mimics ruby's { foo => bar } syntax. This function is subpart + of rb_hash_bulk_insert. */ void -rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash) { - st_index_t n; + st_index_t n, size = argc / 2; st_table *tab = RHASH(hash)->ntbl; - st_assert(argc % 2 == 0); - if (! argc) - return; - if (! 
tab) { - VALUE tmp = rb_hash_new_with_size(argc / 2); - RBASIC_CLEAR_CLASS(tmp); - RHASH(hash)->ntbl = tab = RHASH(tmp)->ntbl; - RHASH(tmp)->ntbl = NULL; - } - n = tab->num_entries + argc / 2; + tab = rb_hash_tbl_raw(hash); + n = tab->num_entries + size; st_expand_table(tab, n); if (UNLIKELY(tab->num_entries)) st_insert_generic(tab, argc, argv, hash); diff --git a/test/ruby/test_time.rb b/test/ruby/test_time.rb index 50ac569c4eee83..0aac07b05d6e4a 100644 --- a/test/ruby/test_time.rb +++ b/test/ruby/test_time.rb @@ -1138,6 +1138,7 @@ def test_memsize case size when 20 then expect = 50 when 40 then expect = 86 + when 48 then expect = 94 else flunk "Unsupported RVALUE_SIZE=#{size}, update test_memsize" end diff --git a/thread.c b/thread.c index 3943cf0fc631e2..0395d0fffb68f2 100644 --- a/thread.c +++ b/thread.c @@ -3506,10 +3506,10 @@ rb_thread_variable_p(VALUE thread, VALUE key) locals = rb_ivar_get(thread, id_locals); - if (!RHASH(locals)->ntbl) - return Qfalse; - - if (st_lookup(RHASH(locals)->ntbl, ID2SYM(id), 0)) { + if (RHASH(locals)->ltbl && linear_lookup(RHASH(locals)->ltbl, ID2SYM(id), 0)) { + return Qtrue; + } + else if (RHASH(locals)->ntbl && st_lookup(RHASH(locals)->ntbl, ID2SYM(id), 0)) { return Qtrue; } From def2875c965cfe18918d89c22d7590d8762b0eca Mon Sep 17 00:00:00 2001 From: tacinight Date: Sat, 28 Jul 2018 11:46:08 +0800 Subject: [PATCH 5/6] add transient heap linear table support Signed-off-by: tacinight --- array.c | 3 +- gc.c | 24 ++--- hash.c | 207 ++++++++++++++++++++++++++++++++---------- include/ruby/intern.h | 1 + internal.h | 4 + transient_heap.c | 20 +++- 6 files changed, 196 insertions(+), 63 deletions(-) diff --git a/array.c b/array.c index 486e73bcc58fe1..fd3838ee045b02 100644 --- a/array.c +++ b/array.c @@ -4382,8 +4382,9 @@ ary_recycle_hash(VALUE hash) if (RHASH(hash)->ntbl) { st_table *tbl = RHASH(hash)->ntbl; st_free_table(tbl); + RHASH(hash)->ntbl = NULL; } - rb_gc_force_recycle(hash); + //rb_gc_force_recycle(hash); } /* diff --git 
a/gc.c b/gc.c index bdf5be6c41c713..3518c42a890880 100644 --- a/gc.c +++ b/gc.c @@ -2251,12 +2251,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) rb_ary_free(obj); break; case T_HASH: - if (RANY(obj)->as.hash.ltbl) { - free(RANY(obj)->as.hash.ltbl); - } - if (RANY(obj)->as.hash.ntbl) { - st_free_table(RANY(obj)->as.hash.ntbl); - } + rb_hash_free(obj); break; case T_REGEXP: if (RANY(obj)->as.regexp.ptr) { @@ -4165,10 +4160,17 @@ mark_hash(rb_objspace_t *objspace, st_table *tbl) } static void -mark_hash_linear(rb_objspace_t *objspace, li_table *tbl) +mark_hash_linear(rb_objspace_t *objspace, VALUE hash) { - if (!tbl) return; - linear_foreach(tbl, mark_keyvalue, (st_data_t)objspace); + if (RHASH(hash)->ltbl) { + linear_foreach(RHASH(hash)->ltbl, mark_keyvalue, (st_data_t)objspace); + if (objspace->mark_func_data == NULL && RHASH_TRANSIENT_P(hash)) { + rb_transient_heap_mark(hash, RHASH(hash)->ltbl); + } + } + else if (RHASH(hash)->ntbl) + mark_hash(objspace, RHASH(hash)->ntbl); + gc_mark(objspace, RHASH(hash)->ifnone); } void @@ -4638,9 +4640,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) break; case T_HASH: - mark_hash_linear(objspace, any->as.hash.ltbl); - mark_hash(objspace, any->as.hash.ntbl); - gc_mark(objspace, any->as.hash.ifnone); + mark_hash_linear(objspace, obj); break; case T_STRING: diff --git a/hash.c b/hash.c index f9f8780aa3f80a..ded8c2402fc639 100644 --- a/hash.c +++ b/hash.c @@ -20,8 +20,8 @@ #include "id.h" #include "symbol.h" #include "gc.h" - -#include +#include "transient_heap.h" +#include "ruby_assert.h" #ifdef __APPLE__ # ifdef HAVE_CRT_EXTERNS_H # include @@ -316,6 +316,50 @@ static const struct st_hash_type identhash = { #define RHASH_TYPE(hash) (RHASH(hash)->ltbl ? 
RHASH(hash)->ltbl->type : RHASH(hash)->ntbl->type) +#ifndef RHASH_DEBUG +#define RHASH_DEBUG 0 +#endif + +#define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(RHASH_DEBUG > 0, expr, #expr) +#if RHASH_DEBUG > 0 +#define hash_varify(hash) hash_varify_(hash, __FILE__, __LINE__) + +static VALUE +hash_varify_(VALUE hash, const char *file, int line) +{ + HASH_ASSERT(RB_TYPE_P(hash, T_HASH)); + if (RHASH(hash)->ltbl) { + li_table *tab = RHASH(hash)->ltbl; + li_table_entry *cur_entry, *entries; + st_data_t h, k, v; + uint8_t i, n = 0; + HASH_ASSERT(tab->type != NULL); + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) { + h = cur_entry->hash; + k = cur_entry->key; + v = cur_entry->record; + HASH_ASSERT(h != 0); + HASH_ASSERT(k != Qundef); + HASH_ASSERT(v != Qundef); + n++; + } + } + HASH_ASSERT(n == tab->num_entries); + } + + if (RHASH_TRANSIENT_P(hash)) { + HASH_ASSERT(RHASH(hash)->ltbl != NULL); + HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH(hash)->ltbl)); + } + return hash; +} +#else +#define hash_varify(h) ((void)0) +#endif + typedef st_data_t st_hash_t; static inline st_hash_t @@ -349,12 +393,18 @@ empty_entry(li_table_entry *entry) } static li_table* -linear_init_table(const struct st_hash_type *type) +linear_init_table(VALUE hash, const struct st_hash_type *type) { li_table *tab; uint8_t i; - tab = (li_table*)malloc(sizeof(li_table)); - if (tab == NULL) rb_bug("linear_init_table: malloc failed"); + tab = (li_table*)rb_transient_heap_alloc(hash, sizeof(li_table)); + if (tab != NULL) { + FL_SET_RAW(hash, RHASH_TRANSIENT_FLAG); + } + else { + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + tab = (li_table*)malloc(sizeof(li_table)); + } tab->type = type; tab->num_entries = 0; for (i = 0; i < LINEAR_TABLE_BOUND; i++) @@ -363,15 +413,15 @@ linear_init_table(const struct st_hash_type *type) } static li_table* -linear_init_identtable(void) +linear_init_identtable(VALUE hash) { - return 
linear_init_table(&identhash); + return linear_init_table(hash, &identhash); } static li_table* -linear_init_objtable(void) +linear_init_objtable(VALUE hash) { - return linear_init_table(&objhash); + return linear_init_table(hash, &objhash); } static st_index_t @@ -389,6 +439,34 @@ find_entry(li_table *tab, st_hash_t hash_value, st_data_t key) return LINEAR_TABLE_BOUND; } +static inline void +linear_free_table(VALUE hash, li_table *tab) +{ + if (!RHASH_TRANSIENT_P(hash) && tab) + free(tab); +} + +static void +rb_hash_heap_free(VALUE hash) +{ + // fprintf(stderr, "rb_hash_heap_free: %p\n", (void*)hash); + if (RHASH_TRANSIENT_P(hash)) { + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + } else { + linear_free_table(hash, RHASH(hash)->ltbl); + } +} + +void +rb_hash_free(VALUE hash) +{ + if (RHASH(hash)->ltbl) { + rb_hash_heap_free(hash); + } + else if (RHASH(hash)->ntbl) { + st_free_table(RHASH(hash)->ntbl); + } +} static void try_convert_table(VALUE hash) { @@ -405,10 +483,13 @@ try_convert_table(VALUE hash) entries = tab->entries; for (i = 0; i < LINEAR_TABLE_BOUND; i++) { - assert(entries[i].hash != 0); + HASH_ASSERT(entries[i].hash != 0); st_add_direct(new_tab, entries[i].key, entries[i].record); } - free(tab); + HASH_ASSERT(tab == RHASH(hash)->ltbl); + linear_free_table(hash, tab); + /* converting table means to promote the hash, unset the transient flag anyway*/ + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); RHASH(hash)->ltbl = NULL; RHASH(hash)->ntbl = new_tab; return; @@ -434,7 +515,10 @@ force_convert_table(VALUE hash) if (empty_entry(cur_entry)) continue; st_add_direct(new_tab, cur_entry->key, cur_entry->record); } - free(tab); + HASH_ASSERT(tab == RHASH(hash)->ltbl); + linear_free_table(hash, tab); + /* converting table means to promote the hash, unset the transient flag anyway*/ + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); } else if (!RHASH(hash)->ntbl) { new_tab = st_init_table(&objhash); @@ -467,7 +551,7 @@ compact_table(li_table *tab) 
clear_entry(&entries[non_empty]); } done: - assert(empty < LINEAR_TABLE_BOUND); + HASH_ASSERT(empty < LINEAR_TABLE_BOUND); return empty; } @@ -481,7 +565,7 @@ add_direct_with_hash(li_table *tab, st_data_t key, st_data_t val, st_hash_t hash return 1; bin = compact_table(tab); - assert(bin < LINEAR_TABLE_BOUND); + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); entry = &tab->entries[bin]; set_entry(entry, key, val, hash); tab->num_entries++; @@ -613,7 +697,7 @@ linear_insert(li_table *tab, st_data_t key, st_data_t value) if (tab->num_entries >= LINEAR_TABLE_MAX_SIZE) return -1; bin = compact_table(tab); - assert(bin < LINEAR_TABLE_BOUND); + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); set_entry(&tab->entries[bin], key, value, hash_value); tab->num_entries++; return 0; @@ -633,7 +717,7 @@ linear_lookup(li_table *tab, st_data_t key, st_data_t *value) if (bin == LINEAR_TABLE_BOUND) { return 0; } - assert(bin < LINEAR_TABLE_BOUND); + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); if (value != 0) *value = tab->entries[bin].record; return 1; @@ -721,11 +805,17 @@ linear_values(li_table *tab, st_data_t *values, st_index_t size) } static li_table* -linear_copy(li_table *old_tab) +linear_copy(VALUE hash, li_table *old_tab) { li_table *new_tab; - new_tab = (li_table*) malloc(sizeof(li_table)); - if (new_tab == NULL) rb_bug("linear_copy: malloc failed"); + new_tab = (li_table*) rb_transient_heap_alloc(hash, sizeof(li_table)); + if (new_tab != NULL) { + FL_SET_RAW(hash, RHASH_TRANSIENT_FLAG); + } + else { + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + new_tab = (li_table*) malloc(sizeof(li_table)); + } *new_tab = *old_tab; return new_tab; } @@ -737,12 +827,33 @@ linear_clear(li_table *tab) memset(tab->entries, 0, 8 * sizeof(li_table_entry)); } -static inline void -linear_free_table(li_table *tab) +void +rb_hash_transient_heap_promote(VALUE hash, int promote) { - free(tab); + if (RHASH_TRANSIENT_P(hash)) { + li_table *new_tab; + li_table *old_tab = RHASH(hash)->ltbl; + if (UNLIKELY(RHASH(hash)->ltbl == 
NULL)) { + rb_gc_force_recycle(hash); + return; + } + HASH_ASSERT(old_tab != NULL); + if (promote) { + new_tab = malloc(sizeof(li_table)); + FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + } + else { + new_tab = rb_transient_heap_alloc(hash, sizeof(li_table)); + } + *new_tab = *old_tab; + HASH_ASSERT(new_tab->type == old_tab->type); + HASH_ASSERT(new_tab->num_entries == old_tab->num_entries); + RHASH(hash)->ltbl = new_tab; + } + hash_varify(hash); } + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -791,14 +902,15 @@ hash_linear_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int err { struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp; int status; - li_table *tbl; + // li_table *tbl; if (error) return ST_STOP; - tbl = RHASH(arg->hash)->ltbl; + /* linear table will move it's position due to escaping from transient heap */ + // tbl = RHASH(arg->hash)->ltbl; status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); - if (RHASH(arg->hash)->ltbl != tbl) { - rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); - } + // if (RHASH(arg->hash)->ltbl != tbl) { + // rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + // } switch (status) { case ST_DELETE: return ST_DELETE; @@ -821,7 +933,7 @@ hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) tbl = RHASH(arg->hash)->ntbl; status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); if (RHASH(arg->hash)->ntbl != tbl) { - rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); } switch (status) { case ST_DELETE: @@ -877,6 +989,7 @@ rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) arg.func = (rb_foreach_func *)func; arg.arg = farg; rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash); + hash_varify(hash); } static VALUE @@ -924,7 +1037,7 @@ rb_hash_new_with_size(st_index_t size) VALUE ret = rb_hash_new(); if (size) 
{ if (size <= LINEAR_TABLE_MAX_SIZE) - RHASH(ret)->ltbl = linear_init_objtable(); + RHASH(ret)->ltbl = linear_init_objtable(ret); else RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); } @@ -938,7 +1051,7 @@ hash_dup(VALUE hash, VALUE klass, VALUE flags) RHASH_IFNONE(hash)); if (!RHASH_EMPTY_P(hash)) { if (RHASH(hash)->ltbl) - RHASH(ret)->ltbl = linear_copy(RHASH(hash)->ltbl); + RHASH(ret)->ltbl = linear_copy(ret, RHASH(hash)->ltbl); else RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); } @@ -966,7 +1079,7 @@ static li_table * hash_ltbl(VALUE hash) { if (!RHASH(hash)->ltbl) { - RHASH(hash)->ltbl = linear_init_objtable(); + RHASH(hash)->ltbl = linear_init_objtable(hash); } return RHASH(hash)->ltbl; } @@ -1180,7 +1293,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) if (!NIL_P(tmp)) { hash = hash_alloc(klass); if (RHASH(tmp)->ltbl) { - RHASH(hash)->ltbl = linear_copy(RHASH(tmp)->ltbl); + RHASH(hash)->ltbl = linear_copy(hash, RHASH(tmp)->ltbl); } else if (RHASH(tmp)->ntbl) { RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); @@ -1231,7 +1344,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) hash = hash_alloc(klass); rb_hash_bulk_insert(argc, argv, hash); - + hash_varify(hash); return hash; } @@ -1321,11 +1434,11 @@ rb_hash_rehash(VALUE hash) rb_hash_modify_check(hash); if (RHASH(hash)->ltbl) { tmp = hash_alloc(0); - ltbl = linear_init_table(RHASH(hash)->ltbl->type); + ltbl = linear_init_table(hash, RHASH(hash)->ltbl->type); RHASH(tmp)->ltbl = ltbl; rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)ltbl); - linear_free_table(RHASH(hash)->ltbl); + linear_free_table(hash, RHASH(hash)->ltbl); // TODO RHASH(hash)->ltbl = ltbl; RHASH(tmp)->ltbl = 0; } @@ -1339,6 +1452,7 @@ rb_hash_rehash(VALUE hash) RHASH(hash)->ntbl = tbl; RHASH(tmp)->ntbl = 0; } + hash_varify(hash); return hash; } @@ -1381,6 +1495,7 @@ rb_hash_aref(VALUE hash, VALUE key) else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { return (VALUE)val; } + hash_varify(hash); 
return rb_hash_default_value(hash, key); } @@ -1395,6 +1510,7 @@ rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { return (VALUE)val; } + hash_varify(hash); return def; /* without Hash#default */ } @@ -1462,6 +1578,7 @@ rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) desc = rb_str_ellipsize(desc, 65); rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); } + hash_varify(hash); return argv[1]; } @@ -2197,7 +2314,7 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val) rb_hash_modify(hash); if (HASH_HAS_NO_TABLE(hash)) { if (iter_lev > 0) no_new_key(); - RHASH(hash)->ltbl = linear_init_objtable(); + RHASH(hash)->ltbl = linear_init_objtable(hash); } type = RHASH_TYPE(hash); if (type == &identhash || rb_obj_class(key) != rb_cString) { @@ -2234,8 +2351,8 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) ltbl = RHASH(hash)->ltbl; ntbl = RHASH(hash)->ntbl; if (RHASH(hash2)->ltbl) { - if (ltbl) linear_free_table(ltbl); - RHASH(hash)->ltbl = linear_copy(RHASH(hash2)->ltbl); + if (ltbl) linear_free_table(hash, ltbl); + RHASH(hash)->ltbl = linear_copy(hash, RHASH(hash2)->ltbl); if (RHASH(hash)->ltbl->num_entries) rb_hash_rehash(hash); } @@ -3579,10 +3696,10 @@ rb_hash_compare_by_id(VALUE hash) if (!RHASH(hash)->ntbl) { li_table *identtable; - identtable = linear_init_identtable(); + identtable = linear_init_identtable(hash); rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)identtable); if (RHASH(hash)->ltbl) - linear_free_table(RHASH(hash)->ltbl); + linear_free_table(hash, RHASH(hash)->ltbl); RHASH(hash)->ltbl = identtable; } else { @@ -3924,20 +4041,16 @@ rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) st_index_t size; li_table *ltbl = RHASH(hash)->ltbl; - assert(argc % 2 == 0); + HASH_ASSERT(argc % 2 == 0); if (! 
argc) return; size = argc / 2; if (HASH_HAS_NO_TABLE(hash)) { - VALUE tmp = rb_hash_new_with_size(size); - RBASIC_CLEAR_CLASS(tmp); if (size <= LINEAR_TABLE_MAX_SIZE) { - RHASH(hash)->ltbl = ltbl = RHASH(tmp)->ltbl; - RHASH(tmp)->ltbl = NULL; + hash_ltbl(hash); } else { - RHASH(hash)->ntbl = RHASH(tmp)->ntbl; - RHASH(tmp)->ntbl = NULL; + hash_tbl(hash); } } if (ltbl && (ltbl->num_entries + size <= LINEAR_TABLE_MAX_SIZE)) { diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 9ecd8ce8e21347..78362890e5dc1f 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -517,6 +517,7 @@ int rb_path_check(const char*); int rb_env_path_tainted(void); VALUE rb_env_clear(void); VALUE rb_hash_size(VALUE); +void rb_hash_free(VALUE); /* io.c */ #define rb_defout rb_stdout RUBY_EXTERN VALUE rb_fs; diff --git a/internal.h b/internal.h index 179bd21d265fdc..5698760357aa9f 100644 --- a/internal.h +++ b/internal.h @@ -1355,6 +1355,9 @@ RUBY_SYMBOL_EXPORT_END #define NEWOBJ_OF(obj,type,klass,flags) RB_NEWOBJ_OF(obj,type,klass,flags) /* hash.c */ +#define RHASH_TRANSIENT_FLAG FL_USER14 +#define RHASH_TRANSIENT_P(hash) FL_TEST_RAW((hash), RHASH_TRANSIENT_FLAG) + struct st_table *rb_hash_tbl_raw(VALUE hash); VALUE rb_hash_new_with_size(st_index_t size); RUBY_SYMBOL_EXPORT_BEGIN @@ -1375,6 +1378,7 @@ VALUE rb_hash_key_str(VALUE); VALUE rb_hash_keys(VALUE hash); VALUE rb_hash_values(VALUE hash); VALUE rb_hash_rehash(VALUE hash); +void rb_hash_free(VALUE hash); int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val); int linear_foreach(li_table *, int (*)(ANYARGS), st_data_t); int linear_lookup(li_table *, st_data_t, st_data_t *); diff --git a/transient_heap.c b/transient_heap.c index 6ce9cd670e5d6f..dcd365dd056352 100644 --- a/transient_heap.c +++ b/transient_heap.c @@ -14,7 +14,7 @@ * 2: enable verify */ #ifndef TRANSIENT_HEAP_CHECK_MODE -#define TRANSIENT_HEAP_CHECK_MODE 0 +#define TRANSIENT_HEAP_CHECK_MODE 1 #endif #define TH_ASSERT(expr) 
RUBY_ASSERT_MESG_WHEN(TRANSIENT_HEAP_CHECK_MODE > 0, expr, #expr) @@ -344,7 +344,7 @@ rb_transient_heap_alloc(VALUE obj, size_t req_size) struct transient_heap* theap = transient_heap_get(); size_t size = ROUND_UP(req_size + sizeof(struct transient_alloc_header), TRANSIENT_HEAP_ALLOC_ALIGN); - TH_ASSERT(RB_TYPE_P(obj, T_ARRAY)); /* supported types */ + TH_ASSERT(RB_TYPE_P(obj, T_ARRAY) || RB_TYPE_P(obj, T_HASH)); /* supported types */ if (size > TRANSIENT_HEAP_ALLOC_MAX) { if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [too big: %ld] %s\n", (long)size, rb_obj_info(obj)); @@ -495,7 +495,7 @@ void rb_transient_heap_mark(VALUE obj, const void *ptr) { struct transient_alloc_header *header = ptr_to_alloc_header(ptr); - + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header, %s (%p)", rb_obj_info(obj), ptr); if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_mark: %s (%p)\n", rb_obj_info(obj), ptr); #if TRANSIENT_HEAP_CHECK_MODE > 0 @@ -545,6 +545,14 @@ transient_heap_ptr(VALUE obj, int error) ptr = NULL; } break; + case T_HASH: + if (RHASH_TRANSIENT_P(obj)) { + ptr = (VALUE *)(RHASH(obj)->ltbl); + } + else { + ptr = NULL; + } + break; default: if (error) { rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj)); @@ -599,6 +607,7 @@ alloc_header(struct transient_heap_block* block, int index) } void rb_ary_transient_heap_promote(VALUE ary, int promote); +void rb_hash_transient_heap_promote(VALUE hash, int promote); static void transient_heap_reset(void) @@ -643,6 +652,8 @@ transient_heap_block_escape(struct transient_heap* theap, struct transient_heap_ while (marked_index >= 0) { struct transient_alloc_header *header = alloc_header(block, marked_index); VALUE obj = header->obj; + TH_ASSERT(header->magic == TRANSIENT_HEAP_ALLOC_MAGIC); + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header %s\n", rb_obj_info(obj)); if (TRANSIENT_HEAP_DEBUG >= 
3) fprintf(stderr, " * transient_heap_block_escape %p %s\n", header, rb_obj_info(obj)); @@ -655,6 +666,9 @@ transient_heap_block_escape(struct transient_heap* theap, struct transient_heap_ rb_ary_transient_heap_promote(obj, TRUE); #endif break; + case T_HASH: + rb_hash_transient_heap_promote(obj, TRUE); + break; default: rb_bug("unsupporeted"); } From 3cd0a6b5ab3021b6583662b612af2df6af0abad2 Mon Sep 17 00:00:00 2001 From: tacinight Date: Fri, 3 Aug 2018 08:47:44 +0800 Subject: [PATCH 6/6] integrate data to hash flag Signed-off-by: tacinight --- array.c | 6 +- compile.c | 2 +- gc.c | 16 +- hash.c | 732 ++++++++++++++++++++++++----------------------- internal.h | 44 ++- st.c | 2 +- thread.c | 4 +- transient_heap.c | 8 +- vm_eval.c | 2 +- 9 files changed, 423 insertions(+), 393 deletions(-) diff --git a/array.c b/array.c index fd3838ee045b02..e34d3070a3a1d6 100644 --- a/array.c +++ b/array.c @@ -4379,10 +4379,10 @@ static inline void ary_recycle_hash(VALUE hash) { assert(RBASIC_CLASS(hash) == 0); - if (RHASH(hash)->ntbl) { - st_table *tbl = RHASH(hash)->ntbl; + if (RHASH_TABLE_P(hash)) { + st_table *tbl = RHASH(hash)->as.ntbl; st_free_table(tbl); - RHASH(hash)->ntbl = NULL; + RHASH(hash)->as.ntbl = NULL; } //rb_gc_force_recycle(hash); } diff --git a/compile.c b/compile.c index 5a458200ec78eb..1430099a40e392 100644 --- a/compile.c +++ b/compile.c @@ -9363,7 +9363,7 @@ ibf_dump_object_hash(struct ibf_dump *dump, VALUE obj) { long len = RHASH_SIZE(obj); (void)IBF_W(&len, long, 1); - if (len > 0) st_foreach(RHASH(obj)->ntbl, ibf_dump_object_hash_i, (st_data_t)dump); + if (len > 0) st_foreach(RHASH(obj)->as.ntbl, ibf_dump_object_hash_i, (st_data_t)dump); } static VALUE diff --git a/gc.c b/gc.c index 3518c42a890880..1c5d05a0f4b668 100644 --- a/gc.c +++ b/gc.c @@ -3262,11 +3262,11 @@ obj_memsize_of(VALUE obj, int use_all_types) size += rb_ary_memsize(obj); break; case T_HASH: - if (RHASH(obj)->ltbl) { + if (RHASH_ARRAY_P(obj)) { size += sizeof(li_table); } - if 
(RHASH(obj)->ntbl) { - size += st_memsize(RHASH(obj)->ntbl); + else if (RHASH(obj)->as.ntbl) { + size += st_memsize(RHASH(obj)->as.ntbl); } break; case T_REGEXP: @@ -4162,14 +4162,14 @@ mark_hash(rb_objspace_t *objspace, st_table *tbl) static void mark_hash_linear(rb_objspace_t *objspace, VALUE hash) { - if (RHASH(hash)->ltbl) { - linear_foreach(RHASH(hash)->ltbl, mark_keyvalue, (st_data_t)objspace); + if (RHASH_ARRAY_P(hash)) { + linear_foreach(hash, mark_keyvalue, (st_data_t)objspace); if (objspace->mark_func_data == NULL && RHASH_TRANSIENT_P(hash)) { - rb_transient_heap_mark(hash, RHASH(hash)->ltbl); + rb_transient_heap_mark(hash, RHASH(hash)->as.ltbl); } } - else if (RHASH(hash)->ntbl) - mark_hash(objspace, RHASH(hash)->ntbl); + else if (RHASH_TABLE_P(hash)) + st_foreach(RHASH(hash)->as.ntbl, mark_keyvalue, (st_data_t)objspace); gc_mark(objspace, RHASH(hash)->ifnone); } diff --git a/hash.c b/hash.c index ded8c2402fc639..72f25f06eafe90 100644 --- a/hash.c +++ b/hash.c @@ -307,29 +307,58 @@ static const struct st_hash_type identhash = { #define RESERVED_HASH_VAL ((st_hash_t) 0) #define RESERVED_HASH_SUBSTITUTION_VAL (~(st_hash_t) 0) -#define HASH_HAS_NO_TABLE(hash) !(RHASH(hash)->ltbl) && !(RHASH(hash)->ntbl) -#define HASH_HAS_TABLE(hash) RHASH(hash)->ltbl || RHASH(hash)->ntbl +#define RHASH_TABLE_EMPTY(hash) !(RHASH(hash)->as.ltbl) +#define RHASH_TABLE_NONEMPTY(hash) RHASH(hash)->as.ltbl #define SET_KEY(entry, _key) (entry)->key = (_key) #define SET_HASH(entry, _hash) (entry)->hash = (_hash) #define SET_RECORD(entry, _value) (entry)->record = (_value) -#define RHASH_TYPE(hash) (RHASH(hash)->ltbl ? RHASH(hash)->ltbl->type : RHASH(hash)->ntbl->type) +#define RHASH_TYPE(hash) (RHASH_ARRAY_P(hash) ? 
RHASH(hash)->as.ltbl->type : RHASH(hash)->as.ntbl->type) -#ifndef RHASH_DEBUG -#define RHASH_DEBUG 0 -#endif +typedef st_data_t st_hash_t; + +static inline st_hash_t +do_hash(st_data_t key, li_table *tab) +{ + return (st_hash_t)(tab->type->hash)(key); + // st_hash_t hash = (st_hash_t)(tab->type->hash)(key); + // return hash == ((st_hash_t) 0) ? (~(st_hash_t) 0) : hash; +} + +static inline void +set_entry(li_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash) +{ + SET_HASH(entry, hash); + SET_KEY(entry, key); + SET_RECORD(entry, val); +} -#define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(RHASH_DEBUG > 0, expr, #expr) -#if RHASH_DEBUG > 0 +static inline void +clear_entry(li_table_entry* entry) +{ + SET_KEY(entry, Qundef); + SET_RECORD(entry, Qundef); + SET_HASH(entry, 0); +} + +static inline int +empty_entry(li_table_entry *entry) +{ + return entry->hash == 0; +} +//#define RHASH_DEBUG + +#ifdef RHASH_DEBUG #define hash_varify(hash) hash_varify_(hash, __FILE__, __LINE__) +#define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(1, expr, #expr) static VALUE hash_varify_(VALUE hash, const char *file, int line) { HASH_ASSERT(RB_TYPE_P(hash, T_HASH)); - if (RHASH(hash)->ltbl) { - li_table *tab = RHASH(hash)->ltbl; + if (RHASH_ARRAY_P(hash)) { + li_table *tab = RHASH(hash)->as.ltbl; li_table_entry *cur_entry, *entries; st_data_t h, k, v; uint8_t i, n = 0; @@ -347,50 +376,64 @@ hash_varify_(VALUE hash, const char *file, int line) n++; } } - HASH_ASSERT(n == tab->num_entries); + HASH_ASSERT(n == RHASH_ARRAY_LEN(hash)); } - + if (RHASH_TRANSIENT_P(hash)) { - HASH_ASSERT(RHASH(hash)->ltbl != NULL); - HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH(hash)->ltbl)); + HASH_ASSERT(RHASH(hash)->as.ltbl != NULL); + HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH(hash)->as.ltbl)); } return hash; } #else #define hash_varify(h) ((void)0) +#define HASH_ASSERT(e) ((void)0) #endif -typedef st_data_t st_hash_t; +#define RHASH_SET_ARRAY_FLAG(h) FL_SET_RAW(h, RHASH_ARRAY_FLAG) 
+#define RHASH_UNSET_ARRAY_FLAG(h) FL_UNSET_RAW(h, RHASH_ARRAY_FLAG) +#define RHASH_SET_TRANSIENT_FLAG(h) FL_SET_RAW(h, RHASH_TRANSIENT_FLAG) +#define RHASH_UNSET_TRANSIENT_FLAG(h) FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG) -static inline st_hash_t -do_hash(st_data_t key, li_table *tab) -{ - st_hash_t hash = (st_hash_t)(tab->type->hash)(key); +#define RHASH_SET_ARRAY_LEN(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_LEN_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_LEN_SHIFT; \ +} while (0) - return hash == ((st_hash_t) 0) ? (~(st_hash_t) 0) : hash; -} +#define RHASH_ARRAY_BOUND(h) \ + (HASH_ASSERT(RHASH_ARRAY_P(h)), \ + (long)((RBASIC(h)->flags >> RHASH_ARRAY_BOUND_SHIFT) & \ + (RHASH_ARRAY_BOUND_MASK >> RHASH_ARRAY_BOUND_SHIFT))) -static inline void -set_entry(li_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash) -{ - SET_HASH(entry, hash); - SET_KEY(entry, key); - SET_RECORD(entry, val); -} +#define RHASH_SET_ARRAY_BOUND(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_BOUND_SHIFT; \ +} while (0) -static inline void -clear_entry(li_table_entry* entry) -{ - SET_KEY(entry, Qundef); - SET_RECORD(entry, Qundef); - SET_HASH(entry, 0); -} +#define HASH_ARRAY_INCREASE_LEN(h, n) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)+(n)); \ +} while (0) + +#define RHASH_ARRAY_LEN_ADD_ONE(h) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)+(1)); \ +} while (0) + +#define RHASH_ARRAY_LEN_MINUS_ONE(h) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)-(1)); \ +} while (0) + +#define RHASH_CLEAR_BITS(h) do { \ + RBASIC(h)->flags &= ~RHASH_ARRAY_LEN_MASK; \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ +} while (0) -static inline int -empty_entry(li_table_entry *entry) -{ - return 
entry->hash == 0; -} static li_table* linear_init_table(VALUE hash, const struct st_hash_type *type) @@ -399,16 +442,19 @@ linear_init_table(VALUE hash, const struct st_hash_type *type) uint8_t i; tab = (li_table*)rb_transient_heap_alloc(hash, sizeof(li_table)); if (tab != NULL) { - FL_SET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_SET_TRANSIENT_FLAG(hash); } else { - FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_UNSET_TRANSIENT_FLAG(hash); tab = (li_table*)malloc(sizeof(li_table)); } tab->type = type; - tab->num_entries = 0; + RHASH_SET_ARRAY_FLAG(hash); + RHASH_SET_ARRAY_LEN(hash, 0); + RHASH_SET_ARRAY_BOUND(hash, 0); for (i = 0; i < LINEAR_TABLE_BOUND; i++) clear_entry(tab->entries + i); + RHASH(hash)->as.ltbl = tab; return tab; } @@ -425,14 +471,14 @@ linear_init_objtable(VALUE hash) } static st_index_t -find_entry(li_table *tab, st_hash_t hash_value, st_data_t key) +find_entry(VALUE hash, st_hash_t hash_value, st_data_t key) { - uint8_t i; - li_table_entry *entries; + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *entries = tab->entries; - if (tab->num_entries == 0) return LINEAR_TABLE_BOUND; - entries = tab->entries; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + if (RHASH_ARRAY_LEN(hash) == 0) return LINEAR_TABLE_BOUND; + for (i = 0; i < bound; i++) { if (PTR_EQUAL(tab, &entries[i], hash_value, key)) return i; } @@ -440,10 +486,16 @@ find_entry(li_table *tab, st_hash_t hash_value, st_data_t key) } static inline void -linear_free_table(VALUE hash, li_table *tab) +linear_free_table(VALUE hash) { - if (!RHASH_TRANSIENT_P(hash) && tab) - free(tab); + if (RHASH_ARRAY_P(hash)) { + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH_CLEAR_BITS(hash); + if (!RHASH_TRANSIENT_P(hash)) { + free(RHASH(hash)->as.ltbl); + } + RHASH(hash)->as.ltbl = NULL; + } } static void @@ -451,51 +503,54 @@ rb_hash_heap_free(VALUE hash) { // fprintf(stderr, "rb_hash_heap_free: %p\n", (void*)hash); if (RHASH_TRANSIENT_P(hash)) { - 
FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_UNSET_TRANSIENT_FLAG(hash); } else { - linear_free_table(hash, RHASH(hash)->ltbl); + linear_free_table(hash); } } void rb_hash_free(VALUE hash) { - if (RHASH(hash)->ltbl) { + if (RHASH_ARRAY_P(hash)) { rb_hash_heap_free(hash); } - else if (RHASH(hash)->ntbl) { - st_free_table(RHASH(hash)->ntbl); + else if (RHASH_TABLE_P(hash)) { + st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = NULL; } } + static void try_convert_table(VALUE hash) { st_table *new_tab; - li_table *tab; + li_table *tab = RHASH(hash)->as.ltbl; li_table_entry *entries; + uint8_t size = RHASH_ARRAY_LEN(hash); st_index_t i; - tab = RHASH(hash)->ltbl; - if (!tab || tab->num_entries < LINEAR_TABLE_MAX_SIZE) + if (!RHASH_ARRAY_P(hash) || size < LINEAR_TABLE_MAX_SIZE) return; - new_tab = st_init_table_with_size(tab->type, tab->num_entries * 2); + new_tab = st_init_table_with_size(tab->type, size * 2); entries = tab->entries; for (i = 0; i < LINEAR_TABLE_BOUND; i++) { HASH_ASSERT(entries[i].hash != 0); st_add_direct(new_tab, entries[i].key, entries[i].record); } - HASH_ASSERT(tab == RHASH(hash)->ltbl); - linear_free_table(hash, tab); + HASH_ASSERT(tab == RHASH(hash)->as.ltbl); + linear_free_table(hash); /* converting table means to promote the hash, unset the transient flag anyway*/ - FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); - RHASH(hash)->ltbl = NULL; - RHASH(hash)->ntbl = new_tab; + RHASH_UNSET_TRANSIENT_FLAG(hash); + RHASH_CLEAR_BITS(hash); + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH(hash)->as.ntbl = new_tab; return; } -static void +static st_table * force_convert_table(VALUE hash) { st_table *new_tab; @@ -503,38 +558,38 @@ force_convert_table(VALUE hash) li_table_entry *cur_entry, *entries; uint8_t i; - if (RHASH(hash)->ntbl) - return; + if (RHASH_TABLE_P(hash)) + return RHASH(hash)->as.ntbl; - tab = RHASH(hash)->ltbl; + tab = RHASH(hash)->as.ltbl; if (tab) { - new_tab = st_init_table_with_size(tab->type, tab->num_entries); + new_tab = 
st_init_table_with_size(tab->type, RHASH_ARRAY_LEN(hash)); entries = tab->entries; for (i = 0; i < LINEAR_TABLE_BOUND; i++) { cur_entry = &entries[i]; if (empty_entry(cur_entry)) continue; st_add_direct(new_tab, cur_entry->key, cur_entry->record); } - HASH_ASSERT(tab == RHASH(hash)->ltbl); - linear_free_table(hash, tab); + HASH_ASSERT(tab == RHASH(hash)->as.ltbl); + linear_free_table(hash); /* converting table means to promote the hash, unset the transient flag anyway*/ - FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_UNSET_TRANSIENT_FLAG(hash); } - else if (!RHASH(hash)->ntbl) { + else if (!RHASH(hash)->as.ntbl) { new_tab = st_init_table(&objhash); } - RHASH(hash)->ltbl = NULL; - RHASH(hash)->ntbl = new_tab; - return; + RHASH_CLEAR_BITS(hash); + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH(hash)->as.ntbl = new_tab; + return RHASH(hash)->as.ntbl; } static int compact_table(li_table *tab) { - li_table_entry *entries; + li_table_entry *entries = tab->entries; uint8_t empty = 0, non_empty = 1; - entries = tab->entries; for (; non_empty < LINEAR_TABLE_BOUND; empty++, non_empty++) { while (!empty_entry(&entries[empty])) { empty++; @@ -556,31 +611,32 @@ compact_table(li_table *tab) } static int -add_direct_with_hash(li_table *tab, st_data_t key, st_data_t val, st_hash_t hash) +add_direct_with_hash(VALUE hash, st_data_t key, st_data_t val, st_hash_t hash_value) { - uint8_t bin; + uint8_t bin = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; li_table_entry *entry; - if (tab->num_entries >= LINEAR_TABLE_MAX_SIZE) + if (RHASH_ARRAY_LEN(hash) >= LINEAR_TABLE_MAX_SIZE) return 1; - - bin = compact_table(tab); + if (UNLIKELY(bin >= LINEAR_TABLE_BOUND)) + bin = compact_table(tab); HASH_ASSERT(bin < LINEAR_TABLE_BOUND); - entry = &tab->entries[bin]; - set_entry(entry, key, val, hash); - tab->num_entries++; + entry = &tab->entries[bin++]; + set_entry(entry, key, val, hash_value); + RHASH_SET_ARRAY_BOUND(hash, bin); + RHASH_ARRAY_LEN_ADD_ONE(hash); return 0; } int 
-linear_foreach(li_table *tab, int (*func)(ANYARGS), st_data_t arg) +linear_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg) { - uint8_t i; - li_table_entry *entries, *cur_entry; + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; enum st_retval retval; - entries = tab->entries; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + for (i = 0; i < bound; i++) { cur_entry = &entries[i]; if (empty_entry(cur_entry)) continue; @@ -593,7 +649,7 @@ linear_foreach(li_table *tab, int (*func)(ANYARGS), st_data_t arg) return 0; case ST_DELETE: clear_entry(cur_entry); - tab->num_entries--; + RHASH_ARRAY_LEN_MINUS_ONE(hash); break; } } @@ -601,29 +657,30 @@ linear_foreach(li_table *tab, int (*func)(ANYARGS), st_data_t arg) } static int -linear_foreach_check(li_table *tab, int (*func)(ANYARGS), st_data_t arg, +linear_foreach_check(VALUE hash, int (*func)(ANYARGS), st_data_t arg, st_data_t never) { - uint8_t i, ret = 0; - li_table_entry *entries, *cur_entry; + uint8_t i, ret = 0, bound = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *cur_entry, *entries; enum st_retval retval; st_data_t key; - st_hash_t hash; + st_hash_t hash_value; - if (tab->num_entries == 0) return 0; + if (RHASH_ARRAY_LEN(hash) == 0) return 0; entries = tab->entries; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + for (i = 0; i < bound; i++) { cur_entry = &entries[i]; if (empty_entry(cur_entry)) continue; key = cur_entry->key; - hash = cur_entry->hash; + hash_value = cur_entry->hash; retval = (*func)(key, cur_entry->record, arg, 0); switch (retval) { case ST_CHECK: { if (entries[i].key == never && entries[i].hash == 0) break; - ret = find_entry(tab, hash, key); + ret = find_entry(hash, hash_value, key); if (ret == LINEAR_TABLE_BOUND) { retval = (*func)(0, 0, arg, 1); return 2; @@ -635,7 +692,7 @@ linear_foreach_check(li_table *tab, int (*func)(ANYARGS), st_data_t arg, return 0; case ST_DELETE: { 
clear_entry(cur_entry); - tab->num_entries--; + RHASH_ARRAY_LEN_MINUS_ONE(hash); break; } } @@ -644,16 +701,17 @@ linear_foreach_check(li_table *tab, int (*func)(ANYARGS), st_data_t arg, } static int -linear_update(li_table *tab, st_data_t key, +linear_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg) { + li_table *tab = RHASH(hash)->as.ltbl; li_table_entry *entry; int retval, existing; uint8_t bin; st_data_t value = 0, old_key; - st_hash_t hash = do_hash(key, tab); + st_hash_t hash_value = do_hash(key, tab); - bin = find_entry(tab, hash, key); + bin = find_entry(hash, hash_value, key); existing = bin != LINEAR_TABLE_BOUND; entry = &tab->entries[bin]; if (existing) { @@ -666,7 +724,7 @@ linear_update(li_table *tab, st_data_t key, switch (retval) { case ST_CONTINUE: if (!existing) { - if (add_direct_with_hash(tab, key, value, hash)) + if (add_direct_with_hash(hash, key, value, hash_value)) return -1; break; } @@ -678,7 +736,7 @@ linear_update(li_table *tab, st_data_t key, case ST_DELETE: if (existing) { clear_entry(entry); - tab->num_entries--; + RHASH_ARRAY_LEN_MINUS_ONE(hash); } break; } @@ -686,20 +744,22 @@ linear_update(li_table *tab, st_data_t key, } static int -linear_insert(li_table *tab, st_data_t key, st_data_t value) +linear_insert(VALUE hash, st_data_t key, st_data_t value) { - st_index_t bin; - st_hash_t hash_value; + st_index_t bin = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + st_hash_t hash_value = do_hash(key, tab); - hash_value = do_hash(key, tab); - bin = find_entry(tab, hash_value, key); + bin = find_entry(hash, hash_value, key); if (bin == LINEAR_TABLE_BOUND) { - if (tab->num_entries >= LINEAR_TABLE_MAX_SIZE) + if (RHASH_ARRAY_LEN(hash) >= LINEAR_TABLE_MAX_SIZE) return -1; - bin = compact_table(tab); + if (bin >= LINEAR_TABLE_BOUND) + bin = compact_table(tab); HASH_ASSERT(bin < LINEAR_TABLE_BOUND); - set_entry(&tab->entries[bin], key, value, hash_value); - tab->num_entries++; + 
set_entry(&tab->entries[bin++], key, value, hash_value); + RHASH_SET_ARRAY_BOUND(hash, bin); + RHASH_ARRAY_LEN_ADD_ONE(hash); return 0; } tab->entries[bin].record = value; @@ -707,13 +767,14 @@ linear_insert(li_table *tab, st_data_t key, st_data_t value) } int -linear_lookup(li_table *tab, st_data_t key, st_data_t *value) +linear_lookup(VALUE hash, st_data_t key, st_data_t *value) { st_index_t bin; st_hash_t hash_value; + li_table *tab = RHASH(hash)->as.ltbl; hash_value = do_hash(key, tab); - bin = find_entry(tab, hash_value, key); + bin = find_entry(hash, hash_value, key); if (bin == LINEAR_TABLE_BOUND) { return 0; } @@ -724,14 +785,14 @@ linear_lookup(li_table *tab, st_data_t key, st_data_t *value) } static int -linear_delete(li_table *tab, st_data_t *key, st_data_t *value) +linear_delete(VALUE hash, st_data_t *key, st_data_t *value) { st_index_t bin; - st_hash_t hash_value; + li_table *tab = RHASH(hash)->as.ltbl; + st_hash_t hash_value = do_hash(*key, tab); li_table_entry *entry; - hash_value = do_hash(*key, tab); - bin = find_entry(tab, hash_value, *key); + bin = find_entry(hash, hash_value, *key); if (bin == LINEAR_TABLE_BOUND) { if (value != 0) *value = 0; return 0; @@ -739,24 +800,23 @@ linear_delete(li_table *tab, st_data_t *key, st_data_t *value) entry = &tab->entries[bin]; if (value != 0) *value = entry->record; clear_entry(entry); - tab->num_entries--; + RHASH_ARRAY_LEN_MINUS_ONE(hash); return 1; } static int -linear_shift(li_table *tab, st_data_t *key, st_data_t *value) +linear_shift(VALUE hash, st_data_t *key, st_data_t *value) { - uint8_t i; - li_table_entry *entry, *entries; + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table_entry *entry, *entries = RHASH(hash)->as.ltbl->entries; - entries = tab->entries; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + for (i = 0; i < bound; i++) { entry = &entries[i]; if (!empty_entry(entry)) { if (value != 0) *value = entry->record; *key = entry->key; clear_entry(entry); - tab->num_entries--; + 
RHASH_ARRAY_LEN_MINUS_ONE(hash); return 1; } } @@ -765,15 +825,15 @@ linear_shift(li_table *tab, st_data_t *key, st_data_t *value) } static int -linear_keys(li_table *tab, st_data_t *keys, st_index_t size) +linear_keys(VALUE hash, st_data_t *keys, st_index_t size) { - uint8_t i; + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); st_data_t *keys_start, *keys_end; - li_table_entry *cur_entry, *entries = tab->entries; + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; keys_start = keys; keys_end = keys + size; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + for (i = 0; i < bound; i++) { if (keys == keys_end) break; cur_entry = &entries[i]; @@ -785,15 +845,15 @@ linear_keys(li_table *tab, st_data_t *keys, st_index_t size) } static int -linear_values(li_table *tab, st_data_t *values, st_index_t size) +linear_values(VALUE hash, st_data_t *values, st_index_t size) { - uint8_t i; + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); st_data_t *values_start, *values_end; - li_table_entry *cur_entry, *entries = tab->entries; + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; values_start = values; values_end = values + size; - for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + for (i = 0; i < bound; i++) { if (values == values_end) break; cur_entry = &entries[i]; @@ -805,26 +865,34 @@ linear_values(li_table *tab, st_data_t *values, st_index_t size) } static li_table* -linear_copy(VALUE hash, li_table *old_tab) +linear_copy(VALUE hash1, VALUE hash2) { li_table *new_tab; - new_tab = (li_table*) rb_transient_heap_alloc(hash, sizeof(li_table)); + li_table *old_tab = RHASH(hash2)->as.ltbl; + new_tab = (li_table*) rb_transient_heap_alloc(hash1, sizeof(li_table)); if (new_tab != NULL) { - FL_SET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_SET_TRANSIENT_FLAG(hash1); } else { - FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_UNSET_TRANSIENT_FLAG(hash1); new_tab = (li_table*) malloc(sizeof(li_table)); } *new_tab = *old_tab; + HASH_ASSERT(new_tab->type == 
old_tab->type); + RHASH_SET_ARRAY_FLAG(hash1); + RHASH_SET_ARRAY_BOUND(hash1, RHASH_ARRAY_BOUND(hash2)); + RHASH_SET_ARRAY_LEN(hash1, RHASH_ARRAY_LEN(hash2)); + RHASH(hash1)->as.ltbl = new_tab; return new_tab; } static void -linear_clear(li_table *tab) +linear_clear(VALUE hash) { - tab->num_entries = 0; - memset(tab->entries, 0, 8 * sizeof(li_table_entry)); + li_table *tab = RHASH(hash)->as.ltbl; + RHASH_SET_ARRAY_LEN(hash, 0); + RHASH_SET_ARRAY_BOUND(hash, 0); + memset(tab->entries, 0, LINEAR_TABLE_MAX_SIZE * sizeof(li_table_entry)); } void @@ -832,23 +900,22 @@ rb_hash_transient_heap_promote(VALUE hash, int promote) { if (RHASH_TRANSIENT_P(hash)) { li_table *new_tab; - li_table *old_tab = RHASH(hash)->ltbl; - if (UNLIKELY(RHASH(hash)->ltbl == NULL)) { + li_table *old_tab = RHASH(hash)->as.ltbl; + if (UNLIKELY(old_tab == NULL)) { rb_gc_force_recycle(hash); return; } HASH_ASSERT(old_tab != NULL); if (promote) { new_tab = malloc(sizeof(li_table)); - FL_UNSET_RAW(hash, RHASH_TRANSIENT_FLAG); + RHASH_UNSET_TRANSIENT_FLAG(hash); } else { new_tab = rb_transient_heap_alloc(hash, sizeof(li_table)); } *new_tab = *old_tab; HASH_ASSERT(new_tab->type == old_tab->type); - HASH_ASSERT(new_tab->num_entries == old_tab->num_entries); - RHASH(hash)->ltbl = new_tab; + RHASH(hash)->as.ltbl = new_tab; } hash_varify(hash); } @@ -930,9 +997,9 @@ hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) st_table *tbl; if (error) return ST_STOP; - tbl = RHASH(arg->hash)->ntbl; + tbl = RHASH(arg->hash)->as.ntbl; status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); - if (RHASH(arg->hash)->ntbl != tbl) { + if (RHASH(arg->hash)->as.ntbl != tbl) { rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); } switch (status) { @@ -965,11 +1032,11 @@ hash_foreach_call(VALUE arg) { VALUE hash = ((struct hash_foreach_arg *)arg)->hash; int ret = 0; - if (RHASH(hash)->ltbl) - ret = linear_foreach_check(RHASH(hash)->ltbl, hash_linear_foreach_iter, + if 
(RHASH_ARRAY_P(hash)) + ret = linear_foreach_check(hash, hash_linear_foreach_iter, (st_data_t)arg, (st_data_t)Qundef); - else if (RHASH(hash)->ntbl) - ret = st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, + else if (RHASH_TABLE_P(hash)) + ret = st_foreach_check(RHASH(hash)->as.ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef); if (ret) { rb_raise(rb_eRuntimeError, "ret: %d, hash modified during iteration", ret); @@ -982,7 +1049,7 @@ rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) { struct hash_foreach_arg arg; - if (!RHASH(hash)->ntbl && !RHASH(hash)->ltbl) + if (RHASH_TABLE_EMPTY(hash)) return; RHASH_ITER_LEV(hash)++; arg.hash = hash; @@ -1027,7 +1094,7 @@ VALUE rb_hash_new_compare_by_id(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = rb_init_identtable(); + RHASH(hash)->as.ntbl = rb_init_identtable(); return hash; } @@ -1037,9 +1104,9 @@ rb_hash_new_with_size(st_index_t size) VALUE ret = rb_hash_new(); if (size) { if (size <= LINEAR_TABLE_MAX_SIZE) - RHASH(ret)->ltbl = linear_init_objtable(ret); + RHASH(ret)->as.ltbl = linear_init_objtable(ret); else - RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); + RHASH(ret)->as.ntbl = st_init_table_with_size(&objhash, size); } return ret; } @@ -1050,10 +1117,10 @@ hash_dup(VALUE hash, VALUE klass, VALUE flags) VALUE ret = hash_alloc_flags(klass, flags, RHASH_IFNONE(hash)); if (!RHASH_EMPTY_P(hash)) { - if (RHASH(hash)->ltbl) - RHASH(ret)->ltbl = linear_copy(ret, RHASH(hash)->ltbl); - else - RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); + if (RHASH_ARRAY_P(hash)) + linear_copy(ret, hash); + else if (RHASH_TABLE_P(hash)) + RHASH(ret)->as.ntbl = st_copy(RHASH(hash)->as.ntbl); } return ret; } @@ -1078,41 +1145,39 @@ rb_hash_modify_check(VALUE hash) static li_table * hash_ltbl(VALUE hash) { - if (!RHASH(hash)->ltbl) { - RHASH(hash)->ltbl = linear_init_objtable(hash); + if (RHASH_TABLE_EMPTY(hash)) { + linear_init_objtable(hash); } - return RHASH(hash)->ltbl; + return 
RHASH(hash)->as.ltbl; } static struct st_table * hash_tbl(VALUE hash) { - if (!RHASH(hash)->ntbl) { - RHASH(hash)->ntbl = st_init_table(&objhash); + if (RHASH_TABLE_EMPTY(hash)) { + RHASH(hash)->as.ntbl = st_init_table(&objhash); } - return RHASH(hash)->ntbl; + return RHASH(hash)->as.ntbl; } struct st_table * rb_hash_tbl(VALUE hash) { OBJ_WB_UNPROTECT(hash); - force_convert_table(hash); - return hash_tbl(hash); + return force_convert_table(hash); } MJIT_FUNC_EXPORTED struct st_table * rb_hash_tbl_raw(VALUE hash) { - force_convert_table(hash); - return hash_tbl(hash); + return force_convert_table(hash); } static void rb_hash_modify(VALUE hash) { rb_hash_modify_check(hash); - if (HASH_HAS_NO_TABLE(hash)) + if (RHASH_TABLE_EMPTY(hash)) hash_ltbl(hash); } @@ -1166,15 +1231,15 @@ tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg) arg.new_value = 0; arg.old_value = Qundef; - if (RHASH(hash)->ltbl) { - result = linear_update(RHASH(hash)->ltbl, (st_data_t)key, func, (st_data_t)&arg); + if (RHASH_ARRAY_P(hash)) { + result = linear_update(hash, (st_data_t)key, func, (st_data_t)&arg); if (result == -1) { try_convert_table(hash); - result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + result = st_update(RHASH(hash)->as.ntbl, (st_data_t)key, func, (st_data_t)&arg); } } - else - result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + else if (RHASH_TABLE_P(hash)) + result = st_update(RHASH(hash)->as.ntbl, (st_data_t)key, func, (st_data_t)&arg); /* write barrier */ if (arg.new_key) RB_OBJ_WRITTEN(hash, arg.old_key, arg.new_key); @@ -1289,14 +1354,14 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) VALUE hash, tmp; if (argc == 1) { - tmp = rb_hash_s_try_convert(Qnil, argv[0]); //TODO try_convert + tmp = rb_hash_s_try_convert(Qnil, argv[0]); //TODO tmp array flag if (!NIL_P(tmp)) { hash = hash_alloc(klass); - if (RHASH(tmp)->ltbl) { - RHASH(hash)->ltbl = linear_copy(hash, RHASH(tmp)->ltbl); + if 
(RHASH_ARRAY_P(tmp)) { + linear_copy(hash, tmp); } - else if (RHASH(tmp)->ntbl) { - RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); + else if (RHASH(tmp)->as.ntbl) { + RHASH(hash)->as.ntbl = st_copy(RHASH(tmp)->as.ntbl); } return hash; } @@ -1383,21 +1448,15 @@ struct rehash_arg { st_table *tbl; }; -static int -rb_hash_rehash_opt_i(VALUE key, VALUE value, VALUE arg) -{ - li_table *tbl = (li_table *)arg; - - linear_insert(tbl, (st_data_t)key, (st_data_t)value); - return ST_CONTINUE; -} - static int rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg) { - st_table *tbl = (st_table *)arg; - - st_insert(tbl, (st_data_t)key, (st_data_t)value); + if (RHASH_ARRAY_P(arg)) { + linear_insert(arg, (st_data_t)key, (st_data_t)value); + } + else { + st_insert(RHASH(arg)->as.ntbl, (st_data_t)key, (st_data_t)value); + } return ST_CONTINUE; } @@ -1426,31 +1485,28 @@ rb_hash_rehash(VALUE hash) { VALUE tmp; st_table *tbl; - li_table *ltbl; if (RHASH_ITER_LEV(hash) > 0) { rb_raise(rb_eRuntimeError, "rehash during iteration"); } rb_hash_modify_check(hash); - if (RHASH(hash)->ltbl) { + if (RHASH_ARRAY_P(hash)) { tmp = hash_alloc(0); - ltbl = linear_init_table(hash, RHASH(hash)->ltbl->type); - RHASH(tmp)->ltbl = ltbl; - - rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)ltbl); - linear_free_table(hash, RHASH(hash)->ltbl); // TODO - RHASH(hash)->ltbl = ltbl; - RHASH(tmp)->ltbl = 0; - } - else if (RHASH(hash)->ntbl) { + linear_init_table(tmp, RHASH(hash)->as.ltbl->type); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + linear_free_table(hash); + linear_copy(hash, tmp); + linear_free_table(tmp); + } + else if (RHASH_TABLE_P(hash)) { + st_table *old_tab = RHASH(hash)->as.ntbl; tmp = hash_alloc(0); - tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); - RHASH(tmp)->ntbl = tbl; - - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = tbl; - RHASH(tmp)->ntbl = 0; + tbl = 
st_init_table_with_size(old_tab->type, old_tab->num_entries); + RHASH(tmp)->as.ntbl = tbl; + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(old_tab); + RHASH(hash)->as.ntbl = tbl; + RHASH(tmp)->as.ntbl = NULL; } hash_varify(hash); return hash; @@ -1489,10 +1545,10 @@ rb_hash_aref(VALUE hash, VALUE key) { st_data_t val; - if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { return (VALUE)val; } - else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { return (VALUE)val; } hash_varify(hash); @@ -1504,10 +1560,10 @@ rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) { st_data_t val; - if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { return (VALUE)val; } - else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { return (VALUE)val; } hash_varify(hash); @@ -1563,10 +1619,10 @@ rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) if (block_given && argc == 2) { rb_warn("block supersedes default value argument"); } - if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, &val)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { return (VALUE)val; } - else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, &val)) { + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { return (VALUE)val; } if (block_given) return rb_yield(key); @@ -1768,10 +1824,10 @@ rb_hash_delete_entry(VALUE hash, VALUE key) { st_data_t ktmp = (st_data_t)key, val; - if (RHASH(hash)->ltbl && linear_delete(RHASH(hash)->ltbl, &ktmp, &val)) { + if (RHASH_ARRAY_P(hash) && linear_delete(hash, &ktmp, &val)) { return (VALUE)val; } - else if (RHASH(hash)->ntbl && 
st_delete(RHASH(hash)->ntbl, &ktmp, &val)) { + else if (RHASH_TABLE_P(hash) && st_delete(RHASH(hash)->as.ntbl, &ktmp, &val)) { return (VALUE)val; } else { @@ -1870,10 +1926,10 @@ rb_hash_shift(VALUE hash) struct shift_var var; rb_hash_modify_check(hash); - if (RHASH(hash)->ltbl) { + if (RHASH_ARRAY_P(hash)) { var.key = Qundef; if (RHASH_ITER_LEV(hash) == 0) { - if (linear_shift(RHASH(hash)->ltbl, &var.key, &var.val)) { + if (linear_shift(hash, &var.key, &var.val)) { return rb_assoc_new(var.key, var.val); } } @@ -1885,10 +1941,10 @@ rb_hash_shift(VALUE hash) } } } - if (RHASH(hash)->ntbl) { + if (RHASH_TABLE_P(hash)) { var.key = Qundef; if (RHASH_ITER_LEV(hash) == 0) { - if (st_shift(RHASH(hash)->ntbl, &var.key, &var.val)) { + if (st_shift(RHASH(hash)->as.ntbl, &var.key, &var.val)) { return rb_assoc_new(var.key, var.val); } } @@ -1938,7 +1994,7 @@ rb_hash_delete_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (HASH_HAS_TABLE(hash)) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, delete_if_i, hash); return hash; } @@ -2176,7 +2232,7 @@ rb_hash_keep_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (HASH_HAS_TABLE(hash)) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, keep_if_i, hash); return hash; } @@ -2202,17 +2258,17 @@ VALUE rb_hash_clear(VALUE hash) { rb_hash_modify_check(hash); - if (RHASH(hash)->ltbl && RHASH(hash)->ltbl->num_entries > 0) { + if (RHASH_ARRAY_P(hash) && RHASH_ARRAY_LEN(hash) > 0) { if (RHASH_ITER_LEV(hash) > 0) rb_hash_foreach(hash, clear_i, 0); else - linear_clear(RHASH(hash)->ltbl); + linear_clear(hash); } - else if (RHASH(hash)->ntbl && RHASH(hash)->ntbl->num_entries > 0) { + else if (RHASH_TABLE_P(hash) && RHASH(hash)->as.ntbl->num_entries > 0) { if (RHASH_ITER_LEV(hash) > 0) rb_hash_foreach(hash, clear_i, 0); else - st_clear(RHASH(hash)->ntbl); + st_clear(RHASH(hash)->as.ntbl); } return hash; @@ -2309,15 +2365,14 @@ 
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val) { int iter_lev = RHASH_ITER_LEV(hash); - const struct st_hash_type *type; rb_hash_modify(hash); - if (HASH_HAS_NO_TABLE(hash)) { + if (RHASH_TABLE_EMPTY(hash)) { if (iter_lev > 0) no_new_key(); - RHASH(hash)->ltbl = linear_init_objtable(hash); + linear_init_objtable(hash); } - type = RHASH_TYPE(hash); - if (type == &identhash || rb_obj_class(key) != rb_cString) { + + if (RHASH_TYPE(hash) == &identhash || rb_obj_class(key) != rb_cString) { RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset, val); } else { @@ -2338,9 +2393,6 @@ replace_i(VALUE key, VALUE val, VALUE hash) static VALUE rb_hash_initialize_copy(VALUE hash, VALUE hash2) { - st_table *ntbl; - li_table *ltbl; - rb_hash_modify_check(hash); hash2 = to_hash(hash2); @@ -2348,25 +2400,23 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) if (hash == hash2) return hash; - ltbl = RHASH(hash)->ltbl; - ntbl = RHASH(hash)->ntbl; - if (RHASH(hash2)->ltbl) { - if (ltbl) linear_free_table(hash, ltbl); - RHASH(hash)->ltbl = linear_copy(hash, RHASH(hash2)->ltbl); - if (RHASH(hash)->ltbl->num_entries) + if (RHASH_ARRAY_P(hash2)) { + if (RHASH_ARRAY_P(hash)) linear_free_table(hash); + linear_copy(hash, hash2); + if (RHASH_ARRAY_LEN(hash)) rb_hash_rehash(hash); } - else if (RHASH(hash2)->ntbl) { - if (ntbl) st_free_table(ntbl); - RHASH(hash)->ntbl = st_copy(RHASH(hash2)->ntbl); - if (RHASH(hash)->ntbl->num_entries) + else if (RHASH_TABLE_P(hash2)) { + if (RHASH_TABLE_P(hash)) st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = st_copy(RHASH(hash2)->as.ntbl); + if (RHASH(hash)->as.ntbl->num_entries) rb_hash_rehash(hash); } - else if (ltbl) { - linear_clear(ltbl); + else if (RHASH_ARRAY_P(hash)) { + linear_clear(hash); } - else if (ntbl) { - st_clear(ntbl); + else if (RHASH_TABLE_P(hash)) { + st_clear(RHASH(hash)->as.ntbl); } COPY_DEFAULT(hash, hash2); @@ -2396,15 +2446,14 @@ rb_hash_replace(VALUE hash, VALUE hash2) COPY_DEFAULT(hash, hash2); rb_hash_clear(hash); - 
if (RHASH(hash2)->ltbl) { - hash_ltbl(hash)->type = RHASH(hash2)->ltbl->type; + if (RHASH_ARRAY_P(hash2)) { + linear_copy(hash, hash2); } - else if (RHASH(hash2)->ntbl) { - hash_tbl(hash)->type = RHASH(hash2)->ntbl->type; + else if (RHASH_TABLE_P(hash2)) { + hash_tbl(hash)->type = RHASH(hash2)->as.ntbl->type; + rb_hash_foreach(hash2, replace_i, hash); } - rb_hash_foreach(hash2, replace_i, hash); - return hash; } @@ -2624,7 +2673,7 @@ rb_hash_transform_keys_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (HASH_HAS_TABLE(hash)) { + if (RHASH_TABLE_NONEMPTY(hash)) { long i; VALUE pairs = rb_hash_flatten(0, NULL, hash); rb_hash_clear(hash); @@ -2698,7 +2747,7 @@ rb_hash_transform_values_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (HASH_HAS_TABLE(hash)) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, transform_values_i, hash); return hash; } @@ -2848,16 +2897,14 @@ rb_hash_keys(VALUE hash) if (size == 0) return keys; if (ST_DATA_COMPATIBLE_P(VALUE)) { - if (RHASH(hash)->ltbl) { - li_table *table = RHASH(hash)->ltbl; - + if (RHASH_ARRAY_P(hash)) { rb_gc_writebarrier_remember(keys); RARRAY_PTR_USE(keys, ptr, { - size = linear_keys(table, ptr, size); + size = linear_keys(hash, ptr, size); }); } - else { - st_table *table = RHASH(hash)->ntbl; + else if (RHASH_TABLE_P(hash)) { + st_table *table = RHASH(hash)->as.ntbl; rb_gc_writebarrier_remember(keys); RARRAY_PTR_USE(keys, ptr, { @@ -2902,16 +2949,14 @@ rb_hash_values(VALUE hash) if (size == 0) return values; if (ST_DATA_COMPATIBLE_P(VALUE)) { - if (RHASH(hash)->ltbl) { - li_table *table = RHASH(hash)->ltbl; - + if (RHASH_ARRAY_P(hash)) { rb_gc_writebarrier_remember(values); RARRAY_PTR_USE(values, ptr, { - size = linear_values(table, ptr, size); + size = linear_values(hash, ptr, size); }); } - else { - st_table *table = RHASH(hash)->ntbl; + else if (RHASH_TABLE_P(hash)) { + st_table *table = 
RHASH(hash)->as.ntbl; rb_gc_writebarrier_remember(values); RARRAY_PTR_USE(values, ptr, { @@ -2949,10 +2994,10 @@ rb_hash_values(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key) { - if (RHASH(hash)->ltbl && linear_lookup(RHASH(hash)->ltbl, key, 0)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, 0)) { return Qtrue; } - else if (RHASH(hash)->ntbl && st_lookup(RHASH(hash)->ntbl, key, 0)) { + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, 0)) { return Qtrue; } return Qfalse; @@ -2996,7 +3041,7 @@ rb_hash_has_value(VALUE hash, VALUE val) struct equal_data { VALUE result; - st_table *tbl; + VALUE hash; int eql; }; @@ -3006,33 +3051,15 @@ eql_i(VALUE key, VALUE val1, VALUE arg) struct equal_data *data = (struct equal_data *)arg; st_data_t val2; - if (!st_lookup(data->tbl, key, &val2)) { + if (RHASH_ARRAY_P(data->hash) && !linear_lookup(data->hash, key, &val2)) { data->result = Qfalse; return ST_STOP; } - if (!(data->eql ? rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) { + else if (RHASH_TABLE_P(data->hash) && !st_lookup(RHASH(data->hash)->as.ntbl, key, &val2)) { data->result = Qfalse; return ST_STOP; } - return ST_CONTINUE; -} - -struct equal_data_opt { - VALUE result; - li_table *tbl; - int eql; -}; -static int -eql_opt_i(VALUE key, VALUE val1, VALUE arg) -{ - struct equal_data_opt *data = (struct equal_data_opt *)arg; - st_data_t val2; - - if (!linear_lookup(data->tbl, key, &val2)) { - data->result = Qfalse; - return ST_STOP; - } if (!(data->eql ? 
rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) { data->result = Qfalse; return ST_STOP; @@ -3040,19 +3067,6 @@ eql_opt_i(VALUE key, VALUE val1, VALUE arg) return ST_CONTINUE; } -static VALUE -recursive_eql_opt(VALUE hash, VALUE dt, int recur) -{ - struct equal_data_opt *data; - - if (recur) return Qtrue; - data = (struct equal_data_opt*)dt; - data->result = Qtrue; - rb_hash_foreach(hash, eql_opt_i, dt); - - return data->result; -} - static VALUE recursive_eql(VALUE hash, VALUE dt, int recur) { @@ -3070,7 +3084,6 @@ static VALUE hash_equal(VALUE hash1, VALUE hash2, int eql) { struct equal_data data; - struct equal_data_opt data_opt; if (hash1 == hash2) return Qtrue; if (!RB_TYPE_P(hash2, T_HASH)) { @@ -3091,22 +3104,15 @@ hash_equal(VALUE hash1, VALUE hash2, int eql) } if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return Qfalse; - if (RHASH(hash1)->ltbl && RHASH(hash2)->ltbl) { - if (RHASH(hash1)->ltbl->type != RHASH(hash2)->ltbl->type) - return Qfalse; - - data_opt.tbl = RHASH(hash2)->ltbl; - data_opt.eql = eql; - return rb_exec_recursive_paired(recursive_eql_opt, hash1, hash2, (VALUE)&data_opt); - } - else if (RHASH(hash1)->ntbl && RHASH(hash2)->ntbl) { - if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) + if (RHASH_TABLE_NONEMPTY(hash1) && RHASH_TABLE_NONEMPTY(hash2)) { + if (RHASH_TYPE(hash1) != RHASH_TYPE(hash2)) return Qfalse; - data.tbl = RHASH(hash2)->ntbl; + data.hash = hash2; data.eql = eql; return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); } + #if 0 if (!(rb_equal(RHASH_IFNONE(hash1), RHASH_IFNONE(hash2)) && FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT))) @@ -3433,10 +3439,10 @@ static VALUE reset_hash_type(VALUE arg) { struct reset_hash_type_arg *p = (struct reset_hash_type_arg *)arg; - if (RHASH(p->hash)->ltbl) - RHASH(p->hash)->ltbl->type = p->orighash; + if (RHASH_ARRAY_P(p->hash)) + RHASH(p->hash)->as.ltbl->type = p->orighash; else - RHASH(p->hash)->ntbl->type = p->orighash; + 
RHASH(p->hash)->as.ntbl->type = p->orighash; return Qundef; } @@ -3475,12 +3481,12 @@ rb_hash_assoc(VALUE hash, VALUE key) VALUE args[2]; if (RHASH_EMPTY_P(hash)) return Qnil; - if (RHASH(hash)->ltbl) { - ltable = RHASH(hash)->ltbl; + if (RHASH_ARRAY_P(hash)) { + ltable = RHASH(hash)->as.ltbl; orighash = ltable->type; } - else { - table = RHASH(hash)->ntbl; + else if (RHASH_TABLE_P(hash)) { + table = RHASH(hash)->as.ntbl; orighash = table->type; } @@ -3691,25 +3697,30 @@ rb_hash_compact_bang(VALUE hash) static VALUE rb_hash_compare_by_id(VALUE hash) { + VALUE tmp; if (rb_hash_compare_by_id_p(hash)) return hash; rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) { - li_table *identtable; - identtable = linear_init_identtable(hash); - rb_hash_foreach(hash, rb_hash_rehash_opt_i, (VALUE)identtable); - if (RHASH(hash)->ltbl) - linear_free_table(hash, RHASH(hash)->ltbl); - RHASH(hash)->ltbl = identtable; - } - else { + if (RHASH_ARRAY_P(hash) || RHASH_TABLE_EMPTY(hash)) { + tmp = hash_alloc(0); + linear_init_identtable(tmp); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + if (RHASH_ARRAY_P(hash)) + linear_free_table(hash); + linear_copy(hash, tmp); + linear_free_table(tmp); + } + else if (RHASH_TABLE_P(hash)) { st_table *identtable; + tmp = hash_alloc(0); identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)identtable); - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = identtable; + RHASH(tmp)->as.ntbl = identtable; + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = identtable; + RHASH(tmp)->as.ntbl = NULL; } - + rb_gc_force_recycle(tmp); return hash; } @@ -3725,10 +3736,10 @@ rb_hash_compare_by_id(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_compare_by_id_p(VALUE hash) { - if (RHASH(hash)->ltbl && RHASH(hash)->ltbl->type == &identhash) { + if (RHASH_ARRAY_P(hash) && RHASH(hash)->as.ltbl->type == &identhash) { return Qtrue; } - 
if (RHASH(hash)->ntbl && RHASH(hash)->ntbl->type == &identhash) { + if (RHASH(hash)->as.ntbl && RHASH(hash)->as.ntbl->type == &identhash) { return Qtrue; } return Qfalse; @@ -3738,7 +3749,7 @@ VALUE rb_ident_hash_new(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = st_init_table(&identhash); + RHASH(hash)->as.ntbl = st_init_table(&identhash); return hash; } @@ -3997,15 +4008,14 @@ add_new_i(st_data_t *key, st_data_t *val, st_data_t arg, int existing) int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) //TODO { - li_table *ltbl; st_table *tbl; int ret = 0; VALUE args[2]; args[0] = hash; args[1] = val; - if (!RHASH(hash)->ntbl) { - ltbl = hash_ltbl(hash); - ret = linear_update(ltbl, (st_data_t)key, add_new_i, (st_data_t)args); + if (RHASH_ARRAY_P(hash) || RHASH_TABLE_EMPTY(hash)) { + hash_ltbl(hash); + ret = linear_update(hash, (st_data_t)key, add_new_i, (st_data_t)args); if (ret != -1) return ret; try_convert_table(hash); @@ -4016,20 +4026,20 @@ rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) //TODO } static st_data_t -linear_stringify(VALUE key) +key_stringify(VALUE key) { return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ? rb_hash_key_str(key) : key; } static void -linear_bulk_insert(li_table *tab, long argc, const VALUE *argv, VALUE hash) +linear_bulk_insert(VALUE hash, long argc, const VALUE *argv) { long i; for (i = 0; i < argc; ) { - st_data_t k = linear_stringify(argv[i++]); + st_data_t k = key_stringify(argv[i++]); st_data_t v = argv[i++]; - linear_insert(tab, k, v); + linear_insert(hash, k, v); RB_OBJ_WRITTEN(hash, Qundef, k); RB_OBJ_WRITTEN(hash, Qundef, v); } @@ -4039,22 +4049,20 @@ MJIT_FUNC_EXPORTED void rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) { st_index_t size; - li_table *ltbl = RHASH(hash)->ltbl; HASH_ASSERT(argc % 2 == 0); if (! 
argc) return; size = argc / 2; - if (HASH_HAS_NO_TABLE(hash)) { - if (size <= LINEAR_TABLE_MAX_SIZE) { + if (RHASH_TABLE_EMPTY(hash)) { + if (size <= LINEAR_TABLE_MAX_SIZE) hash_ltbl(hash); - } - else { + else hash_tbl(hash); - } } - if (ltbl && (ltbl->num_entries + size <= LINEAR_TABLE_MAX_SIZE)) { - linear_bulk_insert(ltbl, argc, argv, hash); + if (RHASH_ARRAY_P(hash) && + (RHASH_ARRAY_LEN(hash) + size <= LINEAR_TABLE_MAX_SIZE)) { + linear_bulk_insert(hash, argc, argv); return; } diff --git a/internal.h b/internal.h index 5698760357aa9f..6af6ffde11dfd1 100644 --- a/internal.h +++ b/internal.h @@ -670,6 +670,30 @@ struct RComplex { #define RCOMPLEX_SET_IMAG(cmp, i) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->imag,(i)) #endif +enum ruby_rhash_flags { + RHASH_ARRAY_LEN_MAX = 8, + RHASH_ARRAY_FLAG = FL_USER3, + RHASH_ARRAY_LEN_MASK = (FL_USER4|FL_USER5|FL_USER6|FL_USER7), + RHASH_ARRAY_LEN_SHIFT = (FL_USHIFT+4), + RHASH_ARRAY_BOUND_MASK = (FL_USER8|FL_USER9|FL_USER10|FL_USER11), + RHASH_ARRAY_BOUND_SHIFT = (FL_USHIFT+8), + + RHASH_ENUM_END +}; + +#define HASH_PROC_DEFAULT FL_USER2 +#define RHASH_ARRAY_FLAG (VALUE)RHASH_ARRAY_FLAG +#define RHASH_ARRAY_P(hash) FL_TEST_RAW((hash), RHASH_ARRAY_FLAG) +#define RHASH_ARRAY_LEN_MASK (VALUE)RHASH_ARRAY_LEN_MASK +#define RHASH_ARRAY_LEN_SHIFT RHASH_ARRAY_LEN_SHIFT +#define RHASH_ARRAY_BOUND_MASK (VALUE)RHASH_ARRAY_BOUND_MASK +#define RHASH_ARRAY_BOUND_SHIFT RHASH_ARRAY_BOUND_SHIFT +#define RHASH_TRANSIENT_FLAG FL_USER14 +#define RHASH_TRANSIENT_P(hash) FL_TEST_RAW((hash), RHASH_TRANSIENT_FLAG) + +#define RHASH_ARRAY_LEN(h) \ + (long)((RBASIC(h)->flags & RHASH_ARRAY_LEN_MASK) >> RHASH_ARRAY_LEN_SHIFT) + #define LINEAR_TABLE_MAX_SIZE 8 #define LINEAR_TABLE_BOUND LINEAR_TABLE_MAX_SIZE @@ -681,14 +705,15 @@ typedef struct li_table_entry { typedef struct LinearTable { const struct st_hash_type *type; - st_index_t num_entries; li_table_entry entries[LINEAR_TABLE_MAX_SIZE]; } li_table; struct RHash { struct RBasic basic; - 
struct st_table *ntbl; /* possibly 0 */ - struct LinearTable *ltbl; + union { + struct st_table *ntbl; /* possibly 0 */ + struct LinearTable *ltbl; + } as; int iter_lev; const VALUE ifnone; }; @@ -701,8 +726,9 @@ struct RHash { #undef RHASH_SIZE #define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) #define RHASH_IFNONE(h) (RHASH(h)->ifnone) -#define RHASH_SIZE_NTBL(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : (st_index_t)0) -#define RHASH_SIZE(h) (RHASH(h)->ltbl ? RHASH(h)->ltbl->num_entries : RHASH_SIZE_NTBL(h)) +#define RHASH_SIZE_NTBL(h) (RHASH(h)->as.ntbl ? RHASH(h)->as.ntbl->num_entries : (st_index_t)0) +#define RHASH_SIZE(h) (RHASH_ARRAY_P(h) ? RHASH_ARRAY_LEN(h) : RHASH_SIZE_NTBL(h)) +#define RHASH_TABLE_P(h) (!RHASH_ARRAY_P(h) && RHASH(h)->as.ntbl) #endif /* missing/setproctitle.c */ @@ -1355,9 +1381,6 @@ RUBY_SYMBOL_EXPORT_END #define NEWOBJ_OF(obj,type,klass,flags) RB_NEWOBJ_OF(obj,type,klass,flags) /* hash.c */ -#define RHASH_TRANSIENT_FLAG FL_USER14 -#define RHASH_TRANSIENT_P(hash) FL_TEST_RAW((hash), RHASH_TRANSIENT_FLAG) - struct st_table *rb_hash_tbl_raw(VALUE hash); VALUE rb_hash_new_with_size(st_index_t size); RUBY_SYMBOL_EXPORT_BEGIN @@ -1380,10 +1403,9 @@ VALUE rb_hash_values(VALUE hash); VALUE rb_hash_rehash(VALUE hash); void rb_hash_free(VALUE hash); int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val); -int linear_foreach(li_table *, int (*)(ANYARGS), st_data_t); -int linear_lookup(li_table *, st_data_t, st_data_t *); +int linear_foreach(VALUE hash, int (*)(ANYARGS), st_data_t); +int linear_lookup(VALUE hash, st_data_t, st_data_t *); void rb_hash_bulk_insert(long, const VALUE *, VALUE); -#define HASH_PROC_DEFAULT FL_USER2 /* inits.c */ void rb_call_inits(void); diff --git a/st.c b/st.c index 0ed2d2b95db41d..b7506537d33d85 100644 --- a/st.c +++ b/st.c @@ -2287,7 +2287,7 @@ void rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash) { st_index_t n, size = argc / 2; - st_table *tab = RHASH(hash)->ntbl; + st_table *tab 
= RHASH(hash)->as.ntbl; tab = rb_hash_tbl_raw(hash); n = tab->num_entries + size; diff --git a/thread.c b/thread.c index 0395d0fffb68f2..656107261c0f18 100644 --- a/thread.c +++ b/thread.c @@ -3506,10 +3506,10 @@ rb_thread_variable_p(VALUE thread, VALUE key) locals = rb_ivar_get(thread, id_locals); - if (RHASH(locals)->ltbl && linear_lookup(RHASH(locals)->ltbl, ID2SYM(id), 0)) { + if (RHASH_ARRAY_P(locals) && linear_lookup(locals, ID2SYM(id), 0)) { return Qtrue; } - else if (RHASH(locals)->ntbl && st_lookup(RHASH(locals)->ntbl, ID2SYM(id), 0)) { + else if (RHASH_TABLE_P(locals) && st_lookup(RHASH(locals)->as.ntbl, ID2SYM(id), 0)) { return Qtrue; } diff --git a/transient_heap.c b/transient_heap.c index dcd365dd056352..09a5f76a88ee5b 100644 --- a/transient_heap.c +++ b/transient_heap.c @@ -14,7 +14,7 @@ * 2: enable verify */ #ifndef TRANSIENT_HEAP_CHECK_MODE -#define TRANSIENT_HEAP_CHECK_MODE 1 +#define TRANSIENT_HEAP_CHECK_MODE 0 #endif #define TH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(TRANSIENT_HEAP_CHECK_MODE > 0, expr, #expr) @@ -509,7 +509,7 @@ rb_transient_heap_mark(VALUE obj, const void *ptr) rb_bug("rb_transient_heap_mark: magic is broken"); } else if (header->obj != obj) { - transient_heap_dump(theap); + // transient_heap_dump(theap); rb_bug("rb_transient_heap_mark: unmatch (%s is stored, but %s is given)\n", rb_obj_info(header->obj), rb_obj_info(obj)); } @@ -547,7 +547,7 @@ transient_heap_ptr(VALUE obj, int error) break; case T_HASH: if (RHASH_TRANSIENT_P(obj)) { - ptr = (VALUE *)(RHASH(obj)->ltbl); + ptr = (VALUE *)(RHASH(obj)->as.ltbl); } else { ptr = NULL; @@ -670,7 +670,7 @@ transient_heap_block_escape(struct transient_heap* theap, struct transient_heap_ rb_hash_transient_heap_promote(obj, TRUE); break; default: - rb_bug("unsupporeted"); + rb_bug("unsupporeted: %s\n", rb_obj_info(obj)); } header->obj = Qundef; // to verify } diff --git a/vm_eval.c b/vm_eval.c index df93d000dd3b0a..64f03ea3b2c065 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -2012,7 +2012,7 @@ 
static void local_var_list_init(struct local_var_list *vars) { vars->tbl = rb_hash_new(); - RHASH(vars->tbl)->ntbl = st_init_numtable(); /* compare_by_identity */ + RHASH(vars->tbl)->as.ntbl = st_init_numtable(); /* compare_by_identity */ RBASIC_CLEAR_CLASS(vars->tbl); }