Browse Source

Improve object hash part algorithm performance

Make the hash algorithm simpler by using a bit mask rather than a modulus for
probing the hash.

Make the hash part load factor lower than before to reduce clustering.
Low-memory environments disable hash part support anyway, so this change
doesn't impact them.
pull/1284/head
Sami Vaarala 8 years ago
parent
commit
2a49a15e0e
  1. 31
      src-input/duk_hobject.h
  2. 74
      src-input/duk_hobject_props.c
  3. 8
      src-input/duk_util.h
  4. 74
      src-input/duk_util_hashprime.c

31
src-input/duk_hobject.h

@ -32,8 +32,8 @@
#if !defined(DUK_HOBJECT_H_INCLUDED)
#define DUK_HOBJECT_H_INCLUDED
/* Object flag. There are currently 25 flag bits available. Make sure
* this stays in sync with debugger object inspection code.
/* Object flags. Make sure this stays in sync with debugger object
* inspection code.
*/
/* XXX: some flags are object subtype specific (e.g. common to all function
@ -651,22 +651,9 @@
#if defined(DUK_USE_OBJSIZES16)
#define DUK_HOBJECT_MAX_PROPERTIES 0x0000ffffUL
#else
#define DUK_HOBJECT_MAX_PROPERTIES 0x7fffffffUL /* 2**31-1 ~= 2G properties */
#define DUK_HOBJECT_MAX_PROPERTIES 0x3fffffffUL /* 2**30-1 ~= 1G properties */
#endif
/* higher value conserves memory; also note that linear scan is cache friendly */
#define DUK_HOBJECT_E_USE_HASH_LIMIT 32
/* hash size relative to entries size: for value X, approx. hash_prime(e_size + e_size / X) */
#define DUK_HOBJECT_H_SIZE_DIVISOR 4 /* hash size approx. 1.25 times entries size */
/* if new_size < L * old_size, resize without abandon check; L = 3-bit fixed point, e.g. 9 -> 9/8 = 112.5% */
#define DUK_HOBJECT_A_FAST_RESIZE_LIMIT 9 /* 112.5%, i.e. new size less than 12.5% higher -> fast resize */
/* if density < L, abandon array part, L = 3-bit fixed point, e.g. 2 -> 2/8 = 25% */
/* limit is quite low: one array entry is 8 bytes, one normal entry is 4+1+8+4 = 17 bytes (with hash entry) */
#define DUK_HOBJECT_A_ABANDON_LIMIT 2 /* 25%, i.e. less than 25% used -> abandon */
/* internal align target for props allocation, must be 2*n for some n */
#if (DUK_USE_ALIGN_BY == 4)
#define DUK_HOBJECT_ALIGN_TARGET 4
@ -678,18 +665,6 @@
#error invalid DUK_USE_ALIGN_BY
#endif
/* controls for minimum entry part growth */
#define DUK_HOBJECT_E_MIN_GROW_ADD 16
#define DUK_HOBJECT_E_MIN_GROW_DIVISOR 8 /* 2^3 -> 1/8 = 12.5% min growth */
/* controls for minimum array part growth */
#define DUK_HOBJECT_A_MIN_GROW_ADD 16
#define DUK_HOBJECT_A_MIN_GROW_DIVISOR 8 /* 2^3 -> 1/8 = 12.5% min growth */
/* probe sequence */
#define DUK_HOBJECT_HASH_INITIAL(hash,h_size) ((hash) % (h_size))
#define DUK_HOBJECT_HASH_PROBE_STEP(hash) DUK_UTIL_GET_HASH_PROBE_STEP((hash))
/*
* PC-to-line constants
*/

74
src-input/duk_hobject_props.c

@ -52,10 +52,6 @@
#define DUK__NO_ARRAY_INDEX DUK_HSTRING_NO_ARRAY_INDEX
/* hash probe sequence */
#define DUK__HASH_INITIAL(hash,h_size) DUK_HOBJECT_HASH_INITIAL((hash),(h_size))
#define DUK__HASH_PROBE_STEP(hash) DUK_HOBJECT_HASH_PROBE_STEP((hash))
/* marker values for hash part */
#define DUK__HASH_UNUSED DUK_HOBJECT_HASHIDX_UNUSED
#define DUK__HASH_DELETED DUK_HOBJECT_HASHIDX_DELETED
@ -218,14 +214,26 @@ DUK_LOCAL duk_bool_t duk__key_is_plain_buf_ownprop(duk_hthread *thr, duk_hbuffer
DUK_LOCAL duk_uint32_t duk__get_default_h_size(duk_uint32_t e_size) {
DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES);
if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) {
if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) {
duk_uint32_t res;
duk_uint32_t tmp;
/* result: hash_prime(floor(1.25 * e_size)), i.e. e_size + e_size / DUK_HOBJECT_H_SIZE_DIVISOR */
res = duk_util_get_hash_prime(e_size + e_size / DUK_HOBJECT_H_SIZE_DIVISOR);
/* if fails, e_size will be zero = not an issue, except performance-wise */
DUK_ASSERT(res == 0 || res > e_size);
/* Hash size should be 2^N where N is chosen so that 2^N is
* larger than e_size. Extra shifting is used to ensure hash
* is relatively sparse.
*/
tmp = e_size;
res = 2; /* Result will be 2 ** (N + 1). */
while (tmp >= 0x40) {
tmp >>= 6;
res <<= 6;
}
while (tmp != 0) {
tmp >>= 1;
res <<= 1;
}
DUK_ASSERT((DUK_HOBJECT_MAX_PROPERTIES << 2U) > DUK_HOBJECT_MAX_PROPERTIES); /* Won't wrap, even shifted by 2. */
DUK_ASSERT(res > e_size);
return res;
} else {
return 0;
@ -239,7 +247,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_e(duk_uint32_t e_size) {
DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES);
res = (e_size + DUK_HOBJECT_E_MIN_GROW_ADD) / DUK_HOBJECT_E_MIN_GROW_DIVISOR;
res = (e_size + DUK_USE_HOBJECT_ENTRY_MINGROW_ADD) / DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR;
DUK_ASSERT(res >= 1); /* important for callers */
return res;
}
@ -250,7 +258,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_a(duk_uint32_t a_size) {
DUK_ASSERT((duk_size_t) a_size <= DUK_HOBJECT_MAX_PROPERTIES);
res = (a_size + DUK_HOBJECT_A_MIN_GROW_ADD) / DUK_HOBJECT_A_MIN_GROW_DIVISOR;
res = (a_size + DUK_USE_HOBJECT_ARRAY_MINGROW_ADD) / DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR;
DUK_ASSERT(res >= 1); /* important for callers */
return res;
}
@ -325,7 +333,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_density_check(duk_uint32_t a_used, duk_u
* of the check, but may confuse debugging.
*/
return (a_used < DUK_HOBJECT_A_ABANDON_LIMIT * (a_size >> 3));
return (a_used < DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT * (a_size >> 3));
}
/* Fast check for extending array: check whether or not a slow density check is required. */
@ -351,7 +359,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_slow_check_required(duk_uint32_t arr_idx
* arr_idx > limit'' * ((old_size + 7) / 8)
*/
return (arr_idx > DUK_HOBJECT_A_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3));
return (arr_idx > DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3));
}
/*
@ -851,6 +859,8 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,
#if defined(DUK_USE_HOBJECT_HASH_PART)
if (DUK_UNLIKELY(new_h_size > 0)) {
duk_uint32_t mask;
DUK_ASSERT(new_h != NULL);
/* fill new_h with u32 0xff = UNUSED */
@ -859,13 +869,15 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,
DUK_MEMSET(new_h, 0xff, sizeof(duk_uint32_t) * new_h_size);
DUK_ASSERT(new_e_next <= new_h_size); /* equality not actually possible */
mask = new_h_size - 1;
for (i = 0; i < new_e_next; i++) {
duk_hstring *key = new_e_k[i];
duk_uint32_t j, step;
DUK_ASSERT(key != NULL);
j = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size);
step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
j = DUK_HSTRING_GET_HASH(key) & mask;
step = 1; /* Cache friendly but clustering prone. */
for (;;) {
DUK_ASSERT(new_h[j] != DUK__HASH_DELETED); /* should never happen */
@ -875,10 +887,9 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,
break;
}
DUK_DDD(DUK_DDDPRINT("rebuild miss %ld, step %ld", (long) j, (long) step));
j = (j + step) % new_h_size;
j = (j + step) & mask;
/* guaranteed to finish */
DUK_ASSERT(j != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size));
/* Guaranteed to finish (hash is larger than #props). */
}
}
} else {
@ -1122,7 +1133,7 @@ DUK_INTERNAL void duk_hobject_compact_props(duk_hthread *thr, duk_hobject *obj)
}
#if defined(DUK_USE_HOBJECT_HASH_PART)
if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) {
if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) {
h_size = duk__get_default_h_size(e_size);
} else {
h_size = 0;
@ -1183,13 +1194,15 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o
duk_uint32_t n;
duk_uint32_t i, step;
duk_uint32_t *h_base;
duk_uint32_t mask;
DUK_DDD(DUK_DDDPRINT("duk_hobject_find_existing_entry() using hash part for lookup"));
h_base = DUK_HOBJECT_H_GET_BASE(heap, obj);
n = DUK_HOBJECT_GET_HSIZE(obj);
i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n);
step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
mask = n - 1;
i = DUK_HSTRING_GET_HASH(key) & mask;
step = 1; /* Cache friendly but clustering prone. */
for (;;) {
duk_uint32_t t;
@ -1217,10 +1230,9 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o
DUK_DDD(DUK_DDDPRINT("lookup miss i=%ld, t=%ld",
(long) i, (long) t));
}
i = (i + step) % n;
i = (i + step) & mask;
/* guaranteed to finish, as hash is never full */
DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n));
/* Guaranteed to finish (hash is larger than #props). */
}
}
#endif /* DUK_USE_HOBJECT_HASH_PART */
@ -1325,13 +1337,14 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj
#if defined(DUK_USE_HOBJECT_HASH_PART)
if (DUK_UNLIKELY(DUK_HOBJECT_GET_HSIZE(obj) > 0)) {
duk_uint32_t n;
duk_uint32_t n, mask;
duk_uint32_t i, step;
duk_uint32_t *h_base = DUK_HOBJECT_H_GET_BASE(thr->heap, obj);
n = DUK_HOBJECT_GET_HSIZE(obj);
i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n);
step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
mask = n - 1;
i = DUK_HSTRING_GET_HASH(key) & mask;
step = 1; /* Cache friendly but clustering prone. */
for (;;) {
duk_uint32_t t = h_base[i];
@ -1346,10 +1359,9 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj
break;
}
DUK_DDD(DUK_DDDPRINT("duk__alloc_entry_checked() miss %ld", (long) i));
i = (i + step) % n;
i = (i + step) & mask;
/* guaranteed to find an empty slot */
DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), DUK_HOBJECT_GET_HSIZE(obj)));
/* Guaranteed to finish (hash is larger than #props). */
}
}
#endif /* DUK_USE_HOBJECT_HASH_PART */

8
src-input/duk_util.h

@ -5,10 +5,6 @@
#if !defined(DUK_UTIL_H_INCLUDED)
#define DUK_UTIL_H_INCLUDED
#define DUK_UTIL_MIN_HASH_PRIME 17 /* must match genhashsizes.py */
#define DUK_UTIL_GET_HASH_PROBE_STEP(hash) (duk_util_probe_steps[(hash) & 0x1f])
#if defined(DUK_USE_GET_RANDOM_DOUBLE)
#define DUK_UTIL_GET_RANDOM_DOUBLE(thr) DUK_USE_GET_RANDOM_DOUBLE((thr)->heap_udata)
#else
@ -504,10 +500,6 @@ DUK_INTERNAL_DECL duk_uint8_t duk_util_probe_steps[32];
DUK_INTERNAL_DECL duk_uint32_t duk_util_hashbytes(const duk_uint8_t *data, duk_size_t len, duk_uint32_t seed);
#endif
#if defined(DUK_USE_HOBJECT_HASH_PART)
DUK_INTERNAL_DECL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size);
#endif
DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode(duk_bitdecoder_ctx *ctx, duk_small_int_t bits);
DUK_INTERNAL_DECL duk_small_uint_t duk_bd_decode_flag(duk_bitdecoder_ctx *ctx);
DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode_flagged(duk_bitdecoder_ctx *ctx, duk_small_int_t bits, duk_uint32_t def_value);

74
src-input/duk_util_hashprime.c

@ -1,74 +0,0 @@
/*
* Round a number upwards to a prime (not usually the nearest one).
*
* Uses a table of successive 32-bit primes whose ratio is roughly
* constant. This keeps the relative upwards 'rounding error' bounded
* and the data size small. A simple 'predict-correct' compression is
* used to compress primes to one byte per prime. See genhashsizes.py
* for details.
*
* The minimum prime returned here must be coordinated with the possible
* probe sequence steps in duk_hobject and duk_heap stringtable.
*/
#include "duk_internal.h"
#if defined(DUK_USE_HOBJECT_HASH_PART)
/* hash size ratio goal, must match genhashsizes.py */
#define DUK__HASH_SIZE_RATIO 1177 /* floor(1.15 * (1 << 10)) */
/* Prediction corrections for the prime list (see genhashsizes.py).
 *
 * Layout, as consumed by duk_util_get_hash_prime():
 *   - the first entry is the starting (minimum) prime;
 *   - each following non-negative entry is a correction that is added to
 *     the previous value after it has been scaled by
 *     DUK__HASH_SIZE_RATIO / 1024;
 *   - the final negative entry terminates the list.
 */
DUK_LOCAL const duk_int8_t duk__hash_size_corrections[] = {
17, /* minimum prime */
4, 3, 4, 1, 4, 1, 1, 2, 2, 2, 2, 1, 6, 6, 9, 5, 1, 2, 2, 5, 1, 3, 3, 3,
5, 4, 4, 2, 4, 8, 3, 4, 23, 2, 4, 7, 8, 11, 2, 12, 15, 10, 1, 1, 5, 1, 5,
8, 9, 17, 14, 10, 7, 5, 2, 46, 21, 1, 9, 9, 4, 4, 10, 23, 36, 6, 20, 29,
18, 6, 19, 21, 16, 11, 5, 5, 48, 9, 1, 39, 14, 8, 4, 29, 9, 1, 15, 48, 12,
22, 6, 15, 27, 4, 2, 17, 28, 8, 9, 4, 5, 8, 3, 3, 8, 37, 11, 15, 8, 30,
43, 6, 33, 41, 5, 20, 32, 41, 38, 24, 77, 14, 19, 11, 4, 35, 18, 19, 41,
10, 23, 16, 9, 2,
-1 /* end-of-list marker (any negative value stops the scan) */
};
/* Probe steps (see genhashsizes.py).  The table is assumed to be exactly
 * 32 entries long because the DUK_UTIL_GET_HASH_PROBE_STEP() macro selects
 * an entry with the low five bits of the hash, i.e. (hash) & 0x1f.
 */
DUK_INTERNAL duk_uint8_t duk_util_probe_steps[32] = {
2, 3, 5, 7, 11, 13, 19, 31, 41, 47, 59, 67, 73, 79, 89, 101, 103, 107,
109, 127, 137, 139, 149, 157, 163, 167, 173, 181, 191, 193, 197, 199
};
/* Round 'size' upwards to a value from the compressed prime list.
 *
 * The list is regenerated on the fly: start from the first table entry,
 * then repeatedly predict the next value as (previous * ratio) >> 10 and
 * add the stored per-entry correction.  The first regenerated value that
 * is >= size is returned.  If the table's negative end marker is reached
 * before such a value is found, 0 is returned.
 */
DUK_INTERNAL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size) {
	const duk_int8_t *step = duk__hash_size_corrections;
	duk_uint32_t candidate = (duk_uint32_t) *step;

	/* Walk the correction entries that follow the initial prime; a
	 * negative entry marks the end of the list (may be reached if size
	 * is very close to 2^32-1).
	 */
	for (step++; *step >= 0; step++) {
		duk_small_int_t delta = (duk_small_int_t) *step;

		/* Prediction: portable variant using doubles if 64-bit values
		 * are not available.
		 */
#if defined(DUK_USE_64BIT_OPS)
		candidate = (duk_uint32_t) ((((duk_uint64_t) candidate) * ((duk_uint64_t) DUK__HASH_SIZE_RATIO)) >> 10);
#else
		/* 32-bit x 11-bit = 43-bit, fits accurately into a double. */
		candidate = (duk_uint32_t) DUK_FLOOR(((double) candidate) * ((double) DUK__HASH_SIZE_RATIO) / 1024.0);
#endif

		/* Correction. */
		candidate += delta;

		DUK_DDD(DUK_DDDPRINT("size=%ld, curr=%ld", (long) size, (long) candidate));

		if (candidate >= size) {
			return candidate;
		}
	}

	/* Ran off the end of the list without reaching 'size'. */
	return 0;
}
#endif /* DUK_USE_HOBJECT_HASH_PART */
Loading…
Cancel
Save