Improve object hash part algorithm performance

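Make the hash algorithm simpler: the hash part size is now a power of two, so probing uses a bit mask rather than a modulus, and the probe step is a constant 1 (linear probing) instead of a value looked up from a prime step table. Make the hash part load factor lower than before to reduce the clustering that linear probing is prone to. Low memory environments disable hash part support anyway, so this doesn't impact them.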
8 years ago · 2a49a15e0e
4 changed files with 46 additions and 141 deletions
--- a/src-input/duk_hobject.h
+++ b/src-input/duk_hobject.h
@ -32,8 +32,8 @@
 #if !defined(DUK_HOBJECT_H_INCLUDED)
 #define DUK_HOBJECT_H_INCLUDED

-/* Object flag.  There are currently 25 flag bits available.  Make sure
- * this stays in sync with debugger object inspection code.
+/* Object flags.  Make sure this stays in sync with debugger object
+ * inspection code.
 */

 /* XXX: some flags are object subtype specific (e.g. common to all function
@ -651,22 +651,9 @@
 #if defined(DUK_USE_OBJSIZES16)
 #define DUK_HOBJECT_MAX_PROPERTIES       0x0000ffffUL
 #else
-#define DUK_HOBJECT_MAX_PROPERTIES       0x7fffffffUL   /* 2**31-1 ~= 2G properties */
+#define DUK_HOBJECT_MAX_PROPERTIES       0x3fffffffUL   /* 2**30-1 ~= 1G properties */
 #endif

-/* higher value conserves memory; also note that linear scan is cache friendly */
-#define DUK_HOBJECT_E_USE_HASH_LIMIT     32
-
-/* hash size relative to entries size: for value X, approx. hash_prime(e_size + e_size / X) */
-#define DUK_HOBJECT_H_SIZE_DIVISOR       4  /* hash size approx. 1.25 times entries size */
-
-/* if new_size < L * old_size, resize without abandon check; L = 3-bit fixed point, e.g. 9 -> 9/8 = 112.5% */
-#define DUK_HOBJECT_A_FAST_RESIZE_LIMIT  9  /* 112.5%, i.e. new size less than 12.5% higher -> fast resize */
-
-/* if density < L, abandon array part, L = 3-bit fixed point, e.g. 2 -> 2/8 = 25% */
-/* limit is quite low: one array entry is 8 bytes, one normal entry is 4+1+8+4 = 17 bytes (with hash entry) */
-#define DUK_HOBJECT_A_ABANDON_LIMIT      2  /* 25%, i.e. less than 25% used -> abandon */
-
 /* internal align target for props allocation, must be 2*n for some n */
 #if (DUK_USE_ALIGN_BY == 4)
 #define DUK_HOBJECT_ALIGN_TARGET         4
@ -678,18 +665,6 @@
 #error invalid DUK_USE_ALIGN_BY
 #endif

-/* controls for minimum entry part growth */
-#define DUK_HOBJECT_E_MIN_GROW_ADD       16
-#define DUK_HOBJECT_E_MIN_GROW_DIVISOR   8  /* 2^3 -> 1/8 = 12.5% min growth */
-
-/* controls for minimum array part growth */
-#define DUK_HOBJECT_A_MIN_GROW_ADD       16
-#define DUK_HOBJECT_A_MIN_GROW_DIVISOR   8  /* 2^3 -> 1/8 = 12.5% min growth */
-
-/* probe sequence */
-#define DUK_HOBJECT_HASH_INITIAL(hash,h_size)  ((hash) % (h_size))
-#define DUK_HOBJECT_HASH_PROBE_STEP(hash)      DUK_UTIL_GET_HASH_PROBE_STEP((hash))
-
 /*
 *  PC-to-line constants
 */
--- a/src-input/duk_hobject_props.c
+++ b/src-input/duk_hobject_props.c
@ -52,10 +52,6 @@

 #define DUK__NO_ARRAY_INDEX             DUK_HSTRING_NO_ARRAY_INDEX

-/* hash probe sequence */
-#define DUK__HASH_INITIAL(hash,h_size)  DUK_HOBJECT_HASH_INITIAL((hash),(h_size))
-#define DUK__HASH_PROBE_STEP(hash)      DUK_HOBJECT_HASH_PROBE_STEP((hash))
-
 /* marker values for hash part */
 #define DUK__HASH_UNUSED                DUK_HOBJECT_HASHIDX_UNUSED
 #define DUK__HASH_DELETED               DUK_HOBJECT_HASHIDX_DELETED
@ -218,14 +214,26 @@ DUK_LOCAL duk_bool_t duk__key_is_plain_buf_ownprop(duk_hthread *thr, duk_hbuffer
 DUK_LOCAL duk_uint32_t duk__get_default_h_size(duk_uint32_t e_size) {
 	DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES);

-	if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) {
+	if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) {
 		duk_uint32_t res;
+		duk_uint32_t tmp;

-		/* result: hash_prime(floor(1.2 * e_size)) */
-		res = duk_util_get_hash_prime(e_size + e_size / DUK_HOBJECT_H_SIZE_DIVISOR);
-
-		/* if fails, e_size will be zero = not an issue, except performance-wise */
-		DUK_ASSERT(res == 0 || res > e_size);
+		/* Hash size is 2^(N+1), where N is the smallest integer such
+		 * that 2^N is larger than e_size.  The extra doubling keeps
+		 * the hash relatively sparse (size is more than 2 * e_size).
+		 */
+		tmp = e_size;
+		res = 2;  /* Result will be 2 ** (N + 1). */
+		while (tmp >= 0x40) {
+			tmp >>= 6;
+			res <<= 6;
+		}
+		while (tmp != 0) {
+			tmp >>= 1;
+			res <<= 1;
+		}
+		DUK_ASSERT((DUK_HOBJECT_MAX_PROPERTIES << 2U) > DUK_HOBJECT_MAX_PROPERTIES);  /* Won't wrap, even shifted by 2. */
+		DUK_ASSERT(res > e_size);
 		return res;
 	} else {
 		return 0;
@ -239,7 +247,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_e(duk_uint32_t e_size) {

 	DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES);

-	res = (e_size + DUK_HOBJECT_E_MIN_GROW_ADD) / DUK_HOBJECT_E_MIN_GROW_DIVISOR;
+	res = (e_size + DUK_USE_HOBJECT_ENTRY_MINGROW_ADD) / DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR;
 	DUK_ASSERT(res >= 1);  /* important for callers */
 	return res;
 }
@ -250,7 +258,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_a(duk_uint32_t a_size) {

 	DUK_ASSERT((duk_size_t) a_size <= DUK_HOBJECT_MAX_PROPERTIES);

-	res = (a_size + DUK_HOBJECT_A_MIN_GROW_ADD) / DUK_HOBJECT_A_MIN_GROW_DIVISOR;
+	res = (a_size + DUK_USE_HOBJECT_ARRAY_MINGROW_ADD) / DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR;
 	DUK_ASSERT(res >= 1);  /* important for callers */
 	return res;
 }
@ -325,7 +333,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_density_check(duk_uint32_t a_used, duk_u
 	 *  of the check, but may confuse debugging.
 	 */

-	return (a_used < DUK_HOBJECT_A_ABANDON_LIMIT * (a_size >> 3));
+	return (a_used < DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT * (a_size >> 3));
 }

 /* Fast check for extending array: check whether or not a slow density check is required. */
@ -351,7 +359,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_slow_check_required(duk_uint32_t arr_idx
 	 *    arr_idx > limit'' * ((old_size + 7) / 8)
 	 */

-	return (arr_idx > DUK_HOBJECT_A_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3));
+	return (arr_idx > DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3));
 }

 /*
@ -851,6 +859,8 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,

 #if defined(DUK_USE_HOBJECT_HASH_PART)
 	if (DUK_UNLIKELY(new_h_size > 0)) {
+		duk_uint32_t mask;
+
 		DUK_ASSERT(new_h != NULL);

 		/* fill new_h with u32 0xff = UNUSED */
@ -859,13 +869,15 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,
 		DUK_MEMSET(new_h, 0xff, sizeof(duk_uint32_t) * new_h_size);

 		DUK_ASSERT(new_e_next <= new_h_size);  /* equality not actually possible */
+
+		mask = new_h_size - 1;
 		for (i = 0; i < new_e_next; i++) {
 			duk_hstring *key = new_e_k[i];
 			duk_uint32_t j, step;

 			DUK_ASSERT(key != NULL);
-			j = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size);
-			step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
+			j = DUK_HSTRING_GET_HASH(key) & mask;
+			step = 1;  /* Cache friendly but clustering prone. */

 			for (;;) {
 				DUK_ASSERT(new_h[j] != DUK__HASH_DELETED);  /* should never happen */
@ -875,10 +887,9 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr,
 					break;
 				}
 				DUK_DDD(DUK_DDDPRINT("rebuild miss %ld, step %ld", (long) j, (long) step));
-				j = (j + step) % new_h_size;
+				j = (j + step) & mask;

-				/* guaranteed to finish */
-				DUK_ASSERT(j != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size));
+				/* Guaranteed to finish (hash is larger than #props). */
 			}
 		}
 	} else {
@ -1122,7 +1133,7 @@ DUK_INTERNAL void duk_hobject_compact_props(duk_hthread *thr, duk_hobject *obj)
 	}

 #if defined(DUK_USE_HOBJECT_HASH_PART)
-	if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) {
+	if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) {
 		h_size = duk__get_default_h_size(e_size);
 	} else {
 		h_size = 0;
@ -1183,13 +1194,15 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o
 		duk_uint32_t n;
 		duk_uint32_t i, step;
 		duk_uint32_t *h_base;
+		duk_uint32_t mask;

 		DUK_DDD(DUK_DDDPRINT("duk_hobject_find_existing_entry() using hash part for lookup"));

 		h_base = DUK_HOBJECT_H_GET_BASE(heap, obj);
 		n = DUK_HOBJECT_GET_HSIZE(obj);
-		i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n);
-		step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
+		mask = n - 1;
+		i = DUK_HSTRING_GET_HASH(key) & mask;
+		step = 1;  /* Cache friendly but clustering prone. */

 		for (;;) {
 			duk_uint32_t t;
@ -1217,10 +1230,9 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o
 				DUK_DDD(DUK_DDDPRINT("lookup miss i=%ld, t=%ld",
 				                     (long) i, (long) t));
 			}
-			i = (i + step) % n;
+			i = (i + step) & mask;

-			/* guaranteed to finish, as hash is never full */
-			DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n));
+			/* Guaranteed to finish (hash is larger than #props). */
 		}
 	}
 #endif  /* DUK_USE_HOBJECT_HASH_PART */
@ -1325,13 +1337,14 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj

 #if defined(DUK_USE_HOBJECT_HASH_PART)
 	if (DUK_UNLIKELY(DUK_HOBJECT_GET_HSIZE(obj) > 0)) {
-		duk_uint32_t n;
+		duk_uint32_t n, mask;
 		duk_uint32_t i, step;
 		duk_uint32_t *h_base = DUK_HOBJECT_H_GET_BASE(thr->heap, obj);

 		n = DUK_HOBJECT_GET_HSIZE(obj);
-		i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n);
-		step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key));
+		mask = n - 1;
+		i = DUK_HSTRING_GET_HASH(key) & mask;
+		step = 1;  /* Cache friendly but clustering prone. */

 		for (;;) {
 			duk_uint32_t t = h_base[i];
@ -1346,10 +1359,9 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj
 				break;
 			}
 			DUK_DDD(DUK_DDDPRINT("duk__alloc_entry_checked() miss %ld", (long) i));
-			i = (i + step) % n;
+			i = (i + step) & mask;

-			/* guaranteed to find an empty slot */
-			DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), DUK_HOBJECT_GET_HSIZE(obj)));
+			/* Guaranteed to finish (hash is larger than #props). */
 		}
 	}
 #endif  /* DUK_USE_HOBJECT_HASH_PART */
--- a/src-input/duk_util.h
+++ b/src-input/duk_util.h
@ -5,10 +5,6 @@
 #if !defined(DUK_UTIL_H_INCLUDED)
 #define DUK_UTIL_H_INCLUDED

-#define DUK_UTIL_MIN_HASH_PRIME  17  /* must match genhashsizes.py */
-
-#define DUK_UTIL_GET_HASH_PROBE_STEP(hash)  (duk_util_probe_steps[(hash) & 0x1f])
-
 #if defined(DUK_USE_GET_RANDOM_DOUBLE)
 #define DUK_UTIL_GET_RANDOM_DOUBLE(thr) DUK_USE_GET_RANDOM_DOUBLE((thr)->heap_udata)
 #else
@ -504,10 +500,6 @@ DUK_INTERNAL_DECL duk_uint8_t duk_util_probe_steps[32];
 DUK_INTERNAL_DECL duk_uint32_t duk_util_hashbytes(const duk_uint8_t *data, duk_size_t len, duk_uint32_t seed);
 #endif

-#if defined(DUK_USE_HOBJECT_HASH_PART)
-DUK_INTERNAL_DECL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size);
-#endif
-
 DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode(duk_bitdecoder_ctx *ctx, duk_small_int_t bits);
 DUK_INTERNAL_DECL duk_small_uint_t duk_bd_decode_flag(duk_bitdecoder_ctx *ctx);
 DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode_flagged(duk_bitdecoder_ctx *ctx, duk_small_int_t bits, duk_uint32_t def_value);
--- a/src-input/duk_util_hashprime.c
+++ b/src-input/duk_util_hashprime.c
@ -1,74 +0,0 @@
-/*
- *  Round a number upwards to a prime (not usually the nearest one).
- *
- *  Uses a table of successive 32-bit primes whose ratio is roughly
- *  constant.  This keeps the relative upwards 'rounding error' bounded
- *  and the data size small.  A simple 'predict-correct' compression is
- *  used to compress primes to one byte per prime.  See genhashsizes.py
- *  for details.
- *
- *  The minimum prime returned here must be coordinated with the possible
- *  probe sequence steps in duk_hobject and duk_heap stringtable.
- */
-
-#include "duk_internal.h"
-
-#if defined(DUK_USE_HOBJECT_HASH_PART)
-
-/* hash size ratio goal, must match genhashsizes.py */
-#define DUK__HASH_SIZE_RATIO   1177  /* floor(1.15 * (1 << 10)) */
-
-/* prediction corrections for prime list (see genhashsizes.py) */
-DUK_LOCAL const duk_int8_t duk__hash_size_corrections[] = {
-	17,  /* minimum prime */
-	4, 3, 4, 1, 4, 1, 1, 2, 2, 2, 2, 1, 6, 6, 9, 5, 1, 2, 2, 5, 1, 3, 3, 3,
-	5, 4, 4, 2, 4, 8, 3, 4, 23, 2, 4, 7, 8, 11, 2, 12, 15, 10, 1, 1, 5, 1, 5,
-	8, 9, 17, 14, 10, 7, 5, 2, 46, 21, 1, 9, 9, 4, 4, 10, 23, 36, 6, 20, 29,
-	18, 6, 19, 21, 16, 11, 5, 5, 48, 9, 1, 39, 14, 8, 4, 29, 9, 1, 15, 48, 12,
-	22, 6, 15, 27, 4, 2, 17, 28, 8, 9, 4, 5, 8, 3, 3, 8, 37, 11, 15, 8, 30,
-	43, 6, 33, 41, 5, 20, 32, 41, 38, 24, 77, 14, 19, 11, 4, 35, 18, 19, 41,
-	10, 23, 16, 9, 2,
-	-1
-};
-
-/* probe steps (see genhashsizes.py), currently assumed to be 32 entries long
- * (DUK_UTIL_GET_HASH_PROBE_STEP macro).
- */
-DUK_INTERNAL duk_uint8_t duk_util_probe_steps[32] = {
-	2, 3, 5, 7, 11, 13, 19, 31, 41, 47, 59, 67, 73, 79, 89, 101, 103, 107,
-	109, 127, 137, 139, 149, 157, 163, 167, 173, 181, 191, 193, 197, 199
-};
-
-DUK_INTERNAL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size) {
-	const duk_int8_t *p = duk__hash_size_corrections;
-	duk_uint32_t curr;
-
-	curr = (duk_uint32_t) *p++;
-	for (;;) {
-		duk_small_int_t t = (duk_small_int_t) *p++;
-		if (t < 0) {
-			/* may happen if size is very close to 2^32-1 */
-			break;
-		}
-
-		/* prediction: portable variant using doubles if 64-bit values not available */
-#if defined(DUK_USE_64BIT_OPS)
-		curr = (duk_uint32_t) ((((duk_uint64_t) curr) * ((duk_uint64_t) DUK__HASH_SIZE_RATIO)) >> 10);
-#else
-		/* 32-bit x 11-bit = 43-bit, fits accurately into a double */
-		curr = (duk_uint32_t) DUK_FLOOR(((double) curr) * ((double) DUK__HASH_SIZE_RATIO) / 1024.0);
-#endif
-
-		/* correction */
-		curr += t;
-
-		DUK_DDD(DUK_DDDPRINT("size=%ld, curr=%ld", (long) size, (long) curr));
-
-		if (curr >= size) {
-			return curr;
-		}
-	}
-	return 0;
-}
-
-#endif  /* DUK_USE_HOBJECT_HASH_PART */