Browse Source

new implementation for string hashing, with chaining.

v5-2
Roberto Ierusalimschy 25 years ago
parent
commit
c5fee7615e
  1. 10
      lapi.c
  2. 21
      lbuiltin.c
  3. 38
      lgc.c
  4. 8
      llex.c
  5. 21
      lobject.h
  6. 132
      lstring.c
  7. 14
      lstring.h

10
lapi.c

@ -1,5 +1,5 @@
/*
** $Id: lapi.c,v 1.51 1999/10/04 17:51:04 roberto Exp roberto $
** $Id: lapi.c,v 1.52 1999/10/07 19:04:30 roberto Exp roberto $
** Lua API
** See Copyright Notice in lua.h
*/
@ -393,11 +393,11 @@ TaggedString *luaA_nextvar (TaggedString *g) {
g = L->rootglobal; /* first variable */
else {
/* check whether name is in global var list */
luaL_arg_check(g != g->next, 1, "variable name expected");
g = g->next; /* get next */
luaL_arg_check(g != g->nextglobal, 1, "variable name expected");
g = g->nextglobal; /* get next */
}
while (g && g->u.s.globalval.ttype == LUA_T_NIL) /* skip globals with nil */
g = g->next;
g = g->nextglobal;
if (g) {
ttype(L->stack.top) = LUA_T_STRING; tsvalue(L->stack.top) = g;
incr_top;
@ -579,7 +579,7 @@ const char *lua_getobjname (lua_Object o, const char **name) {
/* try to find a name for given function */
TaggedString *g;
set_normalized(L->stack.top, Address(o)); /* to be accessed by "checkfunc" */
for (g=L->rootglobal; g; g=g->next) {
for (g=L->rootglobal; g; g=g->nextglobal) {
if (checkfunc(&g->u.s.globalval)) {
*name = g->str;
return "global";

21
lbuiltin.c

@ -1,5 +1,5 @@
/*
** $Id: lbuiltin.c,v 1.64 1999/10/04 17:51:04 roberto Exp roberto $
** $Id: lbuiltin.c,v 1.65 1999/10/07 19:04:30 roberto Exp roberto $
** Built-in functions
** See Copyright Notice in lua.h
*/
@ -447,7 +447,7 @@ static void luaB_foreachvar (void) {
TObject f; /* see comment in 'foreachi' */
f = *luaA_Address(luaL_functionarg(1));
luaD_checkstack(4); /* for extra var name, f, var name, and globalval */
for (s = L->rootglobal; s; s = s->next) {
for (s = L->rootglobal; s; s = s->nextglobal) {
if (s->u.s.globalval.ttype != LUA_T_NIL) {
pushtagstring(s); /* keep (extra) s on stack to avoid GC */
*(L->stack.top++) = f;
@ -606,6 +606,12 @@ static void mem_query (void) {
}
static void hash_query (void) {
const TObject *o = luaA_Address(luaL_nonnullarg(1));
lua_pushnumber(luaH_hashindex(o));
}
static void query_strings (void) {
int h = luaL_check_int(1) - 1;
int s = luaL_opt_int(2, 0) - 1;
@ -617,10 +623,10 @@ static void query_strings (void) {
}
else {
TaggedString *ts = L->string_root[h].hash[s];
if (ts == NULL) lua_pushstring("<NIL>");
else if (ts == &luaS_EMPTY) lua_pushstring("<EMPTY>");
else if (ts->constindex == -1) lua_pushstring("<USERDATA>");
else lua_pushstring(ts->str);
for (ts = L->string_root[h].hash[s]; ts; ts = ts->nexthash) {
if (ts->constindex == -1) lua_pushstring("<USERDATA>");
else lua_pushstring(ts->str);
}
}
}
@ -707,9 +713,10 @@ static void testC (void) {
static const struct luaL_reg builtin_funcs[] = {
#ifdef DEBUG
{"extra", extra_services},
{"hash", hash_query},
{"querystr", query_strings},
{"testC", testC},
{"totalmem", mem_query},
{"querystr", query_strings},
#endif
{"_ALERT", luaB_alert},
{"_ERRORMESSAGE", error_message},

38
lgc.c

@ -1,5 +1,5 @@
/*
** $Id: lgc.c,v 1.26 1999/09/27 18:00:25 roberto Exp roberto $
** $Id: lgc.c,v 1.27 1999/10/04 17:51:04 roberto Exp roberto $
** Garbage Collector
** See Copyright Notice in lua.h
*/
@ -64,7 +64,7 @@ static void hashmark (Hash *h) {
static void globalmark (void) {
TaggedString *g;
for (g=L->rootglobal; g; g=g->next) {
for (g=L->rootglobal; g; g=g->nextglobal) {
LUA_ASSERT(g->constindex >= 0, "userdata in global list");
if (g->u.s.globalval.ttype != LUA_T_NIL) {
markobject(&g->u.s.globalval);
@ -161,8 +161,8 @@ static void clear_global_list (void) {
TaggedString **p = &L->rootglobal;
TaggedString *next;
while ((next = *p) != NULL) {
if (next->marked) p = &next->next;
else *p = next->next;
if (next->marked) p = &next->nextglobal;
else *p = next->nextglobal;
}
}
@ -177,22 +177,28 @@ static void collectstring (int limit) {
int i;
ttype(&o) = LUA_T_USERDATA;
clear_global_list();
for (i=0; i<NUM_HASHS; i++) {
for (i=0; i<NUM_HASHS; i++) { /* for each hash table */
stringtable *tb = &L->string_root[i];
int j;
for (j=0; j<tb->size; j++) {
TaggedString *t = tb->hash[j];
if (t == NULL) continue;
if (t->marked < limit) {
if (t->constindex == -1) { /* is userdata? */
tsvalue(&o) = t;
luaD_gcIM(&o);
for (j=0; j<tb->size; j++) { /* for each list */
TaggedString **p = &tb->hash[j];
TaggedString *next;
while ((next = *p) != NULL) {
if (next->marked >= limit) {
if (next->marked < FIXMARK) /* does not change FIXMARKs */
next->marked = 0;
p = &next->nexthash;
}
else { /* collect */
if (next->constindex == -1) { /* is userdata? */
tsvalue(&o) = next;
luaD_gcIM(&o);
}
*p = next->nexthash;
luaS_free(next);
tb->nuse--;
}
luaS_free(t);
tb->hash[j] = &luaS_EMPTY;
}
else if (t->marked == 1)
t->marked = 0;
}
}
}

8
llex.c

@ -1,5 +1,5 @@
/*
** $Id: llex.c,v 1.39 1999/09/06 13:55:09 roberto Exp roberto $
** $Id: llex.c,v 1.40 1999/10/04 17:51:04 roberto Exp roberto $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/
@ -37,7 +37,7 @@ void luaX_init (void) {
int i;
for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) {
TaggedString *ts = luaS_new(reserved[i]);
ts->marked = FIRST_RESERVED+i; /* reserved word (always > 255) */
ts->marked = RESERVEDMARK+i; /* reserved word */
}
}
@ -426,8 +426,8 @@ int luaX_lex (LexState *LS) {
} while (isalnum(LS->current) || LS->current == '_');
save('\0');
ts = luaS_new(L->Mbuffer+L->Mbuffbase);
if (ts->marked >= FIRST_RESERVED)
return ts->marked; /* reserved word */
if (ts->marked >= RESERVEDMARK) /* reserved word? */
return ts->marked-RESERVEDMARK+FIRST_RESERVED;
LS->seminfo.ts = ts;
return NAME;
}

21
lobject.h

@ -1,5 +1,5 @@
/*
** $Id: lobject.h,v 1.30 1999/09/06 20:34:18 roberto Exp roberto $
** $Id: lobject.h,v 1.31 1999/10/04 17:51:04 roberto Exp roberto $
** Type definitions for Lua objects
** See Copyright Notice in lua.h
*/
@ -14,13 +14,15 @@
#ifdef DEBUG
#include "lauxlib.h"
#define LUA_INTERNALERROR(s) \
luaL_verror("INTERNAL ERROR - %s [%s:%d]",(s),__FILE__,__LINE__)
#define LUA_ASSERT(c,s) { if (!(c)) LUA_INTERNALERROR(s); }
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <assert.h>
#define LUA_INTERNALERROR(s) assert(0)
#define LUA_ASSERT(c,s) assert(c)
#else
#define LUA_INTERNALERROR(s) /* empty */
#define LUA_ASSERT(c,s) /* empty */
#define LUA_INTERNALERROR(s) /* empty */
#define LUA_ASSERT(c,s) /* empty */
#endif
@ -90,8 +92,8 @@ typedef struct TObject {
*/
typedef struct TaggedString {
struct TaggedString *next;
int marked;
struct TaggedString *nexthash; /* chain hash table */
struct TaggedString *nextglobal; /* chain global variables */
unsigned long hash;
int constindex; /* hint to reuse constants (= -1 if this is a userdata) */
union {
@ -104,6 +106,7 @@ typedef struct TaggedString {
void *v; /* if this is a userdata, here is its value */
} d;
} u;
unsigned char marked;
char str[1]; /* \0 byte already reserved */
} TaggedString;

132
lstring.c

@ -1,5 +1,5 @@
/*
** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $
** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $
** String table (keeps all strings handled by Lua)
** See Copyright Notice in lua.h
*/
@ -19,11 +19,6 @@
TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0,
{{{LUA_T_NIL, {NULL}}, 0L}}, {0}};
/*
** to avoid hash tables with size = 0 (cannot hash with size=0), all
** hash tables are initialized with this `array'. Elements are never
@ -48,6 +43,7 @@ void luaS_init (void) {
void luaS_freeall (void) {
int i;
for (i=0; i<NUM_HASHS; i++) {
LUA_ASSERT(L->string_root[i].nuse==0, "non-empty string table");
if (L->string_root[i].hash != init_hash)
luaM_free(L->string_root[i].hash);
}
@ -56,43 +52,27 @@ void luaS_freeall (void) {
static unsigned long hash_s (const char *s, long l) {
unsigned long h = 0; /* seed */
unsigned long h = l; /* seed */
while (l--)
h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++));
return h;
}
static int newsize (const stringtable *tb) {
int realuse = 0;
int i;
/* count how many entries are really in use */
for (i=0; i<tb->size; i++) {
if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY)
realuse++;
}
return luaO_redimension(realuse*2);
}
static void grow (stringtable *tb) {
int ns = newsize(tb);
int ns = luaO_redimension(tb->size*2); /* new size */
TaggedString **newhash = luaM_newvector(ns, TaggedString *);
int i;
for (i=0; i<ns; i++)
newhash[i] = NULL;
for (i=0; i<ns; i++) newhash[i] = NULL;
/* rehash */
tb->nuse = 0;
for (i=0; i<tb->size; i++) {
if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) {
unsigned long h = tb->hash[i]->hash;
int h1 = h%ns;
while (newhash[h1]) {
h1 += (h&(ns-2)) + 1; /* double hashing */
if (h1 >= ns) h1 -= ns;
}
newhash[h1] = tb->hash[i];
tb->nuse++;
TaggedString *p = tb->hash[i];
while (p) { /* for each node in the list */
TaggedString *next = p->nexthash; /* save next */
int h = p->hash%ns; /* new position */
p->nexthash = newhash[h]; /* chain it in new position */
newhash[h] = p;
p = next;
}
}
luaM_free(tb->hash);
@ -101,91 +81,79 @@ static void grow (stringtable *tb) {
}
static TaggedString *newone (long l, unsigned long h) {
TaggedString *ts = (TaggedString *)luaM_malloc(
sizeof(TaggedString)+l*sizeof(char));
ts->marked = 0;
ts->nexthash = NULL;
ts->nextglobal = ts; /* signal it is not in global list */
ts->hash = h;
return ts;
}
static TaggedString *newone_s (const char *str, long l, unsigned long h) {
TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l);
TaggedString *ts = newone(l, h);
memcpy(ts->str, str, l);
ts->str[l] = 0; /* ending 0 */
ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */
ts->u.s.len = l;
ts->constindex = 0;
L->nblocks += gcsizestring(l);
ts->marked = 0;
ts->next = ts; /* signal it is in no list */
ts->hash = h;
return ts;
}
static TaggedString *newone_u (void *buff, int tag, unsigned long h) {
TaggedString *ts = luaM_new(TaggedString);
TaggedString *ts = newone(0, h);
ts->u.d.v = buff;
ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag;
ts->constindex = -1; /* tag -> this is a userdata */
L->nblocks++;
ts->marked = 0;
ts->next = ts; /* signal it is in no list */
ts->hash = h;
return ts;
}
static void newentry (stringtable *tb, TaggedString *ts, int h1) {
static void newentry (stringtable *tb, TaggedString *ts, int h) {
tb->nuse++;
if ((long)tb->nuse*3 < (long)tb->size*2) /* still have room? */
tb->hash[h1] = ts;
else { /* must grow */
if (tb->nuse >= tb->size) { /* no more room? */
if (tb->hash == init_hash) { /* cannot change init_hash */
LUA_ASSERT(h1==0, "`init_hash' has size 1");
LUA_ASSERT(h==0, "`init_hash' has size 1");
tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */
tb->hash[0] = NULL;
}
tb->hash[h1] = ts;
grow(tb);
h = ts->hash%tb->size; /* new hash position */
}
ts->nexthash = tb->hash[h]; /* chain new entry */
tb->hash[h] = ts;
}
static TaggedString *insert_s (const char *str, long l, stringtable *tb) {
static TaggedString *insert_s (const char *str, long l,
stringtable *tb, unsigned long h) {
int h1 = h%tb->size;
TaggedString *ts;
unsigned long h = hash_s(str, l);
int size = tb->size;
int j = -1; /* last empty place found (or -1) */
int h1 = h%size;
while ((ts = tb->hash[h1]) != NULL) {
if (ts == &luaS_EMPTY)
j = h1;
else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
return ts;
h1 += (h&(size-2)) + 1; /* double hashing */
if (h1 >= size) h1 -= size;
}
/* not found */
ts = newone_s(str, l, h); /* create new entry */
if (j != -1) /* is there an EMPTY space? */
tb->hash[j] = ts; /* use it for new element */
else
newentry(tb, ts, h1); /* no EMPTY places; must use a virgin one */
newentry(tb, ts, h1); /* insert it on table */
return ts;
}
static TaggedString *insert_u (void *buff, int tag, stringtable *tb) {
TaggedString *ts;
unsigned long h = (unsigned long)buff;
int size = tb->size;
int j = -1;
int h1 = h%size;
while ((ts = tb->hash[h1]) != NULL) {
if (ts == &luaS_EMPTY)
j = h1;
else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
int h1 = h%tb->size;
TaggedString *ts;
for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
return ts;
h1 += (h&(size-2)) + 1;
if (h1 >= size) h1 -= size;
}
/* not found */
ts = newone_u(buff, tag, h);
if (j != -1)
tb->hash[j] = ts;
else
newentry(tb, ts, h1);
newentry(tb, ts, h1);
return ts;
}
@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) {
}
TaggedString *luaS_newlstr (const char *str, long l) {
int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR;
return insert_s(str, l, &L->string_root[t]);
unsigned long h = hash_s(str, l);
return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h);
}
TaggedString *luaS_new (const char *str) {
@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) {
TaggedString *luaS_newfixedstring (const char *str) {
TaggedString *ts = luaS_new(str);
if (ts->marked == 0) ts->marked = 2; /* avoid GC */
if (ts->marked == 0) ts->marked = FIXMARK; /* avoid GC */
return ts;
}
@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) {
void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) {
ts->u.s.globalval = *newval;
if (ts->next == ts) { /* is not in list? */
ts->next = L->rootglobal;
if (ts->nextglobal == ts) { /* is not in list? */
ts->nextglobal = L->rootglobal;
L->rootglobal = ts;
}
}

14
lstring.h

@ -1,5 +1,5 @@
/*
** $Id: lstring.h,v 1.8 1999/08/16 20:52:00 roberto Exp roberto $
** $Id: lstring.h,v 1.9 1999/10/04 17:51:04 roberto Exp roberto $
** String table (keep all strings handled by Lua)
** See Copyright Notice in lua.h
*/
@ -11,12 +11,18 @@
#include "lobject.h"
#define NUM_HASHSTR 31
#define NUM_HASHUDATA 31
#define NUM_HASHSTR 31 /* a prime not in array `dimensions' */
#define NUM_HASHUDATA 31 /* idem */
#define NUM_HASHS (NUM_HASHSTR+NUM_HASHUDATA)
extern TaggedString luaS_EMPTY;
/*
** any taggedstring with mark>=FIXMARK is never collected.
** Marks>=RESERVEDMARK are used to identify reserved words.
*/
#define FIXMARK 2
#define RESERVEDMARK 3
void luaS_init (void);
TaggedString *luaS_createudata (void *udata, int tag);

Loading…
Cancel
Save