From c8121ce34b39c6fd31899f4da91e26063c8af54f Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 12 Feb 2024 15:16:11 -0300 Subject: [PATCH] Revising code for Varint encoding in dumps - Using lua_Unsigned to count strings. - Varint uses a type large enough both for size_t and lua_Unsigned. - Most-Significant Bit 0 means last byte, to conform to common usage. - (unrelated) Change in macro 'getaddr' so that multiplication is by constants. --- ldump.c | 35 ++++++++++++++++++----------------- lundump.c | 24 ++++++++++++------------ lundump.h | 15 +++++++++++++++ 3 files changed, 45 insertions(+), 29 deletions(-) diff --git a/ldump.c b/ldump.c index b31e7bc7..34cfb576 100644 --- a/ldump.c +++ b/ldump.c @@ -30,7 +30,7 @@ typedef struct { int strip; int status; Table *h; /* table to track saved strings */ - lua_Integer nstr; /* counter to number saved strings */ + lua_Unsigned nstr; /* counter to number saved strings */ } DumpState; @@ -83,26 +83,27 @@ static void dumpByte (DumpState *D, int y) { /* -** 'dumpSize' buffer size: each byte can store up to 7 bits. (The "+6" -** rounds up the division.) +** size for 'dumpVarint' buffer: each byte can store up to 7 bits. +** (The "+6" rounds up the division.) 
*/ -#define DIBS ((sizeof(size_t) * CHAR_BIT + 6) / 7) +#define DIBS ((sizeof(varint_t) * CHAR_BIT + 6) / 7) -static void dumpSize (DumpState *D, size_t x) { +/* +** Dumps an unsigned integer using the MSB Varint encoding +*/ +static void dumpVarint (DumpState *D, varint_t x) { lu_byte buff[DIBS]; - int n = 0; - do { - buff[DIBS - (++n)] = x & 0x7f; /* fill buffer in reverse order */ - x >>= 7; - } while (x != 0); - buff[DIBS - 1] |= 0x80; /* mark last byte */ + int n = 1; + buff[DIBS - 1] = x & 0x7f; /* fill least-significant byte */ + while ((x >>= 7) != 0) /* fill other bytes in reverse order */ + buff[DIBS - (++n)] = (x & 0x7f) | 0x80; dumpVector(D, buff + DIBS - n, n); } static void dumpInt (DumpState *D, int x) { lua_assert(x >= 0); - dumpSize(D, x); + dumpVarint(D, x); } @@ -125,22 +126,22 @@ static void dumpInteger (DumpState *D, lua_Integer x) { */ static void dumpString (DumpState *D, TString *ts) { if (ts == NULL) - dumpSize(D, 0); + dumpVarint(D, 0); else { TValue idx; if (luaH_getstr(D->h, ts, &idx) == HOK) { /* string already saved? 
*/ - dumpSize(D, 1); /* reuse a saved string */ - dumpInt(D, ivalue(&idx)); /* index of saved string */ + dumpVarint(D, 1); /* reuse a saved string */ + dumpVarint(D, l_castS2U(ivalue(&idx))); /* index of saved string */ } else { /* must write and save the string */ TValue key, value; /* to save the string in the hash */ size_t size; const char *s = getlstr(ts, size); - dumpSize(D, size + 2); + dumpVarint(D, size + 2); dumpVector(D, s, size + 1); /* include ending '\0' */ D->nstr++; /* one more saved string */ setsvalue(D->L, &key, ts); /* the string is the key */ - setivalue(&value, D->nstr); /* its index is the value */ + setivalue(&value, l_castU2S(D->nstr)); /* its index is the value */ luaH_set(D->L, D->h, &key, &value); /* h[ts] = nstr */ /* integer value does not need barrier */ } diff --git a/lundump.c b/lundump.c index b33258b0..d485f266 100644 --- a/lundump.c +++ b/lundump.c @@ -37,7 +37,7 @@ typedef struct { const char *name; Table *h; /* list for string reuse */ size_t offset; /* current position relative to beginning of dump */ - lua_Integer nstr; /* number of strings in the list */ + lua_Unsigned nstr; /* number of strings in the list */ lu_byte fixed; /* dump is fixed in memory */ } LoadState; @@ -71,10 +71,9 @@ static void loadAlign (LoadState *S, int align) { } -#define getaddr(S,n,t) cast(t *, getaddr_(S,n,sizeof(t))) +#define getaddr(S,n,t) cast(t *, getaddr_(S,(n) * sizeof(t))) -static const void *getaddr_ (LoadState *S, int n, size_t sz) { - size_t size = n * sz; +static const void *getaddr_ (LoadState *S, size_t size) { const void *block = luaZ_getaddr(S->Z, size); S->offset += size; if (block == NULL) @@ -95,8 +94,8 @@ static lu_byte loadByte (LoadState *S) { } -static size_t loadUnsigned (LoadState *S, size_t limit) { - size_t x = 0; +static varint_t loadVarint (LoadState *S, varint_t limit) { + varint_t x = 0; int b; limit >>= 7; do { @@ -104,18 +103,18 @@ static size_t loadUnsigned (LoadState *S, size_t limit) { if (x >= limit) error(S, 
"integer overflow"); x = (x << 7) | (b & 0x7f); - } while ((b & 0x80) == 0); + } while ((b & 0x80) != 0); return x; } static size_t loadSize (LoadState *S) { - return loadUnsigned(S, MAX_SIZET); + return cast_sizet(loadVarint(S, MAX_SIZET)); } static int loadInt (LoadState *S) { - return cast_int(loadUnsigned(S, INT_MAX)); + return cast_int(loadVarint(S, INT_MAX)); } @@ -149,9 +148,10 @@ static void loadString (LoadState *S, Proto *p, TString **sl) { return; } else if (size == 1) { /* previously saved string? */ - int idx = loadInt(S); /* get its index */ + /* get its index */ + lua_Unsigned idx = cast(lua_Unsigned, loadVarint(S, LUA_MAXUNSIGNED)); TValue stv; - luaH_getint(S->h, idx, &stv); + luaH_getint(S->h, l_castU2S(idx), &stv); /* get its value */ *sl = ts = tsvalue(&stv); luaC_objbarrier(L, p, ts); return; /* do not save it again */ @@ -175,7 +175,7 @@ static void loadString (LoadState *S, Proto *p, TString **sl) { /* add string to list of saved strings */ S->nstr++; setsvalue(L, &sv, ts); - luaH_setint(L, S->h, S->nstr, &sv); + luaH_setint(L, S->h, l_castU2S(S->nstr), &sv); luaC_objbarrierback(L, obj2gco(S->h), ts); } diff --git a/lundump.h b/lundump.h index b10307e4..ff66d2e7 100644 --- a/lundump.h +++ b/lundump.h @@ -7,6 +7,8 @@ #ifndef lundump_h #define lundump_h +#include <limits.h> + #include "llimits.h" #include "lobject.h" #include "lzio.h" @@ -25,6 +27,19 @@ #define LUAC_FORMAT 0 /* this is the official format */ + +/* +** Type to handle MSB Varint encoding: Try to get the largest unsigned +** integer available. (It was enough to be the largest between size_t and +** lua_Integer, but the C89 preprocessor knows nothing about size_t.) +*/ +#if !defined(LUA_USE_C89) && defined(LLONG_MAX) +typedef unsigned long long varint_t; +#else +typedef unsigned long varint_t; +#endif + + /* load one chunk; from lundump.c */ LUAI_FUNC LClosure* luaU_undump (lua_State* L, ZIO* Z, const char* name, int fixed);