From cbae01620278f9b568805db16a96d0631ced473d Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 3 Jul 2023 14:12:54 -0300 Subject: [PATCH 1/7] Details --- lundump.h | 3 +-- testes/calls.lua | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lundump.h b/lundump.h index f3748a99..bc71ced8 100644 --- a/lundump.h +++ b/lundump.h @@ -21,8 +21,7 @@ /* ** Encode major-minor version in one byte, one nibble for each */ -#define MYINT(s) (s[0]-'0') /* assume one-digit numerals */ -#define LUAC_VERSION (MYINT(LUA_VERSION_MAJOR)*16+MYINT(LUA_VERSION_MINOR)) +#define LUAC_VERSION (LUA_VERSION_MAJOR_N*16+LUA_VERSION_MINOR_N) #define LUAC_FORMAT 0 /* this is the official format */ diff --git a/testes/calls.lua b/testes/calls.lua index 2d562a24..664be1b4 100644 --- a/testes/calls.lua +++ b/testes/calls.lua @@ -342,7 +342,7 @@ do -- another bug (in 5.4.0) end -do -- another bug (since 5.2) +if not _port then -- another bug (since 5.2) -- corrupted binary dump: list of upvalue names is larger than number -- of upvalues, overflowing the array of upvalues. local code = From 6b51133a988587f34ee9581d799ea9913581afd3 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 13 Jul 2023 14:55:46 -0300 Subject: [PATCH 2/7] Thread stacks resized in the atomic phase Although stack resize can be a little expensive, it seems unusual to have too many threads needing resize during one GC cycle. On the other hand, the change allows full collections to skip the propagate phase, going straight from a pause to the atomic phase. --- lgc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lgc.c b/lgc.c index dd824e77..f8f43393 100644 --- a/lgc.c +++ b/lgc.c @@ -638,7 +638,9 @@ static int traversethread (global_State *g, lua_State *th) { for (uv = th->openupval; uv != NULL; uv = uv->u.open.next) markobject(g, uv); /* open upvalues cannot be collected */ if (g->gcstate == GCSatomic) { /* final traversal? */ - for (; o < th->stack_last.p + EXTRA_STACK; o++) + if (!g->gcemergency) + luaD_shrinkstack(th); /* do not change stack in emergency cycle */ + for (o = th->top.p; o < th->stack_last.p + EXTRA_STACK; o++) setnilvalue(s2v(o)); /* clear dead stack slice */ /* 'remarkupvals' may have removed thread from 'twups' list */ if (!isintwups(th) && th->openupval != NULL) { @@ -646,8 +648,6 @@ static int traversethread (global_State *g, lua_State *th) { g->twups = th; } } - else if (!g->gcemergency) - luaD_shrinkstack(th); /* do not change stack in emergency cycle */ return 1 + stacksize(th); } @@ -1710,6 +1710,8 @@ static void fullinc (lua_State *L, global_State *g) { entersweep(L); /* sweep everything to turn them back to white */ /* finish any pending sweep phase to start a new cycle */ luaC_runtilstate(L, bitmask(GCSpause)); + luaC_runtilstate(L, bitmask(GCSpropagate)); /* start new cycle */ + g->gcstate = GCSenteratomic; /* go straight to atomic phase ??? */ luaC_runtilstate(L, bitmask(GCScallfin)); /* run up to finalizers */ /* estimate must be correct after a full GC cycle */ lua_assert(g->GCestimate == gettotalbytes(g)); From 1b3f507f620d996ffb69da7476a19251acfb89ca Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Tue, 25 Jul 2023 16:50:44 -0300 Subject: [PATCH 3/7] Bug: Call hook may be called twice when count hook yields Took the opportunity and moved the code that controls call hooks in 'luaV_execute' into a function. --- ldebug.c | 22 ++++++++++++++++++++++ ldebug.h | 1 + lstate.h | 2 +- lvm.c | 13 +++---------- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ldebug.c b/ldebug.c index 28b1caab..195d02f8 100644 --- a/ldebug.c +++ b/ldebug.c @@ -865,6 +865,28 @@ static int changedline (const Proto *p, int oldpc, int newpc) { } +/* +** Traces Lua calls. If code is running the first instruction of a function, +** and function is not vararg, and it is not coming from an yield, +** calls 'luaD_hookcall'. (Vararg functions will call 'luaD_hookcall' +** after adjusting its variable arguments; otherwise, they could call +** a line/count hook before the call hook. Functions coming from +** an yield already called 'luaD_hookcall' before yielding.) +*/ +int luaG_tracecall (lua_State *L) { + CallInfo *ci = L->ci; + Proto *p = ci_func(ci)->p; + ci->u.l.trap = 1; /* ensure hooks will be checked */ + if (ci->u.l.savedpc == p->code) { /* first instruction (not resuming)? */ + if (p->is_vararg) + return 0; /* hooks will start at VARARGPREP instruction */ + else if (!(ci->callstatus & CIST_HOOKYIELD)) /* not yieded? */ + luaD_hookcall(L, ci); /* check 'call' hook */ + } + return 1; /* keep 'trap' on */ +} + + /* ** Traces the execution of a Lua function. Called before the execution ** of each opcode, when debug is on. 'L->oldpc' stores the last diff --git a/ldebug.h b/ldebug.h index 2c3074c6..2bfce3cb 100644 --- a/ldebug.h +++ b/ldebug.h @@ -58,6 +58,7 @@ LUAI_FUNC const char *luaG_addinfo (lua_State *L, const char *msg, TString *src, int line); LUAI_FUNC l_noret luaG_errormsg (lua_State *L); LUAI_FUNC int luaG_traceexec (lua_State *L, const Instruction *pc); +LUAI_FUNC int luaG_tracecall (lua_State *L); #endif diff --git a/lstate.h b/lstate.h index 40ff89aa..007704c8 100644 --- a/lstate.h +++ b/lstate.h @@ -181,7 +181,7 @@ struct CallInfo { union { struct { /* only for Lua functions */ const Instruction *savedpc; - volatile l_signalT trap; + volatile l_signalT trap; /* function is tracing lines/counts */ int nextraargs; /* # of extra arguments in vararg functions */ } l; struct { /* only for C functions */ diff --git a/lvm.c b/lvm.c index 2b437bdf..a98aaceb 100644 --- a/lvm.c +++ b/lvm.c @@ -1157,18 +1157,11 @@ void luaV_execute (lua_State *L, CallInfo *ci) { startfunc: trap = L->hookmask; returning: /* trap already set */ - cl = clLvalue(s2v(ci->func.p)); + cl = ci_func(ci); k = cl->p->k; pc = ci->u.l.savedpc; - if (l_unlikely(trap)) { - if (pc == cl->p->code) { /* first instruction (not resuming)? */ - if (cl->p->is_vararg) - trap = 0; /* hooks will start after VARARGPREP instruction */ - else /* check 'call' hook */ - luaD_hookcall(L, ci); - } - ci->u.l.trap = 1; /* assume trap is on, for now */ - } + if (l_unlikely(trap)) + trap = luaG_tracecall(L); base = ci->func.p + 1; /* main loop of interpreter */ for (;;) { From f4211a5ea4e235ccfa8b8dfa46031c23e9e839e2 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 17 Aug 2023 10:42:56 -0300 Subject: [PATCH 4/7] More control over encoding of test files The few UTF-8 test files are commented as such, and there is only one non UTF-8 test file (to test non UTF-8 sources). --- testes/db.lua | 6 ++--- testes/files.lua | 8 +++---- testes/pm.lua | 56 +++++++++++++++++++++++++++++----------------- testes/sort.lua | 2 +- testes/strings.lua | 3 +++ testes/utf8.lua | 2 ++ 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/testes/db.lua b/testes/db.lua index 67b58934..d3758c41 100644 --- a/testes/db.lua +++ b/testes/db.lua @@ -345,7 +345,7 @@ function f(a,b) local _, y = debug.getlocal(1, 2) assert(x == a and y == b) assert(debug.setlocal(2, 3, "pera") == "AA".."AA") - assert(debug.setlocal(2, 4, "maçã") == "B") + assert(debug.setlocal(2, 4, "manga") == "B") x = debug.getinfo(2) assert(x.func == g and x.what == "Lua" and x.name == 'g' and x.nups == 2 and string.find(x.source, "^@.*db%.lua$")) @@ -373,9 +373,9 @@ function g (...) local arg = {...} do local a,b,c; a=math.sin(40); end local feijao - local AAAA,B = "xuxu", "mamão" + local AAAA,B = "xuxu", "abacate" f(AAAA,B) - assert(AAAA == "pera" and B == "maçã") + assert(AAAA == "pera" and B == "manga") do local B = 13 local x,y = debug.getlocal(1,5) diff --git a/testes/files.lua b/testes/files.lua index be00bf3f..1476006e 100644 --- a/testes/files.lua +++ b/testes/files.lua @@ -92,8 +92,8 @@ assert(io.output():seek("end") == string.len("alo joao")) assert(io.output():seek("set") == 0) -assert(io.write('"álo"', "{a}\n", "second line\n", "third line \n")) -assert(io.write('çfourth_line')) +assert(io.write('"alo"', "{a}\n", "second line\n", "third line \n")) +assert(io.write('Xfourth_line')) io.output(io.stdout) collectgarbage() -- file should be closed by GC assert(io.input() == io.stdin and rawequal(io.output(), io.stdout)) @@ -300,14 +300,14 @@ do -- test error returns end checkerr("invalid format", io.read, "x") assert(io.read(0) == "") -- not eof -assert(io.read(5, 'l') == '"álo"') +assert(io.read(5, 'l') == '"alo"') assert(io.read(0) == "") assert(io.read() == "second line") local x = io.input():seek() assert(io.read() == "third line ") assert(io.input():seek("set", x)) assert(io.read('L') == "third line \n") -assert(io.read(1) == "ç") +assert(io.read(1) == "X") assert(io.read(string.len"fourth_line") == "fourth_line") assert(io.input():seek("cur", -string.len"fourth_line")) assert(io.read() == "fourth_line") diff --git a/testes/pm.lua b/testes/pm.lua index 795596d4..44454dff 100644 --- a/testes/pm.lua +++ b/testes/pm.lua @@ -1,6 +1,9 @@ -- $Id: testes/pm.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + + print('testing pattern matching') local function checkerror (msg, f, ...) @@ -50,6 +53,19 @@ assert(f('aLo_ALO', '%a*') == 'aLo') assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu") + +-- Adapt a pattern to UTF-8 +local function PU (p) + -- break '?' into each individual byte of a character + p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c) + return string.gsub(c, ".", "%0?") + end) + -- change '.' to utf-8 character patterns + p = string.gsub(p, "%.", utf8.charpattern) + return p +end + + assert(f('aaab', 'a*') == 'aaa'); assert(f('aaa', '^.*$') == 'aaa'); assert(f('aaa', 'b*') == ''); @@ -73,16 +89,16 @@ assert(f('aaa', '^.-$') == 'aaa') assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab') assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab') assert(f('alo xo', '.o$') == 'xo') -assert(f(' \n isto é assim', '%S%S*') == 'isto') -assert(f(' \n isto é assim', '%S*$') == 'assim') -assert(f(' \n isto é assim', '[a-z]*$') == 'assim') +assert(f(' \n isto é assim', '%S%S*') == 'isto') +assert(f(' \n isto é assim', '%S*$') == 'assim') +assert(f(' \n isto é assim', '[a-z]*$') == 'assim') assert(f('um caracter ? extra', '[^%sa-z]') == '?') assert(f('', 'a?') == '') -assert(f('á', 'á?') == 'á') -assert(f('ábl', 'á?b?l?') == 'ábl') -assert(f(' ábl', 'á?b?l?') == '') +assert(f('á', PU'á?') == 'á') +assert(f('ábl', PU'á?b?l?') == 'ábl') +assert(f(' ábl', PU'á?b?l?') == '') assert(f('aa', '^aa?a?a') == 'aa') -assert(f(']]]áb', '[^]]') == 'á') +assert(f(']]]áb', '[^]]+') == 'áb') assert(f("0alo alo", "%x*") == "0a") assert(f("alo alo", "%C+") == "alo alo") print('+') @@ -136,28 +152,28 @@ assert(string.match("alo xyzK", "(%w+)K") == "xyz") assert(string.match("254 K", "(%d*)K") == "") assert(string.match("alo ", "(%w*)$") == "") assert(not string.match("alo ", "(%w+)$")) -assert(string.find("(álo)", "%(á") == 1) -local a, b, c, d, e = string.match("âlo alo", "^(((.).).* (%w*))$") -assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) +assert(string.find("(álo)", "%(á") == 1) +local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$") +assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) a, b, c, d = string.match('0123456789', '(.+(.?)())') assert(a == '0123456789' and b == '' and c == 11 and d == nil) print('+') -assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') -assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim +assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') +assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ') -local t = "abç d" -a, b = string.gsub(t, '(.)', '%1@') -assert('@'..a == string.gsub(t, '', '@') and b == 5) -a, b = string.gsub('abçd', '(.)', '%0@', 2) -assert(a == 'a@b@çd' and b == 2) +local t = "abç d" +a, b = string.gsub(t, PU'(.)', '%1@') +assert(a == "a@b@ç@ @d@" and b == 5) +a, b = string.gsub('abçd', PU'(.)', '%0@', 2) +assert(a == 'a@b@çd' and b == 2) assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o') assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") == "xyz=abc-abc=xyz") assert(string.gsub("abc", "%w", "%1%0") == "aabbcc") assert(string.gsub("abc", "%w+", "%0%1") == "abcabc") -assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') +assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') assert(string.gsub('', '^', 'r') == 'r') assert(string.gsub('', '$', 'r') == 'r') print('+') @@ -188,8 +204,8 @@ do end function f(a,b) return string.gsub(a,'.',b) end -assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == - "trocar tudo em bbbbb é alalalalalal") +assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == + "trocar tudo em bbbbb é alalalalalal") local function dostring (s) return load(s, "")() or "" end assert(string.gsub("alo $a='x'$ novamente $return a$", diff --git a/testes/sort.lua b/testes/sort.lua index 52919b8c..40bb2d8a 100644 --- a/testes/sort.lua +++ b/testes/sort.lua @@ -289,7 +289,7 @@ timesort(a, limit, function(x,y) return nil end, "equal") for i,v in pairs(a) do assert(v == false) end -AA = {"álo", "\0first :-)", "alo", "then this one", "45", "and a new"} +AA = {"\xE1lo", "\0first :-)", "alo", "then this one", "45", "and a new"} table.sort(AA) check(AA) diff --git a/testes/strings.lua b/testes/strings.lua index b033c6ab..90983edd 100644 --- a/testes/strings.lua +++ b/testes/strings.lua @@ -1,6 +1,9 @@ -- $Id: testes/strings.lua $ -- See Copyright Notice in file all.lua +-- ISO Latin encoding + + print('testing strings and string library') local maxi = math.maxinteger diff --git a/testes/utf8.lua b/testes/utf8.lua index c5a9dd3f..efadbd5c 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua @@ -1,6 +1,8 @@ -- $Id: testes/utf8.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + print "testing UTF-8 library" local utf8 = require'utf8' From 9b4f39ab14fb2e55345c3d23537d129dac23b091 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 17 Aug 2023 15:59:28 -0300 Subject: [PATCH 5/7] More disciplined use of 'getstr' and 'tsslen' We may want to add other string variants in the future; this change documents better where the code may need to handle those variants. --- lapi.c | 4 ++-- ldebug.c | 6 +++--- lgc.c | 8 +++++--- lobject.c | 2 +- lobject.h | 18 ++++++++---------- lstate.c | 2 +- lstring.c | 11 ++++++----- lundump.c | 2 +- lvm.c | 17 ++++++++++------- 9 files changed, 37 insertions(+), 33 deletions(-) diff --git a/lapi.c b/lapi.c index 34e64af1..332e97d1 100644 --- a/lapi.c +++ b/lapi.c @@ -417,9 +417,9 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) { o = index2value(L, idx); /* previous call may reallocate the stack */ } if (len != NULL) - *len = vslen(o); + *len = tsslen(tsvalue(o)); lua_unlock(L); - return svalue(o); + return getstr(tsvalue(o)); } diff --git a/ldebug.c b/ldebug.c index 195d02f8..690ac38f 100644 --- a/ldebug.c +++ b/ldebug.c @@ -426,7 +426,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, */ static void kname (const Proto *p, int c, const char **name) { TValue *kvalue = &p->k[c]; - *name = (ttisstring(kvalue)) ? svalue(kvalue) : "?"; + *name = (ttisstring(kvalue)) ? getstr(tsvalue(kvalue)) : "?"; } @@ -569,7 +569,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, int b = (op == OP_LOADK) ? GETARG_Bx(i) : GETARG_Ax(p->code[pc + 1]); if (ttisstring(&p->k[b])) { - *name = svalue(&p->k[b]); + *name = getstr(tsvalue(&p->k[b])); return "constant"; } break; @@ -627,7 +627,7 @@ static const char *funcnamefromcode (lua_State *L, const Proto *p, default: return NULL; /* cannot find a reasonable name */ } - *name = getstr(G(L)->tmname[tm]) + 2; + *name = getshrstr(G(L)->tmname[tm]) + 2; return "metamethod"; } diff --git a/lgc.c b/lgc.c index f8f43393..253a2892 100644 --- a/lgc.c +++ b/lgc.c @@ -542,10 +542,12 @@ static void traversestrongtable (global_State *g, Table *h) { static lu_mem traversetable (global_State *g, Table *h) { const char *weakkey, *weakvalue; const TValue *mode = gfasttm(g, h->metatable, TM_MODE); + TString *smode; markobjectN(g, h->metatable); - if (mode && ttisstring(mode) && /* is there a weak mode? */ - (cast_void(weakkey = strchr(svalue(mode), 'k')), - cast_void(weakvalue = strchr(svalue(mode), 'v')), + if (mode && ttisshrstring(mode) && /* is there a weak mode? */ + (cast_void(smode = tsvalue(mode)), + cast_void(weakkey = strchr(getshrstr(smode), 'k')), + cast_void(weakvalue = strchr(getshrstr(smode), 'v')), (weakkey || weakvalue))) { /* is really weak? */ if (!weakkey) /* strong keys? */ traverseweakvalue(g, h); diff --git a/lobject.c b/lobject.c index f73ffc6d..9cfa5227 100644 --- a/lobject.c +++ b/lobject.c @@ -542,7 +542,7 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) { addstr2buff(&buff, fmt, strlen(fmt)); /* rest of 'fmt' */ clearbuff(&buff); /* empty buffer into the stack */ lua_assert(buff.pushed == 1); - return svalue(s2v(L->top.p - 1)); + return getstr(tsvalue(s2v(L->top.p - 1))); } diff --git a/lobject.h b/lobject.h index 556608e4..980e42f8 100644 --- a/lobject.h +++ b/lobject.h @@ -386,7 +386,7 @@ typedef struct GCObject { typedef struct TString { CommonHeader; lu_byte extra; /* reserved words for short strings; "has hash" for longs */ - lu_byte shrlen; /* length for short strings */ + lu_byte shrlen; /* length for short strings, 0xFF for long strings */ unsigned int hash; union { size_t lnglen; /* length for long strings */ @@ -398,19 +398,17 @@ typedef struct TString { /* -** Get the actual string (array of bytes) from a 'TString'. +** Get the actual string (array of bytes) from a 'TString'. (Generic +** version and specialized versions for long and short strings.) */ -#define getstr(ts) ((ts)->contents) +#define getstr(ts) ((ts)->contents) +#define getlngstr(ts) check_exp((ts)->shrlen == 0xFF, (ts)->contents) +#define getshrstr(ts) check_exp((ts)->shrlen != 0xFF, (ts)->contents) -/* get the actual string (array of bytes) from a Lua value */ -#define svalue(o) getstr(tsvalue(o)) - /* get string length from 'TString *s' */ -#define tsslen(s) ((s)->tt == LUA_VSHRSTR ? (s)->shrlen : (s)->u.lnglen) - -/* get string length from 'TValue *o' */ -#define vslen(o) tsslen(tsvalue(o)) +#define tsslen(s) \ + ((s)->shrlen != 0xFF ? (s)->shrlen : (s)->u.lnglen) /* }================================================================== */ diff --git a/lstate.c b/lstate.c index 06667dac..7fefacba 100644 --- a/lstate.c +++ b/lstate.c @@ -433,7 +433,7 @@ void luaE_warning (lua_State *L, const char *msg, int tocont) { void luaE_warnerror (lua_State *L, const char *where) { TValue *errobj = s2v(L->top.p - 1); /* error object */ const char *msg = (ttisstring(errobj)) - ? svalue(errobj) + ? getstr(tsvalue(errobj)) : "error object is not a string"; /* produce warning "error in %s (%s)" (where, msg) */ luaE_warning(L, "error in ", 1); diff --git a/lstring.c b/lstring.c index 13dcaf42..1032ad86 100644 --- a/lstring.c +++ b/lstring.c @@ -36,7 +36,7 @@ int luaS_eqlngstr (TString *a, TString *b) { lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR); return (a == b) || /* same instance or... */ ((len == b->u.lnglen) && /* equal length and ... */ - (memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */ + (memcmp(getlngstr(a), getlngstr(b), len) == 0)); /* equal contents */ } @@ -52,7 +52,7 @@ unsigned int luaS_hashlongstr (TString *ts) { lua_assert(ts->tt == LUA_VLNGSTR); if (ts->extra == 0) { /* no hash? */ size_t len = ts->u.lnglen; - ts->hash = luaS_hash(getstr(ts), len, ts->hash); + ts->hash = luaS_hash(getlngstr(ts), len, ts->hash); ts->extra = 1; /* now it has its hash */ } return ts->hash; @@ -157,6 +157,7 @@ static TString *createstrobj (lua_State *L, size_t l, int tag, unsigned int h) { TString *luaS_createlngstrobj (lua_State *L, size_t l) { TString *ts = createstrobj(L, l, LUA_VLNGSTR, G(L)->seed); ts->u.lnglen = l; + ts->shrlen = 0xFF; /* signals that it is a long string */ return ts; } @@ -193,7 +194,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { TString **list = &tb->hash[lmod(h, tb->size)]; lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ for (ts = *list; ts != NULL; ts = ts->u.hnext) { - if (l == ts->shrlen && (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) { + if (l == ts->shrlen && (memcmp(str, getshrstr(ts), l * sizeof(char)) == 0)) { /* found! */ if (isdead(g, ts)) /* dead (but not collected yet)? */ changewhite(ts); /* resurrect it */ @@ -206,7 +207,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { list = &tb->hash[lmod(h, tb->size)]; /* rehash with new size */ } ts = createstrobj(L, l, LUA_VSHRSTR, h); - memcpy(getstr(ts), str, l * sizeof(char)); + memcpy(getshrstr(ts), str, l * sizeof(char)); ts->shrlen = cast_byte(l); ts->u.hnext = *list; *list = ts; @@ -226,7 +227,7 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { if (l_unlikely(l >= (MAX_SIZE - sizeof(TString))/sizeof(char))) luaM_toobig(L); ts = luaS_createlngstrobj(L, l); - memcpy(getstr(ts), str, l * sizeof(char)); + memcpy(getlngstr(ts), str, l * sizeof(char)); return ts; } } diff --git a/lundump.c b/lundump.c index 02aed64f..f1852c35 100644 --- a/lundump.c +++ b/lundump.c @@ -122,7 +122,7 @@ static TString *loadStringN (LoadState *S, Proto *p) { ts = luaS_createlngstrobj(L, size); /* create string */ setsvalue2s(L, L->top.p, ts); /* anchor it ('loadVector' can GC) */ luaD_inctop(L); - loadVector(S, getstr(ts), size); /* load directly in final place */ + loadVector(S, getlngstr(ts), size); /* load directly in final place */ L->top.p--; /* pop string */ } luaC_objbarrier(L, p, ts); diff --git a/lvm.c b/lvm.c index a98aaceb..4d71cfff 100644 --- a/lvm.c +++ b/lvm.c @@ -91,8 +91,10 @@ static int l_strton (const TValue *obj, TValue *result) { lua_assert(obj != result); if (!cvt2num(obj)) /* is object not a string? */ return 0; - else - return (luaO_str2num(svalue(obj), result) == vslen(obj) + 1); + else { + TString *st = tsvalue(obj); + return (luaO_str2num(getstr(st), result) == tsslen(st) + 1); + } } @@ -626,8 +628,9 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) { static void copy2buff (StkId top, int n, char *buff) { size_t tl = 0; /* size already copied */ do { - size_t l = vslen(s2v(top - n)); /* length of string being copied */ - memcpy(buff + tl, svalue(s2v(top - n)), l * sizeof(char)); + TString *st = tsvalue(s2v(top - n)); + size_t l = tsslen(st); /* length of string being copied */ + memcpy(buff + tl, getstr(st), l * sizeof(char)); tl += l; } while (--n > 0); } @@ -653,11 +656,11 @@ void luaV_concat (lua_State *L, int total) { } else { /* at least two non-empty string values; get as many as possible */ - size_t tl = vslen(s2v(top - 1)); + size_t tl = tsslen(tsvalue(s2v(top - 1))); TString *ts; /* collect total length and number of strings */ for (n = 1; n < total && tostring(L, s2v(top - n - 1)); n++) { - size_t l = vslen(s2v(top - n - 1)); + size_t l = tsslen(tsvalue(s2v(top - n - 1))); if (l_unlikely(l >= (MAX_SIZE/sizeof(char)) - tl)) { L->top.p = top - total; /* pop strings to avoid wasting stack */ luaG_runerror(L, "string length overflow"); @@ -671,7 +674,7 @@ void luaV_concat (lua_State *L, int total) { } else { /* long string; copy strings directly to final result */ ts = luaS_createlngstrobj(L, tl); - copy2buff(top, n, getstr(ts)); + copy2buff(top, n, getlngstr(ts)); } setsvalue2s(L, top - n, ts); /* create result */ } From 5ab6a5756b3c50c99f1388885e9a48a7da8cbe2d Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 23 Aug 2023 13:49:27 -0300 Subject: [PATCH 6/7] Bug: Wrong line number for function calls --- lparser.c | 12 ++++++------ testes/errors.lua | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lparser.c b/lparser.c index b745f236..2b888c7c 100644 --- a/lparser.c +++ b/lparser.c @@ -1022,10 +1022,11 @@ static int explist (LexState *ls, expdesc *v) { } -static void funcargs (LexState *ls, expdesc *f, int line) { +static void funcargs (LexState *ls, expdesc *f) { FuncState *fs = ls->fs; expdesc args; int base, nparams; + int line = ls->linenumber; switch (ls->t.token) { case '(': { /* funcargs -> '(' [ explist ] ')' */ luaX_next(ls); @@ -1063,8 +1064,8 @@ static void funcargs (LexState *ls, expdesc *f, int line) { } init_exp(f, VCALL, luaK_codeABC(fs, OP_CALL, base, nparams+1, 2)); luaK_fixline(fs, line); - fs->freereg = base+1; /* call remove function and arguments and leaves - (unless changed) one result */ + fs->freereg = base+1; /* call removes function and arguments and leaves + one result (unless changed later) */ } @@ -1103,7 +1104,6 @@ static void suffixedexp (LexState *ls, expdesc *v) { /* suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */ FuncState *fs = ls->fs; - int line = ls->linenumber; primaryexp(ls, v); for (;;) { switch (ls->t.token) { @@ -1123,12 +1123,12 @@ static void suffixedexp (LexState *ls, expdesc *v) { luaX_next(ls); codename(ls, &key); luaK_self(fs, v, &key); - funcargs(ls, v, line); + funcargs(ls, v); break; } case '(': case TK_STRING: case '{': { /* funcargs */ luaK_exp2nextreg(fs, v); - funcargs(ls, v, line); + funcargs(ls, v); break; } default: return; diff --git a/testes/errors.lua b/testes/errors.lua index bf6f389d..b777a329 100644 --- a/testes/errors.lua +++ b/testes/errors.lua @@ -392,19 +392,19 @@ lineerror("a\n=\n-\n\nprint\n;", 3) lineerror([[ a -( +( -- << 23) -]], 1) +]], 2) lineerror([[ local a = {x = 13} a . x -( +( -- << 23 ) -]], 2) +]], 5) lineerror([[ local a = {x = 13} From 9363a8b9901a5643c9da061ea8dda8a86cdc7ef1 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 23 Aug 2023 13:50:38 -0300 Subject: [PATCH 7/7] Documentation for "LUA_NOENV" Registry field "LUA_NOENV" (that signals to libraries that option -E is on) now part of the "official" API of Lua stand-alone. --- manual/manual.of | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/manual/manual.of b/manual/manual.of index f8d8ddd4..ad120f5e 100644 --- a/manual/manual.of +++ b/manual/manual.of @@ -9026,6 +9026,10 @@ Lua does not consult any environment variables. In particular, the values of @Lid{package.path} and @Lid{package.cpath} are set with the default paths defined in @id{luaconf.h}. +To signal to the libraries that this option is on, +the stand-alone interpreter sets the field +@idx{"LUA_NOENV"} in the registry to a true value. +Other libraries may consult this field for the same purpose. The options @T{-e}, @T{-l}, and @T{-W} are handled in the order they appear.