Browse Source

Merge branch 'add-compiler-doc'

Add a cleaned up, relatively old compiler design document.
pull/297/head
Sami Vaarala 9 years ago
parent
commit
e55e469c68
  1. 2820
      doc/compiler.rst
  2. 6
      src/duk_api_public.h.in
  3. 2
      src/duk_api_stack.c
  4. 2
      src/duk_api_string.c
  5. 4
      src/duk_error_augment.c
  6. 2
      src/duk_hobject_pc2line.c
  7. 4
      src/duk_hobject_props.c
  8. 4
      src/duk_js_call.c
  9. 24
      src/duk_js_compiler.c
  10. 2
      src/duk_js_compiler.h
  11. 2
      src/duk_js_executor.c
  12. 8
      src/duk_js_var.c
  13. 4
      src/duk_numconv.c
  14. 8
      src/duk_regexp_compiler.c
  15. 2
      src/duk_regexp_executor.c
  16. 2
      src/duk_tval.h
  17. 29
      tests/ecmascript/test-dev-compiler-dynamic-scope.js
  18. 16
      website/api/duk_compile.yaml
  19. 2
      website/guide/performance.html

2820
doc/compiler.rst

File diff suppressed because it is too large

6
src/duk_api_public.h.in

@ -154,9 +154,9 @@ struct duk_number_list_entry {
#define DUK_ENUM_NO_PROXY_BEHAVIOR (1 << 5) /* enumerate a proxy object itself without invoking proxy behavior */
/* Compilation flags for duk_compile() and duk_eval() */
#define DUK_COMPILE_EVAL (1 << 0) /* compile eval code (instead of program) */
#define DUK_COMPILE_FUNCTION (1 << 1) /* compile function code (instead of program) */
#define DUK_COMPILE_STRICT (1 << 2) /* use strict (outer) context for program, eval, or function */
#define DUK_COMPILE_EVAL (1 << 0) /* compile eval code (instead of global code) */
#define DUK_COMPILE_FUNCTION (1 << 1) /* compile function code (instead of global code) */
#define DUK_COMPILE_STRICT (1 << 2) /* use strict (outer) context for global, eval, or function code */
#define DUK_COMPILE_SAFE (1 << 3) /* (internal) catch compilation errors */
#define DUK_COMPILE_NORESULT (1 << 4) /* (internal) omit eval result */
#define DUK_COMPILE_NOSOURCE (1 << 5) /* (internal) no source string on stack */

2
src/duk_api_stack.c

@ -512,7 +512,7 @@ DUK_LOCAL duk_bool_t duk__resize_valstack(duk_context *ctx, duk_size_t new_size)
*
* Note: cannot use a plain DUK_REALLOC() because a mark-and-sweep may
* invalidate the original thr->valstack base pointer inside the realloc
* process. See doc/memory-management.txt.
* process. See doc/memory-management.rst.
*/
new_alloc_size = sizeof(duk_tval) * new_size;

2
src/duk_api_string.c

@ -31,7 +31,7 @@ DUK_LOCAL void duk__concat_and_join_helper(duk_context *ctx, duk_idx_t count_in,
h = duk_to_hstring(ctx, -((duk_idx_t) count) - 1);
DUK_ASSERT(h != NULL);
/* A bit tricky overflow test, see doc/code-issues.txt. */
/* A bit tricky overflow test, see doc/code-issues.rst. */
t1 = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h);
t2 = (duk_size_t) (count - 1);
limit = (duk_size_t) DUK_HSTRING_MAX_BYTELEN;

4
src/duk_error_augment.c

@ -48,7 +48,7 @@
* The user error handler is stored in 'Duktape.errCreate' or
* 'Duktape.errThrow' depending on whether we're augmenting the error at
* creation or throw time. There are several alternatives to this approach,
* see doc/error-objects.txt for discussion.
* see doc/error-objects.rst for discussion.
*
* Note: since further longjmp()s may occur while calling the error handler
* (for many reasons, e.g. a labeled 'break' inside the handler), the
@ -174,7 +174,7 @@ DUK_LOCAL void duk__add_traceback(duk_hthread *thr, duk_hthread *thr_callstack,
* and cheap to create. It may change arbitrarily from version to version.
* It should be decoded/accessed through version specific accessors only.
*
* See doc/error-objects.txt.
* See doc/error-objects.rst.
*/
DUK_DDD(DUK_DDDPRINT("adding traceback to object: %!T",

2
src/duk_hobject_pc2line.c

@ -4,7 +4,7 @@
*
* The run-time pc2line data is bit-packed, and documented in:
*
* doc/function-objects.txt
* doc/function-objects.rst
*/
#include "duk_internal.h"

4
src/duk_hobject_props.c

@ -2,8 +2,8 @@
* Hobject property set/get functionality.
*
* This is very central functionality for size, performance, and compliance.
* It is also rather intricate; see hobject-algorithms.txt for discussion on
* the algorithms and memory-management.txt for discussion on refcounts and
* It is also rather intricate; see hobject-algorithms.rst for discussion on
* the algorithms and memory-management.rst for discussion on refcounts and
* side effect issues.
*
* Notes:

4
src/duk_js_call.c

@ -804,7 +804,7 @@ void duk__adjust_valstack_and_top(duk_hthread *thr, duk_idx_t num_stack_args, du
* indices) cause an error to propagate out of this function. If there is
* no catchpoint for this error, the fatal error handler is called.
*
* See 'execution.txt'.
* See 'execution.rst'.
*
* The allowed thread states for making a call are:
* - thr matches heap->curr_thread, and thr is already RUNNING
@ -816,7 +816,7 @@ void duk__adjust_valstack_and_top(duk_hthread *thr, duk_idx_t num_stack_args, du
* avoiding a dozen helpers with awkward plumbing.
*
* Note: setjmp() and local variables have a nasty interaction,
* see execution.txt; non-volatile locals modified after setjmp()
* see execution.rst; non-volatile locals modified after setjmp()
* call are not guaranteed to keep their value.
*/

24
src/duk_js_compiler.c

@ -17,7 +17,7 @@
* Recursion limits are in key functions to prevent arbitrary C recursion:
* function body parsing, statement parsing, and expression parsing.
*
* See doc/compiler.txt for discussion on the design.
* See doc/compiler.rst for discussion on the design.
*
* A few typing notes:
*
@ -232,7 +232,7 @@ DUK_LOCAL_DECL duk_int_t duk__parse_func_like_fnum(duk_compiler_ctx *comp_ctx, d
/* XXX: actually single step levels would work just fine, clean up */
/* binding power "levels" (see doc/compiler.txt) */
/* binding power "levels" (see doc/compiler.rst) */
#define DUK__BP_INVALID 0 /* always terminates led() */
#define DUK__BP_EOF 2
#define DUK__BP_CLOSING 4 /* token closes expression, e.g. ')', ']' */
@ -419,7 +419,7 @@ DUK_LOCAL void duk__advance_helper(duk_compiler_ctx *comp_ctx, duk_small_int_t e
* We can use either 't' or 't_nores'; the latter would not
* recognize keywords. Some keywords can be followed by a
* RegExp (e.g. "return"), so using 't' is better. This is
* not trivial, see doc/compiler.txt.
* not trivial, see doc/compiler.rst.
*/
regexp = 1;
@ -3344,7 +3344,7 @@ DUK_LOCAL void duk__expr_nud(duk_compiler_ctx *comp_ctx, duk_ivalue *res) {
* such that parsing ends at an LPAREN (CallExpression) but not at
* a PERIOD or LBRACKET (MemberExpression).
*
* See doc/compiler.txt for discussion on the parsing approach,
* See doc/compiler.rst for discussion on the parsing approach,
* and testcases/test-dev-new.js for a bunch of documented tests.
*/
@ -4193,7 +4193,7 @@ DUK_LOCAL void duk__expr_led(duk_compiler_ctx *comp_ctx, duk_ivalue *left, duk_i
* Truthval determines when to skip right-hand-side.
* For logical AND truthval=1, for logical OR truthval=0.
*
* See doc/compiler.txt for discussion on compiling logical
* See doc/compiler.rst for discussion on compiling logical
* AND and OR expressions. The approach here is very simplistic,
* generating extra jumps and multiple evaluations of truth values,
* but generates code on-the-fly with only local back-patching.
@ -4853,7 +4853,7 @@ DUK_LOCAL void duk__parse_var_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res)
DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res, duk_int_t pc_label_site) {
duk_hthread *thr = comp_ctx->thr;
duk_context *ctx = (duk_context *) thr;
duk_int_t pc_v34_lhs; /* start variant 3/4 left-hand-side code (L1 in doc/compiler.txt example) */
duk_int_t pc_v34_lhs; /* start variant 3/4 left-hand-side code (L1 in doc/compiler.rst example) */
duk_reg_t temp_reset; /* knock back "next temp" to this whenever possible */
duk_reg_t reg_temps; /* preallocated temporaries (2) for variants 3 and 4 */
@ -4880,7 +4880,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,
* Parsing these without arbitrary lookahead or backtracking is relatively
* tricky but we manage to do so for now.
*
* See doc/compiler.txt for a detailed discussion of control flow
* See doc/compiler.rst for a detailed discussion of control flow
* issues, evaluation order issues, etc.
*/
@ -5107,7 +5107,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,
*
* Variables set before entering here:
*
* pc_v34_lhs: insert a "JUMP L2" here (see doc/compiler.txt example).
* pc_v34_lhs: insert a "JUMP L2" here (see doc/compiler.rst example).
* reg_temps + 0: iteration target value (written to LHS)
* reg_temps + 1: enumerator object
*/
@ -5122,7 +5122,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,
/* First we need to insert a jump in the middle of previously
* emitted code to get the control flow right. No jumps can
* cross the position where the jump is inserted. See doc/compiler.txt
* cross the position where the jump is inserted. See doc/compiler.rst
* for discussion on the intricacies of control flow and side effects
* for variants 3 and 4.
*/
@ -5237,7 +5237,7 @@ DUK_LOCAL void duk__parse_switch_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *re
* only process the first match before switching to a "propagation" mode
* where case values are no longer evaluated
*
* See E5 Section 12.11. Also see doc/compiler.txt for compilation
* See E5 Section 12.11. Also see doc/compiler.rst for compilation
* discussion.
*/
@ -5720,7 +5720,7 @@ DUK_LOCAL void duk__parse_try_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res)
/*
* See the following documentation for discussion:
*
* doc/execution.txt: control flow details
* doc/execution.rst: control flow details
*
* Try, catch, and finally "parts" are Blocks, not Statements, so
* they must always be delimited by curly braces. This is unlike e.g.
@ -6172,7 +6172,7 @@ DUK_LOCAL void duk__parse_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res, duk_
*
* https://bugs.ecmascript.org/show_bug.cgi?id=8
*
* See doc/compiler.txt for details.
* See doc/compiler.rst for details.
*/
DUK_DDD(DUK_DDDPRINT("do statement"));
DUK_ASSERT(label_id >= 0);

2
src/duk_js_compiler.h

@ -218,7 +218,7 @@ struct duk_compiler_ctx {
* Prototypes
*/
#define DUK_JS_COMPILE_FLAG_EVAL (1 << 0) /* source is eval code (not program) */
#define DUK_JS_COMPILE_FLAG_EVAL (1 << 0) /* source is eval code (not global) */
#define DUK_JS_COMPILE_FLAG_STRICT (1 << 1) /* strict outer context */
#define DUK_JS_COMPILE_FLAG_FUNCEXPR (1 << 2) /* source is a function expression (used for Function constructor) */

2
src/duk_js_executor.c

@ -2174,7 +2174,7 @@ DUK_INTERNAL void duk_js_execute_bytecode(duk_hthread *exec_thr) {
* - the value stack (registers) of the current thread
* - the catch stack of the current thread
*
* See execution.txt for discussion.
* See execution.rst for discussion.
*/
DUK_ASSERT(thr != NULL);

8
src/duk_js_var.c

@ -13,8 +13,8 @@
* convenient field). The prototype chain is not followed in the ordinary
* sense for variable lookups.
*
* See identifier-handling.txt for more details on the identifier algorithms
* and the internal representation. See function-objects.txt for details on
* See identifier-handling.rst for more details on the identifier algorithms
* and the internal representation. See function-objects.rst for details on
* what function templates and instances are expected to look like.
*
* Care must be taken to avoid duk_tval pointer invalidation caused by
@ -55,7 +55,7 @@ typedef struct {
*
* See E5 Section 13.2 for detailed requirements on the function objects;
* there are no similar requirements for function "templates" which are an
* implementation dependent internal feature. Also see function-objects.txt
* implementation dependent internal feature. Also see function-objects.rst
* for a discussion on the function instance properties provided by this
* implementation.
*
@ -213,7 +213,7 @@ void duk_js_push_closure(duk_hthread *thr,
* _Lexenv is always set; _Varenv defaults to _Lexenv if missing,
* so _Varenv is only set if _Lexenv != _Varenv.
*
* This is relatively complex, see doc/identifier-handling.txt.
* This is relatively complex, see doc/identifier-handling.rst.
*/
if (DUK_HOBJECT_HAS_NEWENV(&fun_temp->obj)) {

4
src/duk_numconv.c

@ -7,7 +7,7 @@
* and uses a minimum number of digits. The big number arithmetic has a
* fixed maximum size and does not require dynamic allocations.
*
* See: doc/number-conversion.txt.
* See: doc/number-conversion.rst.
*/
#include "duk_internal.h"
@ -622,7 +622,7 @@ DUK_LOCAL void duk__bi_exp_small(duk__bigint *x, duk_small_int_t b, duk_small_in
* The same algorithm is used for number parsing (with b=10 and B=2)
* by generating one extra digit and doing rounding manually.
*
* See doc/number-conversion.txt for limitations.
* See doc/number-conversion.rst for limitations.
*/
/* Maximum number of digits generated. */

8
src/duk_regexp_compiler.c

@ -1,7 +1,7 @@
/*
* Regexp compilation.
*
* See doc/regexp.txt for a discussion of the compilation approach and
* See doc/regexp.rst for a discussion of the compilation approach and
* current limitations.
*
* Regexp bytecode assumes jumps can be expressed with signed 32-bit
@ -142,7 +142,7 @@ DUK_LOCAL void duk__remove_slice(duk_re_compiler_ctx *re_ctx, duk_uint32_t data_
*
* Computing the final (adjusted) skip value, which is relative to the
* first byte of the next instruction, is a bit tricky because of the
* variable length UTF-8 encoding. See doc/regexp.txt for discussion.
* variable length UTF-8 encoding. See doc/regexp.rst for discussion.
*/
DUK_LOCAL duk_uint32_t duk__insert_jump_offset(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip) {
duk_small_int_t len;
@ -175,7 +175,7 @@ DUK_LOCAL duk_uint32_t duk__append_jump_offset(duk_re_compiler_ctx *re_ctx, duk_
* continuous range is not necessarily continuous (e.g. [x-{] is
* continuous but [X-{] is not). The current algorithm creates the
* canonicalized range(s) space efficiently at the cost of compile
* time execution time (see doc/regexp.txt for discussion).
* time execution time (see doc/regexp.rst for discussion).
*
* Note that the ctx->nranges is a context-wide temporary value
* (this is OK because there cannot be multiple character classes
@ -241,7 +241,7 @@ DUK_LOCAL void duk__generate_ranges(void *userdata, duk_codepoint_t r1, duk_code
* lookaheads, capturing parentheses, and non-capturing parentheses.
*
* The function determines whether the entire disjunction is a 'simple atom'
* (see doc/regexp.txt discussion on 'simple quantifiers') and if so,
* (see doc/regexp.rst discussion on 'simple quantifiers') and if so,
* returns the atom character length which is needed by the caller to keep
* track of its own atom character length. A disjunction with more than one
* alternative is never considered a simple atom (although in some cases

2
src/duk_regexp_executor.c

@ -179,7 +179,7 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
*
* There is no opcode for matching multiple characters. The
* regexp compiler has trouble joining strings efficiently
* during compilation. See doc/regexp.txt for more discussion.
* during compilation. See doc/regexp.rst for more discussion.
*/
duk_codepoint_t c1, c2;

2
src/duk_tval.h

@ -11,7 +11,7 @@
*
* Selecting the tagged type format involves many trade-offs (memory
* use, size and performance of generated code, portability, etc),
* see doc/types.txt for a detailed discussion (especially of how the
* see doc/types.rst for a detailed discussion (especially of how the
* IEEE double format is used to pack tagged values).
*
* NB: because macro arguments are often expressions, macros should

29
tests/ecmascript/test-dev-compiler-dynamic-scope.js

@ -0,0 +1,29 @@
/*
* Example from compiler.rst.
*
* A direct eval() call inside f() may declare new variables in f()'s scope
* at run time, so which 'foo' the returned closure sees depends on the
* string passed to eval().
*/
/*===
321
123
===*/
var foo = 123;
var myfunc;
// f() evaluates 'x' with a direct eval() and returns a closure which prints
// whichever 'foo' binding is visible from inside f().
function f(x) {
eval(x);
return function () { print(foo); }
}
// declare 'foo' in f(), returned closure sees this 'foo' instead
// of the global one
myfunc = f('var foo = 321');
myfunc(); // prints 321, not 123
// don't declare 'foo' in f(), returned closure sees the global 'foo'
// because there is no local one
myfunc = f('var quux = 432');
myfunc(); // prints 123

16
website/api/duk_compile.yaml

@ -27,7 +27,7 @@ summary: |
<p>The source code being compiled may be:</p>
<ul>
<li>Program code: compiles into a function with zero arguments, which
<li>Global code: compiles into a function with zero arguments, which
executes like a top level Ecmascript program (default)</li>
<li>Eval code: compiles into a function with zero arguments, which
executes like an Ecmascript <code>eval</code> call
@ -39,14 +39,14 @@ summary: |
<p>All of these have slightly different semantics in Ecmascript. See
<a href="http://www.ecma-international.org/ecma-262/5.1/#sec-10.4">Establishing an Execution Context</a>
for a detailed discussion.
One major difference is that program and eval contexts have an implicit
One major difference is that global and eval contexts have an implicit
return value: the last <i>non-empty</i> statement value is an automatic
return value for the global or eval code, whereas functions don't have
an automatic return value.
</p>
<p>Program and eval code don't have an explicit <code>function</code> syntax.
For instance, the following can be compiled both as a program and as an
<p>Global and eval code don't have an explicit <code>function</code> syntax.
For instance, the following can be compiled both as global code and as an
eval expression:</p>
<pre class="ecmascript-code">
print("Hello world!");
@ -71,14 +71,14 @@ summary: |
})
</pre>
<p>The bytecode generated for program and eval code is currently slower
<p>The bytecode generated for global and eval code is currently slower
than that generated for functions: a "slow path" is used for all variable
accesses in global and eval code, and the implicit return value handling
of global and eval code generates some unnecessary bytecode. From a
performance point of view (both memory and execution performance) it is
thus preferable to have as much code inside functions as possible.</p>
<p>When compiling eval and program expressions, be careful to avoid the
<p>When compiling eval and global code, be careful to avoid the
usual Ecmascript gotchas, such as:</p>
<pre class="ecmascript-code">
/* Function at top level is a function declaration which registers a global
@ -101,12 +101,12 @@ summary: |
</pre>
example: |
/* Program code. Note that the hello() function is a function
/* Global code. Note that the hello() function is a function
* declaration which gets registered into the global object when
* executed. Implicit return value is 123.
*/
duk_push_string(ctx, "print('program');\n"
duk_push_string(ctx, "print('global');\n"
"function hello() { print('Hello world!'); }\n"
"123;");
duk_push_string(ctx, "hello");

2
website/guide/performance.html

@ -157,7 +157,7 @@ slower.</p>
<p>To keep identifier accesses in the fast path:</p>
<ul>
<li>Execute (almost all) code inside Ecmascript functions, not in the top-level
program or eval code: global/eval code never uses fast path identifier
global or eval code: global/eval code never uses fast path identifier
accesses (however, function code inside global/eval does)</li>
<li>Store frequently accessed values in local variables instead of looking
them up from the global object or other objects</li>

Loading…
Cancel
Save