Merge branch 'add-compiler-doc'

Add a cleaned up, relatively old compiler design document.
9 years ago · e55e469c68
26 changed files with 2930 additions and 81 deletions
--- a/doc/compiler.rst
+++ b/doc/compiler.rst
--- a/src/duk_api_public.h.in
+++ b/src/duk_api_public.h.in
@ -154,9 +154,9 @@ struct duk_number_list_entry {
 #define DUK_ENUM_NO_PROXY_BEHAVIOR        (1 << 5)    /* enumerate a proxy object itself without invoking proxy behavior */

 /* Compilation flags for duk_compile() and duk_eval() */
-#define DUK_COMPILE_EVAL                  (1 << 0)    /* compile eval code (instead of program) */
-#define DUK_COMPILE_FUNCTION              (1 << 1)    /* compile function code (instead of program) */
-#define DUK_COMPILE_STRICT                (1 << 2)    /* use strict (outer) context for program, eval, or function */
+#define DUK_COMPILE_EVAL                  (1 << 0)    /* compile eval code (instead of global code) */
+#define DUK_COMPILE_FUNCTION              (1 << 1)    /* compile function code (instead of global code) */
+#define DUK_COMPILE_STRICT                (1 << 2)    /* use strict (outer) context for global, eval, or function code */
 #define DUK_COMPILE_SAFE                  (1 << 3)    /* (internal) catch compilation errors */
 #define DUK_COMPILE_NORESULT              (1 << 4)    /* (internal) omit eval result */
 #define DUK_COMPILE_NOSOURCE              (1 << 5)    /* (internal) no source string on stack */
--- a/src/duk_api_stack.c
+++ b/src/duk_api_stack.c
@ -512,7 +512,7 @@ DUK_LOCAL duk_bool_t duk__resize_valstack(duk_context *ctx, duk_size_t new_size)
 	 *
 	 * Note: cannot use a plain DUK_REALLOC() because a mark-and-sweep may
 	 * invalidate the original thr->valstack base pointer inside the realloc
-	 * process.  See doc/memory-management.txt.
+	 * process.  See doc/memory-management.rst.
 	 */

 	new_alloc_size = sizeof(duk_tval) * new_size;
--- a/src/duk_api_string.c
+++ b/src/duk_api_string.c
@ -31,7 +31,7 @@ DUK_LOCAL void duk__concat_and_join_helper(duk_context *ctx, duk_idx_t count_in,
 		h = duk_to_hstring(ctx, -((duk_idx_t) count) - 1);
 		DUK_ASSERT(h != NULL);

-		/* A bit tricky overflow test, see doc/code-issues.txt. */
+		/* A bit tricky overflow test, see doc/code-issues.rst. */
 		t1 = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h);
 		t2 = (duk_size_t) (count - 1);
 		limit = (duk_size_t) DUK_HSTRING_MAX_BYTELEN;
--- a/src/duk_error_augment.c
+++ b/src/duk_error_augment.c
@ -48,7 +48,7 @@
 *  The user error handler is stored in 'Duktape.errCreate' or
 *  'Duktape.errThrow' depending on whether we're augmenting the error at
 *  creation or throw time.  There are several alternatives to this approach,
- *  see doc/error-objects.txt for discussion.
+ *  see doc/error-objects.rst for discussion.
 *
 *  Note: since further longjmp()s may occur while calling the error handler
 *  (for many reasons, e.g. a labeled 'break' inside the handler), the
@ -174,7 +174,7 @@ DUK_LOCAL void duk__add_traceback(duk_hthread *thr, duk_hthread *thr_callstack,
 	 *  and cheap to create.  It may change arbitrarily from version to version.
 	 *  It should be decoded/accessed through version specific accessors only.
 	 *
-	 *  See doc/error-objects.txt.
+	 *  See doc/error-objects.rst.
 	 */

 	DUK_DDD(DUK_DDDPRINT("adding traceback to object: %!T",
--- a/src/duk_hobject_pc2line.c
+++ b/src/duk_hobject_pc2line.c
@ -4,7 +4,7 @@
 *
 *  The run-time pc2line data is bit-packed, and documented in:
 *
- *    doc/function-objects.txt
+ *    doc/function-objects.rst
 */

 #include "duk_internal.h"
--- a/src/duk_hobject_props.c
+++ b/src/duk_hobject_props.c
@ -2,8 +2,8 @@
 *  Hobject property set/get functionality.
 *
 *  This is very central functionality for size, performance, and compliance.
- *  It is also rather intricate; see hobject-algorithms.txt for discussion on
- *  the algorithms and memory-management.txt for discussion on refcounts and
+ *  It is also rather intricate; see hobject-algorithms.rst for discussion on
+ *  the algorithms and memory-management.rst for discussion on refcounts and
 *  side effect issues.
 *
 *  Notes:
--- a/src/duk_js_call.c
+++ b/src/duk_js_call.c
@ -804,7 +804,7 @@ void duk__adjust_valstack_and_top(duk_hthread *thr, duk_idx_t num_stack_args, du
 *  indices) cause an error to propagate out of this function.  If there is
 *  no catchpoint for this error, the fatal error handler is called.
 *
- *  See 'execution.txt'.
+ *  See 'execution.rst'.
 *
 *  The allowed thread states for making a call are:
 *    - thr matches heap->curr_thread, and thr is already RUNNING
@ -816,7 +816,7 @@ void duk__adjust_valstack_and_top(duk_hthread *thr, duk_idx_t num_stack_args, du
 *  avoiding a dozen helpers with awkward plumbing.
 *
 *  Note: setjmp() and local variables have a nasty interaction,
- *  see execution.txt; non-volatile locals modified after setjmp()
+ *  see execution.rst; non-volatile locals modified after setjmp()
 *  call are not guaranteed to keep their value.
 */

--- a/src/duk_js_compiler.c
+++ b/src/duk_js_compiler.c
@ -17,7 +17,7 @@
 *  Recursion limits are in key functions to prevent arbitrary C recursion:
 *  function body parsing, statement parsing, and expression parsing.
 *
- *  See doc/compiler.txt for discussion on the design.
+ *  See doc/compiler.rst for discussion on the design.
 *
 *  A few typing notes:
 *
@ -232,7 +232,7 @@ DUK_LOCAL_DECL duk_int_t duk__parse_func_like_fnum(duk_compiler_ctx *comp_ctx, d

 /* XXX: actually single step levels would work just fine, clean up */

-/* binding power "levels" (see doc/compiler.txt) */
+/* binding power "levels" (see doc/compiler.rst) */
 #define DUK__BP_INVALID                0             /* always terminates led() */
 #define DUK__BP_EOF                    2
 #define DUK__BP_CLOSING                4             /* token closes expression, e.g. ')', ']' */
@ -419,7 +419,7 @@ DUK_LOCAL void duk__advance_helper(duk_compiler_ctx *comp_ctx, duk_small_int_t e
 	 *  We can use either 't' or 't_nores'; the latter would not
 	 *  recognize keywords.  Some keywords can be followed by a
 	 *  RegExp (e.g. "return"), so using 't' is better.  This is
-	 *  not trivial, see doc/compiler.txt.
+	 *  not trivial, see doc/compiler.rst.
 	 */

 	regexp = 1;
@ -3344,7 +3344,7 @@ DUK_LOCAL void duk__expr_nud(duk_compiler_ctx *comp_ctx, duk_ivalue *res) {
 		 *  such that parsing ends at an LPAREN (CallExpression) but not at
 		 *  a PERIOD or LBRACKET (MemberExpression).
 		 *
-		 *  See doc/compiler.txt for discussion on the parsing approach,
+		 *  See doc/compiler.rst for discussion on the parsing approach,
 		 *  and testcases/test-dev-new.js for a bunch of documented tests.
 		 */

@ -4193,7 +4193,7 @@ DUK_LOCAL void duk__expr_led(duk_compiler_ctx *comp_ctx, duk_ivalue *left, duk_i
 	 *  Truthval determines when to skip right-hand-side.
 	 *  For logical AND truthval=1, for logical OR truthval=0.
 	 *
-	 *  See doc/compiler.txt for discussion on compiling logical
+	 *  See doc/compiler.rst for discussion on compiling logical
 	 *  AND and OR expressions.  The approach here is very simplistic,
 	 *  generating extra jumps and multiple evaluations of truth values,
 	 *  but generates code on-the-fly with only local back-patching.
@ -4853,7 +4853,7 @@ DUK_LOCAL void duk__parse_var_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res)
 DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res, duk_int_t pc_label_site) {
 	duk_hthread *thr = comp_ctx->thr;
 	duk_context *ctx = (duk_context *) thr;
-	duk_int_t pc_v34_lhs;    /* start variant 3/4 left-hand-side code (L1 in doc/compiler.txt example) */
+	duk_int_t pc_v34_lhs;    /* start variant 3/4 left-hand-side code (L1 in doc/compiler.rst example) */
 	duk_reg_t temp_reset;    /* knock back "next temp" to this whenever possible */
 	duk_reg_t reg_temps;     /* preallocated temporaries (2) for variants 3 and 4 */

@ -4880,7 +4880,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,
 	 *  Parsing these without arbitrary lookahead or backtracking is relatively
 	 *  tricky but we manage to do so for now.
 	 *
-	 *  See doc/compiler.txt for a detailed discussion of control flow
+	 *  See doc/compiler.rst for a detailed discussion of control flow
 	 *  issues, evaluation order issues, etc.
 	 */

@ -5107,7 +5107,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,
 	 *
 	 *  Variables set before entering here:
 	 *
-	 *    pc_v34_lhs:    insert a "JUMP L2" here (see doc/compiler.txt example).
+	 *    pc_v34_lhs:    insert a "JUMP L2" here (see doc/compiler.rst example).
 	 *    reg_temps + 0: iteration target value (written to LHS)
 	 *    reg_temps + 1: enumerator object
 	 */
@ -5122,7 +5122,7 @@ DUK_LOCAL void duk__parse_for_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res,

 		/* First we need to insert a jump in the middle of previously
 		 * emitted code to get the control flow right.  No jumps can
-		 * cross the position where the jump is inserted.  See doc/compiler.txt
+		 * cross the position where the jump is inserted.  See doc/compiler.rst
 		 * for discussion on the intricacies of control flow and side effects
 		 * for variants 3 and 4.
 		 */
@ -5237,7 +5237,7 @@ DUK_LOCAL void duk__parse_switch_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *re
 	 *      only process the first match before switching to a "propagation" mode
 	 *      where case values are no longer evaluated
 	 *
-	 *  See E5 Section 12.11.  Also see doc/compiler.txt for compilation
+	 *  See E5 Section 12.11.  Also see doc/compiler.rst for compilation
 	 *  discussion.
 	 */

@ -5720,7 +5720,7 @@ DUK_LOCAL void duk__parse_try_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res)
 	/*
 	 *  See the following documentation for discussion:
 	 *
-	 *    doc/execution.txt: control flow details
+	 *    doc/execution.rst: control flow details
 	 *
 	 *  Try, catch, and finally "parts" are Blocks, not Statements, so
 	 *  they must always be delimited by curly braces.  This is unlike e.g.
@ -6172,7 +6172,7 @@ DUK_LOCAL void duk__parse_stmt(duk_compiler_ctx *comp_ctx, duk_ivalue *res, duk_
 		 *
 		 *    https://bugs.ecmascript.org/show_bug.cgi?id=8
 		 *
-		 *  See doc/compiler.txt for details.
+		 *  See doc/compiler.rst for details.
 		 */
 		DUK_DDD(DUK_DDDPRINT("do statement"));
 		DUK_ASSERT(label_id >= 0);
--- a/src/duk_js_compiler.h
+++ b/src/duk_js_compiler.h
@ -218,7 +218,7 @@ struct duk_compiler_ctx {
 *  Prototypes
 */

-#define DUK_JS_COMPILE_FLAG_EVAL      (1 << 0)  /* source is eval code (not program) */
+#define DUK_JS_COMPILE_FLAG_EVAL      (1 << 0)  /* source is eval code (not global) */
 #define DUK_JS_COMPILE_FLAG_STRICT    (1 << 1)  /* strict outer context */
 #define DUK_JS_COMPILE_FLAG_FUNCEXPR  (1 << 2)  /* source is a function expression (used for Function constructor) */

--- a/src/duk_js_executor.c
+++ b/src/duk_js_executor.c
@ -2174,7 +2174,7 @@ DUK_INTERNAL void duk_js_execute_bytecode(duk_hthread *exec_thr) {
 	 *    - the value stack (registers) of the current thread
 	 *    - the catch stack of the current thread
 	 *
-	 *  See execution.txt for discussion.
+	 *  See execution.rst for discussion.
 	 */

 	DUK_ASSERT(thr != NULL);
--- a/src/duk_js_var.c
+++ b/src/duk_js_var.c
@ -13,8 +13,8 @@
 *  convenient field).  The prototype chain is not followed in the ordinary
 *  sense for variable lookups.
 *
- *  See identifier-handling.txt for more details on the identifier algorithms
- *  and the internal representation.  See function-objects.txt for details on
+ *  See identifier-handling.rst for more details on the identifier algorithms
+ *  and the internal representation.  See function-objects.rst for details on
 *  what function templates and instances are expected to look like.
 *
 *  Care must be taken to avoid duk_tval pointer invalidation caused by
@ -55,7 +55,7 @@ typedef struct {
 *
 *  See E5 Section 13.2 for detailed requirements on the function objects;
 *  there are no similar requirements for function "templates" which are an
- *  implementation dependent internal feature.  Also see function-objects.txt
+ *  implementation dependent internal feature.  Also see function-objects.rst
 *  for a discussion on the function instance properties provided by this
 *  implementation.
 *
@ -213,7 +213,7 @@ void duk_js_push_closure(duk_hthread *thr,
 	 *  _Lexenv is always set; _Varenv defaults to _Lexenv if missing,
 	 *  so _Varenv is only set if _Lexenv != _Varenv.
 	 *
-	 *  This is relatively complex, see doc/identifier-handling.txt.
+	 *  This is relatively complex, see doc/identifier-handling.rst.
 	 */

 	if (DUK_HOBJECT_HAS_NEWENV(&fun_temp->obj)) {
--- a/src/duk_numconv.c
+++ b/src/duk_numconv.c
@ -7,7 +7,7 @@
 *  and uses a minimum number of digits.  The big number arithmetic has a
 *  fixed maximum size and does not require dynamic allocations.
 *
- *  See: doc/number-conversion.txt.
+ *  See: doc/number-conversion.rst.
 */

 #include "duk_internal.h"
@ -622,7 +622,7 @@ DUK_LOCAL void duk__bi_exp_small(duk__bigint *x, duk_small_int_t b, duk_small_in
 *  The same algorithm is used for number parsing (with b=10 and B=2)
 *  by generating one extra digit and doing rounding manually.
 *
- *  See doc/number-conversion.txt for limitations.
+ *  See doc/number-conversion.rst for limitations.
 */

 /* Maximum number of digits generated. */
--- a/src/duk_regexp_compiler.c
+++ b/src/duk_regexp_compiler.c
@ -1,7 +1,7 @@
 /*
 *  Regexp compilation.
 *
- *  See doc/regexp.txt for a discussion of the compilation approach and
+ *  See doc/regexp.rst for a discussion of the compilation approach and
 *  current limitations.
 *
 *  Regexp bytecode assumes jumps can be expressed with signed 32-bit
@ -142,7 +142,7 @@ DUK_LOCAL void duk__remove_slice(duk_re_compiler_ctx *re_ctx, duk_uint32_t data_
 *
 *  Computing the final (adjusted) skip value, which is relative to the
 *  first byte of the next instruction, is a bit tricky because of the
- *  variable length UTF-8 encoding.  See doc/regexp.txt for discussion.
+ *  variable length UTF-8 encoding.  See doc/regexp.rst for discussion.
 */
 DUK_LOCAL duk_uint32_t duk__insert_jump_offset(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip) {
 	duk_small_int_t len;
@ -175,7 +175,7 @@ DUK_LOCAL duk_uint32_t duk__append_jump_offset(duk_re_compiler_ctx *re_ctx, duk_
 *  continuous range is not necessarily continuous (e.g. [x-{] is
 *  continuous but [X-{] is not).  The current algorithm creates the
 *  canonicalized range(s) space efficiently at the cost of compile
- *  time execution time (see doc/regexp.txt for discussion).
+ *  time execution time (see doc/regexp.rst for discussion).
 *
 *  Note that the ctx->nranges is a context-wide temporary value
 *  (this is OK because there cannot be multiple character classes
@ -241,7 +241,7 @@ DUK_LOCAL void duk__generate_ranges(void *userdata, duk_codepoint_t r1, duk_code
 *  lookaheads, capturing parentheses, and non-capturing parentheses.
 *
 *  The function determines whether the entire disjunction is a 'simple atom'
- *  (see doc/regexp.txt discussion on 'simple quantifiers') and if so,
+ *  (see doc/regexp.rst discussion on 'simple quantifiers') and if so,
 *  returns the atom character length which is needed by the caller to keep
 *  track of its own atom character length.  A disjunction with more than one
 *  alternative is never considered a simple atom (although in some cases
--- a/src/duk_regexp_executor.c
+++ b/src/duk_regexp_executor.c
@ -179,7 +179,7 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
 			 *
 			 *  There is no opcode for matching multiple characters.  The
 			 *  regexp compiler has trouble joining strings efficiently
-			 *  during compilation.  See doc/regexp.txt for more discussion.
+			 *  during compilation.  See doc/regexp.rst for more discussion.
 			 */
 			duk_codepoint_t c1, c2;

--- a/src/duk_tval.h
+++ b/src/duk_tval.h
@ -11,7 +11,7 @@
 *
 *  Selecting the tagged type format involves many trade-offs (memory
 *  use, size and performance of generated code, portability, etc),
- *  see doc/types.txt for a detailed discussion (especially of how the
+ *  see doc/types.rst for a detailed discussion (especially of how the
 *  IEEE double format is used to pack tagged values).
 *
 *  NB: because macro arguments are often expressions, macros should
--- a/tests/ecmascript/test-dev-compiler-dynamic-scope.js
+++ b/tests/ecmascript/test-dev-compiler-dynamic-scope.js
@ -0,0 +1,29 @@
+/*
+ *  Example from compiler.rst.
+ */
+
+/*===
+321
+123
+===*/
+
+var foo = 123;
+var myfunc;
+
+function f(x) {
+    eval(x);
+
+    return function () { print(foo); }
+}
+
+// declare 'foo' in f(), returned closure sees this 'foo' instead
+// of the global one
+
+myfunc = f('var foo = 321');
+myfunc();  // prints 321, not 123
+
+// don't declare 'foo' in f(), returned closure sees the global 'foo'
+// because there is no local 'foo' in f() to shadow it
+
+myfunc = f('var quux = 432');
+myfunc();  // prints 123
--- a/website/api/duk_compile.yaml
+++ b/website/api/duk_compile.yaml
@ -27,7 +27,7 @@ summary: |
  <p>The source code being compiled may be:</p>

  <ul>
-  <li>Program code: compiles into a function with zero arguments, which
+  <li>Global code: compiles into a function with zero arguments, which
      executes like a top level Ecmascript program (default)</li>
  <li>Eval code: compiles into a function with zero arguments, which
      executes like an Ecmascript <code>eval</code> call
@ -39,14 +39,14 @@ summary: |
  <p>All of these have slightly different semantics in Ecmascript.  See
  <a href="http://www.ecma-international.org/ecma-262/5.1/#sec-10.4">Establishing an Execution Context</a>
  for a detailed discussion.
-  One major difference is that program and eval contexts have an implicit
+  One major difference is that global and eval contexts have an implicit
  return value: the last <i>non-empty</i> statement value is an automatic
  return value for the program or eval code, whereas functions don't have
  an automatic return value.
  </p>

-  <p>Program and eval code don't have an explicit <code>function</code> syntax.
-  For instance, the following can be compiled both as a program and as an
+  <p>Global and eval code don't have an explicit <code>function</code> syntax.
+  For instance, the following can be compiled both as global code and as an
  eval expression:</p>
  <pre class="ecmascript-code">
  print("Hello world!");
@ -71,14 +71,14 @@ summary: |
  })
  </pre>

-  <p>The bytecode generated for program and eval code is currently slower
+  <p>The bytecode generated for global and eval code is currently slower
  than that generated for functions: a "slow path" is used for all variable
  accesses in program and eval code, and the implicit return value handling
  of program and eval code generates some unnecessary bytecode.  From a
  performance point of view (both memory and execution performance) it is
  thus preferable to have as much code inside functions as possible.</p>

-  <p>When compiling eval and program expressions, be careful to avoid the
+  <p>When compiling eval and global code, be careful to avoid the
  usual Ecmascript gotchas, such as:</p>
  <pre class="ecmascript-code">
  /* Function at top level is a function declaration which registers a global
@ -101,12 +101,12 @@ summary: |
  </pre>

 example: |
-  /* Program code.  Note that the hello() function is a function
+  /* Global code.  Note that the hello() function is a function
   * declaration which gets registered into the global object when
   * executed.  Implicit return value is 123.
   */

-  duk_push_string(ctx, "print('program');\n"
+  duk_push_string(ctx, "print('global');\n"
                       "function hello() { print('Hello world!'); }\n"
                       "123;");
  duk_push_string(ctx, "hello");
--- a/website/guide/performance.html
+++ b/website/guide/performance.html
@ -157,7 +157,7 @@ slower.</p>
 <p>To keep identifier accesses in the fast path:</p>
 <ul>
 <li>Execute (almost all) inside Ecmascript functions, not in the top-level
-    program or eval code: global/eval code never uses fast path identifier
+    global or eval code: global/eval code never uses fast path identifier
    accesses (however, function code inside global/eval does)</li>
 <li>Store frequently accessed values in local variables instead of looking
    them up from the global object or other objects</li>