From 9e8f16c43eb082a61b03dc2441ee8f67ca73ee69 Mon Sep 17 00:00:00 2001 From: Sami Vaarala Date: Fri, 26 Aug 2016 00:28:49 +0300 Subject: [PATCH] Split dist util, reorg tools, Python PEP8 --- Makefile | 3 +- config/extract_unique_options.py | 41 - config/genconfig.py | 1536 ----------- debugger/merge_debug_meta.py | 32 - debugger/util/heapjson_convert.py | 70 +- examples/alloc-logging/log2gnuplot.py | 40 +- examples/alloc-logging/pool_simulator.py | 1438 +++++------ misc/bin2img.py | 91 +- misc/c_overflow_test.py | 34 +- misc/chaos.py | 1 - src/dukutil.py | 266 -- src/extract_caseconv.py | 444 ---- src/extract_chars.py | 353 --- src/genbuildparams.py | 44 - src/genbuiltins.py | 2983 --------------------- src/genequivyear.py | 57 - src/genexesizereport.py | 164 -- src/genhashsizes.py | 124 - src/genobjsizereport.py | 73 - src/prepare_unicode_data.py | 37 - src/scan_used_stridx_bidx.py | 56 - tools/combine_src.py | 257 ++ tools/create_spdx_license.py | 246 ++ tools/duk_meta_to_strarray.py | 49 + tools/dukutil.py | 259 ++ tools/dump_bytecode.py | 130 + tools/extract_caseconv.py | 444 ++++ tools/extract_chars.py | 382 +++ tools/extract_unique_options.py | 41 + tools/genbuildparams.py | 44 + tools/genbuiltins.py | 2985 ++++++++++++++++++++++ tools/genconfig.py | 1530 +++++++++++ tools/json2yaml.py | 5 + tools/merge_debug_meta.py | 32 + tools/prepare_sources.py | 854 +++++++ tools/prepare_unicode_data.py | 37 + tools/resolve_combined_lineno.py | 26 + tools/scan_strings.py | 135 + tools/scan_used_stridx_bidx.py | 56 + {util => tools}/yaml2json.py | 2 +- util/autofix_debuglog_calls.py | 62 +- util/check_code_policy.py | 722 +++--- util/combine_src.py | 257 -- util/create_spdx_license.py | 246 -- util/ditz_hack.py | 30 - util/duk_meta_to_strarray.py | 49 - util/dump_bytecode.py | 130 - util/example_rombuild.sh | 43 +- util/example_user_builtins1.yaml | 6 +- util/fastint_reps.py | 102 +- util/filter_test262_log.py | 216 +- util/find_func_calls.py | 76 +- util/find_non_ascii.py | 24 +- util/fix_emscripten.py | 64 +- util/format_perftest.py | 78 +- {src => util}/gendoubleconsts.py | 40 +- util/genequivyear.py | 57 + util/genexesizereport.py | 164 ++ util/genhashsizes.py | 124 + {src => util}/gennumdigits.py | 24 +- util/genobjsizereport.py | 73 + util/json2yaml.py | 5 - util/make_ascii.py | 8 +- util/make_dist.py | 1316 ++++------ util/matrix_compile.py | 933 ++++--- util/prep_test.py | 310 +-- util/rdfdiff.py | 33 + util/resolve_combined_lineno.py | 26 - util/scan_strings.py | 135 - util/time_multi.py | 176 +- 70 files changed, 10680 insertions(+), 10250 deletions(-) delete mode 100644 config/extract_unique_options.py delete mode 100644 config/genconfig.py delete mode 100644 debugger/merge_debug_meta.py delete mode 100644 src/dukutil.py delete mode 100644 src/extract_caseconv.py delete mode 100644 src/extract_chars.py delete mode 100644 src/genbuildparams.py delete mode 100644 src/genbuiltins.py delete mode 100644 src/genequivyear.py delete mode 100644 src/genexesizereport.py delete mode 100644 src/genhashsizes.py delete mode 100644 src/genobjsizereport.py delete mode 100644 src/prepare_unicode_data.py delete mode 100644 src/scan_used_stridx_bidx.py create mode 100644 tools/combine_src.py create mode 100644 tools/create_spdx_license.py create mode 100644 tools/duk_meta_to_strarray.py create mode 100644 tools/dukutil.py create mode 100644 tools/dump_bytecode.py create mode 100644 tools/extract_caseconv.py create mode 100644 tools/extract_chars.py create mode 100644 tools/extract_unique_options.py 
create mode 100644 tools/genbuildparams.py create mode 100644 tools/genbuiltins.py create mode 100644 tools/genconfig.py create mode 100644 tools/json2yaml.py create mode 100644 tools/merge_debug_meta.py create mode 100644 tools/prepare_sources.py create mode 100644 tools/prepare_unicode_data.py create mode 100644 tools/resolve_combined_lineno.py create mode 100644 tools/scan_strings.py create mode 100644 tools/scan_used_stridx_bidx.py rename {util => tools}/yaml2json.py (55%) delete mode 100644 util/combine_src.py delete mode 100644 util/create_spdx_license.py delete mode 100644 util/ditz_hack.py delete mode 100644 util/duk_meta_to_strarray.py delete mode 100644 util/dump_bytecode.py rename {src => util}/gendoubleconsts.py (65%) create mode 100644 util/genequivyear.py create mode 100644 util/genexesizereport.py create mode 100644 util/genhashsizes.py rename {src => util}/gennumdigits.py (62%) create mode 100644 util/genobjsizereport.py delete mode 100644 util/json2yaml.py create mode 100644 util/rdfdiff.py delete mode 100644 util/resolve_combined_lineno.py delete mode 100644 util/scan_strings.py diff --git a/Makefile b/Makefile index 2c46246f..abd7f1f2 100644 --- a/Makefile +++ b/Makefile @@ -1152,8 +1152,9 @@ codepolicycheck: --check-non-ascii \ --check-trailing-whitespace \ --check-mixed-indent \ + --check-tab-indent \ --dump-vim-commands \ - src/*.py + src/*.py tools/*.py util/*.py debugger/*/*.py examples/*/*.py @$(PYTHON) util/check_code_policy.py \ $(CODEPOLICYOPTS) \ --check-debug-log-calls \ diff --git a/config/extract_unique_options.py b/config/extract_unique_options.py deleted file mode 100644 index d378a69d..00000000 --- a/config/extract_unique_options.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python2 -# -# Extract unique DUK_USE_xxx flags from current code base: -# -# $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in -# - -import os, sys, re - -# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched -# (only uppercase allowed) -re_use = re.compile(r'DUK_USE_[A-Z0-9_]+') -re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+') - -def main(): - uses = {} - opts = {} - - for fn in sys.argv[1:]: - f = open(fn, 'rb') - for line in f: - for t in re.findall(re_use, line): - if t[-1] != '_': # skip e.g. 'DUK_USE_' - uses[t] = True - for t in re.findall(re_opt, line): - if t[-1] != '_': - opts[t] = True - f.close() - - k = opts.keys() - k.sort() - for i in k: - print(i) - - k = uses.keys() - k.sort() - for i in k: - print(i) - -if __name__ == '__main__': - main() diff --git a/config/genconfig.py b/config/genconfig.py deleted file mode 100644 index 82cf6139..00000000 --- a/config/genconfig.py +++ /dev/null @@ -1,1536 +0,0 @@ -#!/usr/bin/env python2 -# -# Process Duktape option metadata and produce various useful outputs: -# -# - duk_config.h with specific or autodetected platform, compiler, and -# architecture; forced options; sanity checks; etc -# - option documentation for Duktape 1.x feature options (DUK_OPT_xxx) -# - option documentation for Duktape 1.x/2.x config options (DUK_USE_xxx) -# -# Genconfig tries to build all outputs based on modular metadata, so that -# managing a large number of config options (which is hard to avoid given -# the wide range of targets Duktape supports) remains maintainable. -# -# Genconfig does *not* try to support all exotic platforms out there. -# Instead, the goal is to allow the metadata to be extended, or to provide -# a reasonable starting point for manual duk_config.h tweaking. 
-# -# For Duktape 1.3 release the main goal was to autogenerate a Duktape 1.2 -# compatible "autodetect" header from legacy snippets, with other outputs -# being experimental. For Duktape 1.4 duk_config.h is always created from -# modular sources. -# - -import os -import sys -import re -import json -import yaml -import optparse -import tarfile -import tempfile -import atexit -import shutil -try: - from StringIO import StringIO -except ImportError: - from io import StringIO - -# -# Globals holding scanned metadata, helper snippets, etc -# - -# Metadata to scan from config files. -use_defs = None -use_defs_list = None -opt_defs = None -opt_defs_list = None -use_tags = None -use_tags_list = None -tags_meta = None -required_use_meta_keys = [ - 'define', - 'introduced', - 'default', - 'tags', - 'description' -] -allowed_use_meta_keys = [ - 'define', - 'feature_enables', - 'feature_disables', - 'feature_snippet', - 'feature_no_default', - 'related_feature_defines', - 'introduced', - 'deprecated', - 'removed', - 'unused', - 'requires', - 'conflicts', - 'related', - 'default', - 'tags', - 'description', -] -required_opt_meta_keys = [ - 'define', - 'introduced', - 'tags', - 'description' -] -allowed_opt_meta_keys = [ - 'define', - 'introduced', - 'deprecated', - 'removed', - 'unused', - 'requires', - 'conflicts', - 'related', - 'tags', - 'description' -] - -# Preferred tag order for option documentation. -doc_tag_order = [ - 'portability', - 'memory', - 'lowmemory', - 'ecmascript', - 'execution', - 'debugger', - 'debug', - 'development' -] - -# Preferred tag order for generated C header files. -header_tag_order = doc_tag_order - -# Helper headers snippets. -helper_snippets = None - -# Assume these provides come from outside. -assumed_provides = { - 'DUK_SINGLE_FILE': True, # compiling Duktape from a single source file (duktape.c) version - 'DUK_COMPILING_DUKTAPE': True, # compiling Duktape (not user application) - 'DUK_CONFIG_H_INCLUDED': True, # artifact, include guard -} - -# Platform files must provide at least these (additional checks -# in validate_platform_file()). Fill-ins provide missing optionals. -platform_required_provides = [ - 'DUK_USE_OS_STRING' # must be #define'd -] - -# Architecture files must provide at least these (additional checks -# in validate_architecture_file()). Fill-ins provide missing optionals. -architecture_required_provides = [ - 'DUK_USE_ARCH_STRING' -] - -# Compiler files must provide at least these (additional checks -# in validate_compiler_file()). Fill-ins provide missing optionals. -compiler_required_provides = [ - # Compilers need a lot of defines; missing defines are automatically - # filled in with defaults (which are mostly compiler independent), so - # the requires define list is not very large. - - 'DUK_USE_COMPILER_STRING', # must be #define'd - 'DUK_USE_BRANCH_HINTS', # may be #undef'd, as long as provided - 'DUK_USE_VARIADIC_MACROS', # may be #undef'd, as long as provided - 'DUK_USE_UNION_INITIALIZERS' # may be #undef'd, as long as provided -] - -# -# Miscellaneous helpers -# - -def get_auto_delete_tempdir(): - tmpdir = tempfile.mkdtemp(suffix='-genconfig') - def _f(dirname): - #print('Deleting temporary directory: %r' % dirname) - if os.path.isdir(dirname) and '-genconfig' in dirname: - shutil.rmtree(dirname) - atexit.register(_f, tmpdir) - return tmpdir - -def strip_comments_from_lines(lines): - # Not exact but close enough. Doesn't handle string literals etc, - # but these are not a concrete issue for scanning preprocessor - # #define references. 
- # - # Comment contents are stripped of any DUK_ prefixed text to avoid - # incorrect requires/provides detection. Other comment text is kept; - # in particular a "/* redefine */" comment must remain intact here. - # (The 'redefine' hack is not actively needed now.) - # - # Avoid Python 2.6 vs. Python 2.7 argument differences. - - def censor(x): - return re.sub(re.compile('DUK_\w+', re.MULTILINE), 'xxx', x.group(0)) - - tmp = '\n'.join(lines) - tmp = re.sub(re.compile('/\*.*?\*/', re.MULTILINE | re.DOTALL), censor, tmp) - tmp = re.sub(re.compile('//.*?$', re.MULTILINE), censor, tmp) - return tmp.split('\n') - -# Header snippet representation: lines, provides defines, requires defines. -re_line_provides = re.compile(r'^#(?:define|undef)\s+(\w+).*$') -re_line_requires = re.compile(r'(DUK_[A-Z0-9_]+)') # uppercase only, don't match DUK_USE_xxx for example -class Snippet: - lines = None # lines of text and/or snippets - provides = None # map from define to 'True' for now - requires = None # map from define to 'True' for now - - def __init__(self, lines, provides=None, requires=None, autoscan_requires=True, autoscan_provides=True): - self.lines = [] - if not isinstance(lines, list): - raise Exception('Snippet constructor must be a list (not e.g. a string): %s' % repr(lines)) - for line in lines: - if isinstance(line, str): - self.lines.append(line) - elif isinstance(line, unicode): - self.lines.append(line.encode('utf-8')) - else: - raise Exception('invalid line: %r' % line) - self.provides = {} - if provides is not None: - for k in provides.keys(): - self.provides[k] = True - self.requires = {} - if requires is not None: - for k in requires.keys(): - self.requires[k] = True - - stripped_lines = strip_comments_from_lines(lines) - # for line in stripped_lines: print(line) - - for line in stripped_lines: - # Careful with order, snippet may self-reference its own - # defines in which case there's no outward dependency. - # (This is not 100% because the order of require/provide - # matters and this is not handled now.) - # - # Also, some snippets may #undef/#define another define but - # they don't "provide" the define as such. Such redefinitions - # are marked "/* redefine */" in the snippets. They're best - # avoided (and not currently needed in Duktape 1.4.0). - - if autoscan_provides: - m = re_line_provides.match(line) - if m is not None and '/* redefine */' not in line and \ - len(m.group(1)) > 0 and m.group(1)[-1] != '_': - # Don't allow e.g. DUK_USE_ which results from matching DUK_USE_xxx - #print('PROVIDES: %r' % m.group(1)) - self.provides[m.group(1)] = True - if autoscan_requires: - matches = re.findall(re_line_requires, line) - for m in matches: - if len(m) > 0 and m[-1] == '_': - # Don't allow e.g. 
DUK_USE_ which results from matching DUK_USE_xxx - pass - elif m[:7] == 'DUK_OPT': - # DUK_OPT_xxx always come from outside - pass - elif m[:7] == 'DUK_USE': - # DUK_USE_xxx are internal and they should not be 'requirements' - pass - elif self.provides.has_key(m): - # Snippet provides it's own require; omit - pass - else: - #print('REQUIRES: %r' % m) - self.requires[m] = True - - def fromFile(cls, filename): - lines = [] - with open(filename, 'rb') as f: - for line in f: - if line[-1] == '\n': - line = line[:-1] - if line[:8] == '#snippet': - m = re.match(r'#snippet\s+"(.*?)"', line) - # XXX: better plumbing for lookup path - sub_fn = os.path.normpath(os.path.join(filename, '..', '..', 'header-snippets', m.group(1))) - #print('#snippet ' + sub_fn) - sn = Snippet.fromFile(sub_fn) - lines += sn.lines - else: - lines.append(line) - return Snippet(lines, autoscan_requires=True, autoscan_provides=True) - fromFile = classmethod(fromFile) - - def merge(cls, snippets): - ret = Snippet([], [], []) - for s in snippets: - ret.lines += s.lines - for k in s.provides.keys(): - ret.provides[k] = True - for k in s.requires.keys(): - ret.requires[k] = True - return ret - merge = classmethod(merge) - -# Helper for building a text file from individual lines, injected files, etc. -# Inserted values are converted to Snippets so that their provides/requires -# information can be tracked. When non-C outputs are created, these will be -# bogus but ignored. -class FileBuilder: - vals = None # snippet list - base_dir = None - use_cpp_warning = False - - def __init__(self, base_dir=None, use_cpp_warning=False): - self.vals = [] - self.base_dir = base_dir - self.use_cpp_warning = use_cpp_warning - - def line(self, line): - self.vals.append(Snippet([ line ])) - - def lines(self, lines): - if len(lines) > 0 and lines[-1] == '\n': - lines = lines[:-1] # strip last newline to avoid empty line - self.vals.append(Snippet(lines.split('\n'))) - - def empty(self): - self.vals.append(Snippet([ '' ])) - - def rst_heading(self, title, char, doubled=False): - tmp = [] - if doubled: - tmp.append(char * len(title)) - tmp.append(title) - tmp.append(char * len(title)) - self.vals.append(Snippet(tmp)) - - def snippet_relative(self, fn): - sn = Snippet.fromFile(os.path.join(self.base_dir, fn)) - self.vals.append(sn) - return sn - - def snippet_absolute(self, fn): - sn = Snippet.fromFile(fn) - self.vals.append(sn) - return sn - - def cpp_error(self, msg): - # XXX: assume no newlines etc - self.vals.append(Snippet([ '#error %s' % msg ])) - - def cpp_warning(self, msg): - # XXX: assume no newlines etc - # XXX: support compiler specific warning mechanisms - if self.use_cpp_warning: - # C preprocessor '#warning' is often supported - self.vals.append(Snippet([ '#warning %s' % msg ])) - else: - self.vals.append(Snippet([ '/* WARNING: %s */' % msg ])) - - def cpp_warning_or_error(self, msg, is_error=True): - if is_error: - self.cpp_error(msg) - else: - self.cpp_warning(msg) - - def chdr_comment_line(self, msg): - self.vals.append(Snippet([ '/* %s */' % msg ])) - - def chdr_block_heading(self, msg): - lines = [] - lines.append('') - lines.append('/*') - lines.append(' * ' + msg) - lines.append(' */') - lines.append('') - self.vals.append(Snippet(lines)) - - def join(self): - tmp = [] - for line in self.vals: - if not isinstance(line, object): - raise Exception('self.vals must be all snippets') - for x in line.lines: # x is a Snippet - tmp.append(x) - return '\n'.join(tmp) - - def fill_dependencies_for_snippets(self, idx_deps): - 
fill_dependencies_for_snippets(self.vals, idx_deps) - -# Insert missing define dependencies into index 'idx_deps' repeatedly -# until no unsatisfied dependencies exist. This is used to pull in -# the required DUK_F_xxx helper defines without pulling them all in. -# The resolution mechanism also ensures dependencies are pulled in the -# correct order, i.e. DUK_F_xxx helpers may depend on each other (as -# long as there are no circular dependencies). -# -# XXX: this can be simplified a lot -def fill_dependencies_for_snippets(snippets, idx_deps): - # graph[A] = [ B, ... ] <-> B, ... provide something A requires. - graph = {} - snlist = [] - resolved = [] # for printing only - - def add(sn): - if sn in snlist: - return # already present - snlist.append(sn) - - to_add = [] - - for k in sn.requires.keys(): - if assumed_provides.has_key(k): - continue - - found = False - for sn2 in snlist: - if sn2.provides.has_key(k): - if not graph.has_key(sn): - graph[sn] = [] - graph[sn].append(sn2) - found = True # at least one other node provides 'k' - - if not found: - #print('Resolving %r' % k) - resolved.append(k) - - # Find a header snippet which provides the missing define. - # Some DUK_F_xxx files provide multiple defines, so we don't - # necessarily know the snippet filename here. - - sn_req = None - for sn2 in helper_snippets: - if sn2.provides.has_key(k): - sn_req = sn2 - break - if sn_req is None: - print(repr(sn.lines)) - raise Exception('cannot resolve missing require: %r' % k) - - # Snippet may have further unresolved provides; add recursively - to_add.append(sn_req) - - if not graph.has_key(sn): - graph[sn] = [] - graph[sn].append(sn_req) - - for sn in to_add: - add(sn) - - # Add original snippets. This fills in the required nodes - # recursively. - for sn in snippets: - add(sn) - - # Figure out fill-ins by looking for snippets not in original - # list and without any unserialized dependent nodes. 
- handled = {} - for sn in snippets: - handled[sn] = True - keepgoing = True - while keepgoing: - keepgoing = False - for sn in snlist: - if handled.has_key(sn): - continue - - success = True - for dep in graph.get(sn, []): - if not handled.has_key(dep): - success = False - if success: - snippets.insert(idx_deps, sn) - idx_deps += 1 - snippets.insert(idx_deps, Snippet([ '' ])) - idx_deps += 1 - handled[sn] = True - keepgoing = True - break - - # XXX: detect and handle loops cleanly - for sn in snlist: - if handled.has_key(sn): - continue - print('UNHANDLED KEY') - print('PROVIDES: %r' % sn.provides) - print('REQUIRES: %r' % sn.requires) - print('\n'.join(sn.lines)) - -# print(repr(graph)) -# print(repr(snlist)) -# print('Resolved helper defines: %r' % resolved) -# print('Resolved %d helper defines' % len(resolved)) - -def serialize_snippet_list(snippets): - ret = [] - - emitted_provides = {} - for k in assumed_provides.keys(): - emitted_provides[k] = True - - for sn in snippets: - ret += sn.lines - for k in sn.provides.keys(): - emitted_provides[k] = True - for k in sn.requires.keys(): - if not emitted_provides.has_key(k): - # XXX: conditional warning, happens in some normal cases - #print('WARNING: define %r required, not provided so far' % k) - pass - - return '\n'.join(ret) - -def remove_duplicate_newlines(x): - ret = [] - empty = False - for line in x.split('\n'): - if line == '': - if empty: - pass - else: - ret.append(line) - empty = True - else: - empty = False - ret.append(line) - return '\n'.join(ret) - -def scan_use_defs(dirname): - global use_defs, use_defs_list - use_defs = {} - use_defs_list = [] - - for fn in os.listdir(dirname): - root, ext = os.path.splitext(fn) - if not root.startswith('DUK_USE_') or ext != '.yaml': - continue - with open(os.path.join(dirname, fn), 'rb') as f: - doc = yaml.load(f) - if doc.get('example', False): - continue - if doc.get('unimplemented', False): - print('WARNING: unimplemented: %s' % fn) - continue - dockeys = doc.keys() - for k in dockeys: - if not k in allowed_use_meta_keys: - print('WARNING: unknown key %s in metadata file %s' % (k, fn)) - for k in required_use_meta_keys: - if not k in dockeys: - print('WARNING: missing key %s in metadata file %s' % (k, fn)) - - use_defs[doc['define']] = doc - - keys = use_defs.keys() - keys.sort() - for k in keys: - use_defs_list.append(use_defs[k]) - -def scan_opt_defs(dirname): - global opt_defs, opt_defs_list - opt_defs = {} - opt_defs_list = [] - - for fn in os.listdir(dirname): - root, ext = os.path.splitext(fn) - if not root.startswith('DUK_OPT_') or ext != '.yaml': - continue - with open(os.path.join(dirname, fn), 'rb') as f: - doc = yaml.load(f) - if doc.get('example', False): - continue - if doc.get('unimplemented', False): - print('WARNING: unimplemented: %s' % fn) - continue - dockeys = doc.keys() - for k in dockeys: - if not k in allowed_opt_meta_keys: - print('WARNING: unknown key %s in metadata file %s' % (k, fn)) - for k in required_opt_meta_keys: - if not k in dockeys: - print('WARNING: missing key %s in metadata file %s' % (k, fn)) - - opt_defs[doc['define']] = doc - - keys = opt_defs.keys() - keys.sort() - for k in keys: - opt_defs_list.append(opt_defs[k]) - -def scan_use_tags(): - global use_tags, use_tags_list - use_tags = {} - - for doc in use_defs_list: - for tag in doc.get('tags', []): - use_tags[tag] = True - - use_tags_list = use_tags.keys() - use_tags_list.sort() - -def scan_tags_meta(filename): - global tags_meta - - with open(filename, 'rb') as f: - tags_meta = yaml.load(f) - 
-def scan_helper_snippets(dirname): # DUK_F_xxx snippets - global helper_snippets - helper_snippets = [] - - for fn in os.listdir(dirname): - if (fn[0:6] != 'DUK_F_'): - continue - #print('Autoscanning snippet: %s' % fn) - helper_snippets.append(Snippet.fromFile(os.path.join(dirname, fn))) - -def get_opt_defs(removed=True, deprecated=True, unused=True): - ret = [] - for doc in opt_defs_list: - # XXX: aware of target version - if removed == False and doc.get('removed', None) is not None: - continue - if deprecated == False and doc.get('deprecated', None) is not None: - continue - if unused == False and doc.get('unused', False) == True: - continue - ret.append(doc) - return ret - -def get_use_defs(removed=True, deprecated=True, unused=True): - ret = [] - for doc in use_defs_list: - # XXX: aware of target version - if removed == False and doc.get('removed', None) is not None: - continue - if deprecated == False and doc.get('deprecated', None) is not None: - continue - if unused == False and doc.get('unused', False) == True: - continue - ret.append(doc) - return ret - -def validate_platform_file(filename): - sn = Snippet.fromFile(filename) - - for req in platform_required_provides: - if req not in sn.provides: - raise Exception('Platform %s is missing %s' % (filename, req)) - - # DUK_SETJMP, DUK_LONGJMP, DUK_JMPBUF_TYPE are optional, fill-in - # provides if none defined. - -def validate_architecture_file(filename): - sn = Snippet.fromFile(filename) - - for req in architecture_required_provides: - if req not in sn.provides: - raise Exception('Architecture %s is missing %s' % (filename, req)) - - # Byte order and alignment defines are allowed to be missing, - # a fill-in will handle them. This is necessary because for - # some architecture byte order and/or alignment may vary between - # targets and may be software configurable. - - # XXX: require automatic detection to be signaled? - # e.g. define DUK_USE_ALIGN_BY -1 - # define DUK_USE_BYTE_ORDER -1 - -def validate_compiler_file(filename): - sn = Snippet.fromFile(filename) - - for req in compiler_required_provides: - if req not in sn.provides: - raise Exception('Compiler %s is missing %s' % (filename, req)) - -def get_tag_title(tag): - meta = tags_meta.get(tag, None) - if meta is None: - return tag - else: - return meta.get('title', tag) - -def get_tag_description(tag): - meta = tags_meta.get(tag, None) - if meta is None: - return None - else: - return meta.get('description', None) - -def get_tag_list_with_preferred_order(preferred): - tags = [] - - # Preferred tags first - for tag in preferred: - if tag not in tags: - tags.append(tag) - - # Remaining tags in alphabetic order - for tag in use_tags_list: - if tag not in tags: - tags.append(tag) - - #print('Effective tag order: %r' % tags) - return tags - -def rst_format(text): - # XXX: placeholder, need to decide on markup conventions for YAML files - ret = [] - for para in text.split('\n'): - if para == '': - continue - ret.append(para) - return '\n\n'.join(ret) - -def cint_encode(x): - if not isinstance(x, (int, long)): - raise Exception('invalid input: %r' % x) - - # XXX: unsigned constants? 
- if x > 0x7fffffff or x < -0x80000000: - return '%dLL' % x - elif x > 0x7fff or x < -0x8000: - return '%dL' % x - else: - return '%d' % x - -def cstr_encode(x): - if isinstance(x, unicode): - x = x.encode('utf-8') - if not isinstance(x, str): - raise Exception('invalid input: %r' % x) - - res = '"' - term = False - has_terms = False - for c in x: - if term: - # Avoid ambiguous hex escapes - res += '" "' - term = False - has_terms = True - o = ord(c) - if o < 0x20 or o > 0x7e or c in '"\\': - res += '\\x%02x' % o - term = True - else: - res += c - res += '"' - - if has_terms: - res = '(' + res + ')' - - return res - -# -# Autogeneration of option documentation -# - -# Shared helper to generate DUK_OPT_xxx and DUK_USE_xxx documentation. -# XXX: unfinished placeholder -def generate_option_documentation(opts, opt_list=None, rst_title=None, include_default=False): - ret = FileBuilder(use_cpp_warning=opts.use_cpp_warning) - - tags = get_tag_list_with_preferred_order(doc_tag_order) - - title = rst_title - ret.rst_heading(title, '=', doubled=True) - - handled = {} - - for tag in tags: - first = True - - for doc in opt_list: - if tag != doc['tags'][0]: # sort under primary tag - continue - dname = doc['define'] - desc = doc.get('description', None) - - if handled.has_key(dname): - raise Exception('define handled twice, should not happen: %r' % dname) - handled[dname] = True - - if first: # emit tag heading only if there are subsections - ret.empty() - ret.rst_heading(get_tag_title(tag), '=') - - tag_desc = get_tag_description(tag) - if tag_desc is not None: - ret.empty() - ret.line(rst_format(tag_desc)) - first = False - - ret.empty() - ret.rst_heading(dname, '-') - - if desc is not None: - ret.empty() - ret.line(rst_format(desc)) - - if include_default: - ret.empty() - ret.line('Default: ``' + str(doc['default']) + '``') # XXX: rst or other format - - for doc in opt_list: - dname = doc['define'] - if not handled.has_key(dname): - raise Exception('unhandled define (maybe missing from tags list?): %r' % dname) - - ret.empty() - return ret.join() - -def generate_feature_option_documentation(opts): - defs = get_opt_defs() - return generate_option_documentation(opts, opt_list=defs, rst_title='Duktape feature options', include_default=False) - -def generate_config_option_documentation(opts): - defs = get_use_defs() - return generate_option_documentation(opts, opt_list=defs, rst_title='Duktape config options', include_default=True) - -# -# Helpers for duk_config.h generation -# - -def get_forced_options(opts): - # Forced options, last occurrence wins (allows a base config file to be - # overridden by a more specific one). - forced_opts = {} - for val in opts.force_options_yaml: - doc = yaml.load(StringIO(val)) - for k in doc.keys(): - if use_defs.has_key(k): - pass # key is known - else: - print('WARNING: option override key %s not defined in metadata, ignoring' % k) - forced_opts[k] = doc[k] # shallow copy - - if len(forced_opts.keys()) > 0: - print('Overrides: %s' % json.dumps(forced_opts)) - - return forced_opts - -# Emit a default #define / #undef for an option based on -# a config option metadata node (parsed YAML doc). 
-def emit_default_from_config_meta(ret, doc, forced_opts, undef_done): - defname = doc['define'] - defval = forced_opts.get(defname, doc['default']) - - if defval == True: - ret.line('#define ' + defname) - elif defval == False: - if not undef_done: - ret.line('#undef ' + defname) - else: - # Default value is false, and caller has emitted - # an unconditional #undef, so don't emit a duplicate - pass - elif isinstance(defval, (int, long)): - # integer value - ret.line('#define ' + defname + ' ' + cint_encode(defval)) - elif isinstance(defval, (str, unicode)): - # verbatim value - ret.line('#define ' + defname + ' ' + defval) - elif isinstance(defval, dict): - if defval.has_key('verbatim'): - # verbatim text for the entire line - ret.line(defval['verbatim']) - elif defval.has_key('string'): - # C string value - ret.line('#define ' + defname + ' ' + cstr_encode(defval['string'])) - else: - raise Exception('unsupported value for option %s: %r' % (defname, defval)) - else: - raise Exception('unsupported value for option %s: %r' % (defname, defval)) - -# Add a header snippet for detecting presence of DUK_OPT_xxx feature -# options which will be removed in Duktape 2.x. -def add_legacy_feature_option_checks(opts, ret): - ret.chdr_block_heading('Checks for legacy feature options (DUK_OPT_xxx)') - ret.empty() - - defs = [] - for doc in get_opt_defs(): - if doc['define'] not in defs: - defs.append(doc['define']) - for doc in get_opt_defs(): - for dname in doc.get('related_feature_defines', []): - if dname not in defs: - defs.append(dname) - defs.sort() - - for optname in defs: - suggested = [] - for doc in get_use_defs(): - if optname in doc.get('related_feature_defines', []): - suggested.append(doc['define']) - ret.line('#if defined(%s)' % optname) - if len(suggested) > 0: - ret.cpp_warning_or_error('unsupported legacy feature option %s used, consider options: %s' % (optname, ', '.join(suggested)), opts.sanity_strict) - else: - ret.cpp_warning_or_error('unsupported legacy feature option %s used' % optname, opts.sanity_strict) - ret.line('#endif') - - ret.empty() - -# Add a header snippet for checking consistency of DUK_USE_xxx config -# options, e.g. inconsistent options, invalid option values. 
-def add_config_option_checks(opts, ret): - ret.chdr_block_heading('Checks for config option consistency (DUK_USE_xxx)') - ret.empty() - - defs = [] - for doc in get_use_defs(): - if doc['define'] not in defs: - defs.append(doc['define']) - defs.sort() - - for optname in defs: - doc = use_defs[optname] - dname = doc['define'] - - # XXX: more checks - - if doc.get('removed', None) is not None: - ret.line('#if defined(%s)' % dname) - ret.cpp_warning_or_error('unsupported config option used (option has been removed): %s' % dname, opts.sanity_strict) - ret.line('#endif') - elif doc.get('deprecated', None) is not None: - ret.line('#if defined(%s)' % dname) - ret.cpp_warning_or_error('unsupported config option used (option has been deprecated): %s' % dname, opts.sanity_strict) - ret.line('#endif') - - for req in doc.get('requires', []): - ret.line('#if defined(%s) && !defined(%s)' % (dname, req)) - ret.cpp_warning_or_error('config option %s requires option %s (which is missing)' % (dname, req), opts.sanity_strict) - ret.line('#endif') - - for req in doc.get('conflicts', []): - ret.line('#if defined(%s) && defined(%s)' % (dname, req)) - ret.cpp_warning_or_error('config option %s conflicts with option %s (which is also defined)' % (dname, req), opts.sanity_strict) - ret.line('#endif') - - ret.empty() - ret.snippet_relative('cpp_exception_sanity.h.in') - ret.empty() - -# Add a header snippet for providing a __OVERRIDE_DEFINES__ section. -def add_override_defines_section(opts, ret): - ret.empty() - ret.line('/*') - ret.line(' * You may add overriding #define/#undef directives below for') - ret.line(' * customization. You of course cannot un-#include or un-typedef') - ret.line(' * anything; these require direct changes above.') - ret.line(' */') - ret.empty() - ret.line('/* __OVERRIDE_DEFINES__ */') - ret.empty() - -# Add automatic DUK_OPT_XXX and DUK_OPT_NO_XXX handling for backwards -# compatibility with Duktape 1.2 and before. -def add_feature_option_handling(opts, ret, forced_opts, already_provided_keys): - ret.chdr_block_heading('Feature option handling') - - for doc in get_use_defs(removed=False, deprecated=False, unused=False): - # If a related feature option exists, it can be used to force - # enable/disable the target feature. If neither feature option - # (DUK_OPT_xxx or DUK_OPT_NO_xxx) is given, revert to default. - - config_define = doc['define'] - - feature_define = None - feature_no_define = None - inverted = False - if doc.has_key('feature_enables'): - feature_define = doc['feature_enables'] - elif doc.has_key('feature_disables'): - feature_define = doc['feature_disables'] - inverted = True - else: - pass - - if feature_define is not None: - feature_no_define = 'DUK_OPT_NO_' + feature_define[8:] - ret.line('#if defined(%s)' % feature_define) - if inverted: - ret.line('#undef %s' % config_define) - else: - ret.line('#define %s' % config_define) - ret.line('#elif defined(%s)' % feature_no_define) - if inverted: - ret.line('#define %s' % config_define) - else: - ret.line('#undef %s' % config_define) - ret.line('#else') - undef_done = False - - # For some options like DUK_OPT_PACKED_TVAL the default comes - # from platform definition. - if doc.get('feature_no_default', False): - print('Skip default for option %s' % config_define) - ret.line('/* Already provided above */') - elif already_provided_keys.has_key(config_define): - # This is a fallback in case config option metadata is wrong. 
- print('Skip default for option %s (already provided but not flagged in metadata!)' % config_define) - ret.line('/* Already provided above */') - else: - emit_default_from_config_meta(ret, doc, forced_opts, undef_done) - ret.line('#endif') - elif doc.has_key('feature_snippet'): - ret.lines(doc['feature_snippet']) - else: - pass - - ret.empty() - - ret.empty() - -# Development time helper: add DUK_ACTIVE which provides a runtime C string -# indicating what DUK_USE_xxx config options are active at run time. This -# is useful in genconfig development so that one can e.g. diff the active -# run time options of two headers. This is intended just for genconfig -# development and is not available in normal headers. -def add_duk_active_defines_macro(ret): - ret.chdr_block_heading('DUK_ACTIVE_DEFINES macro (development only)') - - idx = 0 - for doc in get_use_defs(): - defname = doc['define'] - - ret.line('#if defined(%s)' % defname) - ret.line('#define DUK_ACTIVE_DEF%d " %s"' % (idx, defname)) - ret.line('#else') - ret.line('#define DUK_ACTIVE_DEF%d ""' % idx) - ret.line('#endif') - - idx += 1 - - tmp = [] - for i in xrange(idx): - tmp.append('DUK_ACTIVE_DEF%d' % i) - - ret.line('#define DUK_ACTIVE_DEFINES ("Active: ["' + ' '.join(tmp) + ' " ]")') - -# -# duk_config.h generation -# - -# Generate a duk_config.h where platform, architecture, and compiler are -# all either autodetected or specified by user. -# -# Autodetection is based on a configured list of supported platforms, -# architectures, and compilers. For example, platforms.yaml defines the -# supported platforms and provides a helper define (DUK_F_xxx) to use for -# detecting that platform, and names the header snippet to provide the -# platform-specific definitions. Necessary dependencies (DUK_F_xxx) are -# automatically pulled in. -# -# Automatic "fill ins" are used for mandatory platform, architecture, and -# compiler defines which have a reasonable portable default. This reduces -# e.g. compiler-specific define count because there are a lot compiler -# macros which have a good default. -def generate_duk_config_header(opts, meta_dir): - ret = FileBuilder(base_dir=os.path.join(meta_dir, 'header-snippets'), \ - use_cpp_warning=opts.use_cpp_warning) - - forced_opts = get_forced_options(opts) - - platforms = None - with open(os.path.join(meta_dir, 'platforms.yaml'), 'rb') as f: - platforms = yaml.load(f) - architectures = None - with open(os.path.join(meta_dir, 'architectures.yaml'), 'rb') as f: - architectures = yaml.load(f) - compilers = None - with open(os.path.join(meta_dir, 'compilers.yaml'), 'rb') as f: - compilers = yaml.load(f) - - # XXX: indicate feature option support, sanity checks enabled, etc - # in general summary of options, perhaps genconfig command line? 
- - ret.line('/*') - ret.line(' * duk_config.h configuration header generated by genconfig.py.') - ret.line(' *') - ret.line(' * Git commit: %s' % opts.git_commit or 'n/a') - ret.line(' * Git describe: %s' % opts.git_describe or 'n/a') - ret.line(' * Git branch: %s' % opts.git_branch or 'n/a') - ret.line(' *') - if opts.platform is not None: - ret.line(' * Platform: ' + opts.platform) - else: - ret.line(' * Supported platforms:') - for platf in platforms['autodetect']: - ret.line(' * - %s' % platf.get('name', platf.get('check'))) - ret.line(' *') - if opts.architecture is not None: - ret.line(' * Architecture: ' + opts.architecture) - else: - ret.line(' * Supported architectures:') - for arch in architectures['autodetect']: - ret.line(' * - %s' % arch.get('name', arch.get('check'))) - ret.line(' *') - if opts.compiler is not None: - ret.line(' * Compiler: ' + opts.compiler) - else: - ret.line(' * Supported compilers:') - for comp in compilers['autodetect']: - ret.line(' * - %s' % comp.get('name', comp.get('check'))) - ret.line(' *') - ret.line(' */') - ret.empty() - ret.line('#if !defined(DUK_CONFIG_H_INCLUDED)') - ret.line('#define DUK_CONFIG_H_INCLUDED') - ret.empty() - - ret.chdr_block_heading('Intermediate helper defines') - - # DLL build affects visibility attributes on Windows but unfortunately - # cannot be detected automatically from preprocessor defines or such. - # DLL build status is hidden behind DUK_F_DLL_BUILD and there are two - # ways for that to be set: - # - # - Duktape 1.3 backwards compatible DUK_OPT_DLL_BUILD - # - Genconfig --dll option - ret.chdr_comment_line('DLL build detection') - ret.line('#if defined(DUK_OPT_DLL_BUILD)') - ret.line('#define DUK_F_DLL_BUILD') - ret.line('#elif defined(DUK_OPT_NO_DLL_BUILD)') - ret.line('#undef DUK_F_DLL_BUILD') - ret.line('#else') - if opts.dll: - ret.line('/* configured for DLL build */') - ret.line('#define DUK_F_DLL_BUILD') - else: - ret.line('/* not configured for DLL build */') - ret.line('#undef DUK_F_DLL_BUILD') - ret.line('#endif') - ret.empty() - - idx_deps = len(ret.vals) # position where to emit DUK_F_xxx dependencies - - # Feature selection, system include, Date provider - # Most #include statements are here - - if opts.platform is not None: - ret.chdr_block_heading('Platform: ' + opts.platform) - - ret.snippet_relative('platform_cppextras.h.in') - ret.empty() - - # XXX: better to lookup platforms metadata - include = 'platform_%s.h.in' % opts.platform - abs_fn = os.path.join(meta_dir, 'platforms', include) - validate_platform_file(abs_fn) - ret.snippet_absolute(abs_fn) - else: - ret.chdr_block_heading('Platform autodetection') - - ret.snippet_relative('platform_cppextras.h.in') - ret.empty() - - for idx, platf in enumerate(platforms['autodetect']): - check = platf.get('check', None) - include = platf['include'] - abs_fn = os.path.join(meta_dir, 'platforms', include) - - validate_platform_file(abs_fn) - - if idx == 0: - ret.line('#if defined(%s)' % check) - else: - if check is None: - ret.line('#else') - else: - ret.line('#elif defined(%s)' % check) - ret.line('/* --- %s --- */' % platf.get('name', '???')) - ret.snippet_absolute(abs_fn) - ret.line('#endif /* autodetect platform */') - - ret.empty() - ret.snippet_relative('platform_sharedincludes.h.in') - ret.empty() - - byteorder_provided_by_all = True # byteorder provided by all architecture files - alignment_provided_by_all = True # alignment provided by all architecture files - packedtval_provided_by_all = True # packed tval provided by all architecture files - - 
if opts.architecture is not None: - ret.chdr_block_heading('Architecture: ' + opts.architecture) - - # XXX: better to lookup architectures metadata - include = 'architecture_%s.h.in' % opts.architecture - abs_fn = os.path.join(meta_dir, 'architectures', include) - validate_architecture_file(abs_fn) - sn = ret.snippet_absolute(abs_fn) - if not sn.provides.get('DUK_USE_BYTEORDER', False): - byteorder_provided_by_all = False - if not sn.provides.get('DUK_USE_ALIGN_BY', False): - alignment_provided_by_all = False - if sn.provides.get('DUK_USE_PACKED_TVAL', False): - ret.line('#define DUK_F_PACKED_TVAL_PROVIDED') # signal to fillin - else: - packedtval_provided_by_all = False - else: - ret.chdr_block_heading('Architecture autodetection') - - for idx, arch in enumerate(architectures['autodetect']): - check = arch.get('check', None) - include = arch['include'] - abs_fn = os.path.join(meta_dir, 'architectures', include) - - validate_architecture_file(abs_fn) - - if idx == 0: - ret.line('#if defined(%s)' % check) - else: - if check is None: - ret.line('#else') - else: - ret.line('#elif defined(%s)' % check) - ret.line('/* --- %s --- */' % arch.get('name', '???')) - sn = ret.snippet_absolute(abs_fn) - if not sn.provides.get('DUK_USE_BYTEORDER', False): - byteorder_provided_by_all = False - if not sn.provides.get('DUK_USE_ALIGN_BY', False): - alignment_provided_by_all = False - if sn.provides.get('DUK_USE_PACKED_TVAL', False): - ret.line('#define DUK_F_PACKED_TVAL_PROVIDED') # signal to fillin - else: - packedtval_provided_by_all = False - ret.line('#endif /* autodetect architecture */') - - ret.empty() - - if opts.compiler is not None: - ret.chdr_block_heading('Compiler: ' + opts.compiler) - - # XXX: better to lookup compilers metadata - include = 'compiler_%s.h.in' % opts.compiler - abs_fn = os.path.join(meta_dir, 'compilers', include) - validate_compiler_file(abs_fn) - sn = ret.snippet_absolute(abs_fn) - else: - ret.chdr_block_heading('Compiler autodetection') - - for idx, comp in enumerate(compilers['autodetect']): - check = comp.get('check', None) - include = comp['include'] - abs_fn = os.path.join(meta_dir, 'compilers', include) - - validate_compiler_file(abs_fn) - - if idx == 0: - ret.line('#if defined(%s)' % check) - else: - if check is None: - ret.line('#else') - else: - ret.line('#elif defined(%s)' % check) - ret.line('/* --- %s --- */' % comp.get('name', '???')) - sn = ret.snippet_absolute(abs_fn) - ret.line('#endif /* autodetect compiler */') - - ret.empty() - - # DUK_F_UCLIBC is special because __UCLIBC__ is provided by an #include - # file, so the check must happen after platform includes. It'd be nice - # for this to be automatic (e.g. DUK_F_UCLIBC.h.in could indicate the - # dependency somehow). 
- - ret.snippet_absolute(os.path.join(meta_dir, 'helper-snippets', 'DUK_F_UCLIBC.h.in')) - ret.empty() - - # XXX: platform/compiler could provide types; if so, need some signaling - # defines like DUK_F_TYPEDEFS_DEFINED - - # Number types - if opts.c99_types_only: - ret.snippet_relative('types1.h.in') - ret.line('/* C99 types assumed */') - ret.snippet_relative('types_c99.h.in') - ret.empty() - else: - ret.snippet_relative('types1.h.in') - ret.line('#if defined(DUK_F_HAVE_INTTYPES)') - ret.line('/* C99 or compatible */') - ret.empty() - ret.snippet_relative('types_c99.h.in') - ret.empty() - ret.line('#else /* C99 types */') - ret.empty() - ret.snippet_relative('types_legacy.h.in') - ret.empty() - ret.line('#endif /* C99 types */') - ret.empty() - ret.snippet_relative('types2.h.in') - ret.empty() - ret.snippet_relative('64bitops.h.in') - ret.empty() - - # Platform, architecture, compiler fillins. These are after all - # detection so that e.g. DUK_SPRINTF() can be provided by platform - # or compiler before trying a fill-in. - - ret.chdr_block_heading('Fill-ins for platform, architecture, and compiler') - - ret.snippet_relative('platform_fillins.h.in') - ret.empty() - ret.snippet_relative('architecture_fillins.h.in') - if not byteorder_provided_by_all: - ret.empty() - ret.snippet_relative('byteorder_fillin.h.in') - if not alignment_provided_by_all: - ret.empty() - ret.snippet_relative('alignment_fillin.h.in') - ret.empty() - ret.snippet_relative('compiler_fillins.h.in') - ret.empty() - ret.snippet_relative('inline_workaround.h.in') - ret.empty() - if not packedtval_provided_by_all: - ret.empty() - ret.snippet_relative('packed_tval_fillin.h.in') - - # Object layout - ret.snippet_relative('object_layout.h.in') - ret.empty() - - # Detect and reject 'fast math' - ret.snippet_relative('reject_fast_math.h.in') - ret.empty() - - # Automatic DUK_OPT_xxx feature option handling - if opts.support_feature_options: - print('Autogenerating feature option (DUK_OPT_xxx) support') - tmp = Snippet(ret.join().split('\n')) - add_feature_option_handling(opts, ret, forced_opts, tmp.provides) - - # Emit forced options. If a corresponding option is already defined - # by a snippet above, #undef it first. - - tmp = Snippet(ret.join().split('\n')) - first_forced = True - for doc in get_use_defs(removed=not opts.omit_removed_config_options, - deprecated=not opts.omit_deprecated_config_options, - unused=not opts.omit_unused_config_options): - defname = doc['define'] - - if not forced_opts.has_key(defname): - continue - - if not doc.has_key('default'): - raise Exception('config option %s is missing default value' % defname) - - if first_forced: - ret.chdr_block_heading('Forced options') - first_forced = False - - undef_done = False - if tmp.provides.has_key(defname): - ret.line('#undef ' + defname) - undef_done = True - - emit_default_from_config_meta(ret, doc, forced_opts, undef_done) - - ret.empty() - - # If manually-edited snippets don't #define or #undef a certain - # config option, emit a default value here. This is useful to - # fill-in for new config options not covered by manual snippets - # (which is intentional). 
- - tmp = Snippet(ret.join().split('\n')) - need = {} - for doc in get_use_defs(removed=False): - need[doc['define']] = True - for k in tmp.provides.keys(): - if need.has_key(k): - del need[k] - need_keys = sorted(need.keys()) - - if len(need_keys) > 0: - ret.chdr_block_heading('Autogenerated defaults') - - for k in need_keys: - #print('config option %s not covered by manual snippets, emitting default automatically' % k) - emit_default_from_config_meta(ret, use_defs[k], {}, False) - - ret.empty() - - ret.snippet_relative('custom_header.h.in') - ret.empty() - - if len(opts.fixup_header_lines) > 0: - ret.chdr_block_heading('Fixups') - for line in opts.fixup_header_lines: - ret.line(line) - ret.empty() - - add_override_defines_section(opts, ret) - - # Date provider snippet is after custom header and overrides, so that - # the user may define e.g. DUK_USE_DATE_NOW_GETTIMEOFDAY in their - # custom header. - ret.snippet_relative('date_provider.h.in') - ret.empty() - - ret.fill_dependencies_for_snippets(idx_deps) - - if opts.emit_legacy_feature_check: - add_legacy_feature_option_checks(opts, ret) - if opts.emit_config_sanity_check: - add_config_option_checks(opts, ret) - if opts.add_active_defines_macro: - add_duk_active_defines_macro(ret) - - # Derived defines (DUK_USE_INTEGER_LE, etc) from DUK_USE_BYTEORDER. - # Duktape internals currently rely on the derived defines. This is - # after sanity checks because the derived defines are marked removed. - ret.snippet_relative('byteorder_derived.h.in') - ret.empty() - - ret.line('#endif /* DUK_CONFIG_H_INCLUDED */') - ret.empty() # for trailing newline - return remove_duplicate_newlines(ret.join()) - -# -# Main -# - -def main(): - # Forced options from multiple sources are gathered into a shared list - # so that the override order remains the same as on the command line. - force_options_yaml = [] - def add_force_option_yaml(option, opt, value, parser): - # XXX: check that YAML parses - force_options_yaml.append(value) - def add_force_option_file(option, opt, value, parser): - # XXX: check that YAML parses - with open(value, 'rb') as f: - force_options_yaml.append(f.read()) - def add_force_option_define(option, opt, value, parser): - tmp = value.split('=') - if len(tmp) == 1: - doc = { tmp[0]: True } - elif len(tmp) == 2: - doc = { tmp[0]: tmp[1] } - else: - raise Exception('invalid option value: %r' % value) - force_options_yaml.append(yaml.safe_dump(doc)) - def add_force_option_undefine(option, opt, value, parser): - tmp = value.split('=') - if len(tmp) == 1: - doc = { tmp[0]: False } - else: - raise Exception('invalid option value: %r' % value) - force_options_yaml.append(yaml.safe_dump(doc)) - - fixup_header_lines = [] - def add_fixup_header_line(option, opt, value, parser): - fixup_header_lines.append(value) - def add_fixup_header_file(option, opt, value, parser): - with open(value, 'rb') as f: - for line in f: - if line[-1] == '\n': - line = line[:-1] - fixup_header_lines.append(line) - - commands = [ - 'duk-config-header', - 'feature-documentation', - 'config-documentation' - ] - parser = optparse.OptionParser( - usage='Usage: %prog [options] COMMAND', - description='Generate a duk_config.h or config option documentation based on config metadata.', - epilog='COMMAND can be one of: ' + ', '.join(commands) + '.' 
- ) - - parser.add_option('--metadata', dest='metadata', default=None, help='metadata directory or metadata tar.gz file') - parser.add_option('--output', dest='output', default=None, help='output filename for C header or RST documentation file') - parser.add_option('--platform', dest='platform', default=None, help='platform (for "barebones-header" command)') - parser.add_option('--compiler', dest='compiler', default=None, help='compiler (for "barebones-header" command)') - parser.add_option('--architecture', dest='architecture', default=None, help='architecture (for "barebones-header" command)') - parser.add_option('--c99-types-only', dest='c99_types_only', action='store_true', default=False, help='assume C99 types, no legacy type detection') - parser.add_option('--dll', dest='dll', action='store_true', default=False, help='dll build of Duktape, affects symbol visibility macros especially on Windows') - parser.add_option('--support-feature-options', dest='support_feature_options', action='store_true', default=False, help='support DUK_OPT_xxx feature options in duk_config.h') - parser.add_option('--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help='emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)') - parser.add_option('--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help='emit preprocessor checks for config option consistency (DUK_OPT_xxx)') - parser.add_option('--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers') - parser.add_option('--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers') - parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers') - parser.add_option('--add-active-defines-macro', dest='add_active_defines_macro', action='store_true', default=False, help='add DUK_ACTIVE_DEFINES macro, for development only') - parser.add_option('--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"') - parser.add_option('-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"') - parser.add_option('--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"') - parser.add_option('-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"') - parser.add_option('--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help='force option(s) using inline YAML (e.g. 
--option-yaml "DUK_USE_DEEP_C_STACK: true")') - parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides') - parser.add_option('--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help='C header snippet file(s) to be appended to generated header, useful for manual option fixups') - parser.add_option('--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help='C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")') - parser.add_option('--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues') - parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate') - parser.add_option('--git-commit', dest='git_commit', default=None, help='git commit hash to be included in header comments') - parser.add_option('--git-describe', dest='git_describe', default=None, help='git describe string to be included in header comments') - parser.add_option('--git-branch', dest='git_branch', default=None, help='git branch string to be included in header comments') - (opts, args) = parser.parse_args() - - meta_dir = opts.metadata - if opts.metadata is None: - if os.path.isfile(os.path.join('.', 'genconfig_metadata.tar.gz')): - opts.metadata = 'genconfig_metadata.tar.gz' - elif os.path.isdir(os.path.join('.', 'config-options')): - opts.metadata = '.' - - if opts.metadata is not None and os.path.isdir(opts.metadata): - meta_dir = opts.metadata - metadata_src_text = 'Using metadata directory: %r' % meta_dir - elif opts.metadata is not None and os.path.isfile(opts.metadata) and tarfile.is_tarfile(opts.metadata): - meta_dir = get_auto_delete_tempdir() - tar = tarfile.open(name=opts.metadata, mode='r:*') - tar.extractall(path=meta_dir) - metadata_src_text = 'Using metadata tar file %r, unpacked to directory: %r' % (opts.metadata, meta_dir) - else: - raise Exception('metadata source must be a directory or a tar.gz file') - - scan_helper_snippets(os.path.join(meta_dir, 'helper-snippets')) - scan_use_defs(os.path.join(meta_dir, 'config-options')) - scan_opt_defs(os.path.join(meta_dir, 'feature-options')) - scan_use_tags() - scan_tags_meta(os.path.join(meta_dir, 'tags.yaml')) - print('%s, scanned %d DUK_OPT_xxx, %d DUK_USE_XXX, %d helper snippets' % \ - (metadata_src_text, len(opt_defs.keys()), len(use_defs.keys()), len(helper_snippets))) - #print('Tags: %r' % use_tags_list) - - if len(args) == 0: - raise Exception('missing command') - cmd = args[0] - - # Compatibility with Duktape 1.3 - if cmd == 'autodetect-header': - cmd = 'duk-config-header' - if cmd == 'barebones-header': - cmd = 'duk-config-header' - - if cmd == 'duk-config-header': - # Generate a duk_config.h header with platform, compiler, and - # architecture either autodetected (default) or specified by - # user. Support for autogenerated DUK_OPT_xxx flags is also - # selected by user. 
- result = generate_duk_config_header(opts, meta_dir) - with open(opts.output, 'wb') as f: - f.write(result) - elif cmd == 'feature-documentation': - result = generate_feature_option_documentation(opts) - with open(opts.output, 'wb') as f: - f.write(result) - elif cmd == 'config-documentation': - result = generate_config_option_documentation(opts) - with open(opts.output, 'wb') as f: - f.write(result) - else: - raise Exception('invalid command: %r' % cmd) - -if __name__ == '__main__': - main() diff --git a/debugger/merge_debug_meta.py b/debugger/merge_debug_meta.py deleted file mode 100644 index ba9b38e3..00000000 --- a/debugger/merge_debug_meta.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python2 -# -# Merge debugger YAML metadata files and output a merged JSON metadata file. -# - -import os, sys, json, yaml -import optparse - -if __name__ == '__main__': - parser = optparse.OptionParser() - parser.add_option('--output', dest='output', default=None, help='output JSON filename') - parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names') - parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands') - parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes') - parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes') - (opts, args) = parser.parse_args() - - res = {} - def merge(fn): - with open(fn, 'rb') as f: - doc = yaml.load(f) - for k in doc.keys(): - res[k] = doc[k] - - merge(opts.class_names) - merge(opts.debug_commands) - merge(opts.debug_errors) - merge(opts.opcodes) - - with open(opts.output, 'wb') as f: - f.write(json.dumps(res, indent=4) + '\n') - print('Wrote merged debugger metadata to ' + str(opts.output)) diff --git a/debugger/util/heapjson_convert.py b/debugger/util/heapjson_convert.py index a3310274..cea14d2a 100644 --- a/debugger/util/heapjson_convert.py +++ b/debugger/util/heapjson_convert.py @@ -13,40 +13,40 @@ add_proto = False add_props = True def main(): - f = open(sys.argv[1], 'rb') - heapdump = json.loads(f.read()) - f.close() - - objs = {} - for obj in heapdump['heapObjects']: - objs[obj['ptr']['HEAPPTR']] = obj - - f = sys.stdout - - def is_obj(x): - if not objs.has_key(x): - return False - return objs[x]['type'] == 2 - - def emit(x, y): - # XXX: only emit edges between objects (not strings or buffers) - if is_obj(y): - f.write('h%s,h%s\n' % (x, y)) - - #f.write('digraph heap {\n') - f.write('Source,Target\n') - for obj in heapdump['heapObjects']: - x = obj['ptr']['HEAPPTR'] - if add_proto and obj.has_key('proto'): - #f.write('h%s -> h%s;\n' % (x, obj['proto']['HEAPPTR'])) - f.write('h%s,h%s\n' % (x, obj['proto']['HEAPPTR'])) - if add_props and obj.has_key('props'): - for p in obj['props']: - if p.has_key('key'): - emit(x, p['key']['HEAPPTR']) - if p.has_key('value') and isinstance(p['value'], dict) and p['value'].has_key('HEAPPTR'): - emit(x, p['value']['HEAPPTR']) - #f.write('}\n') + f = open(sys.argv[1], 'rb') + heapdump = json.loads(f.read()) + f.close() + + objs = {} + for obj in heapdump['heapObjects']: + objs[obj['ptr']['HEAPPTR']] = obj + + f = sys.stdout + + def is_obj(x): + if not objs.has_key(x): + return False + return objs[x]['type'] == 2 + + def emit(x, y): + # XXX: only emit edges between objects (not strings or buffers) + if is_obj(y): + f.write('h%s,h%s\n' % (x, y)) + + #f.write('digraph heap {\n') + f.write('Source,Target\n') + for obj in heapdump['heapObjects']: + x = obj['ptr']['HEAPPTR'] + 
if add_proto and obj.has_key('proto'): + #f.write('h%s -> h%s;\n' % (x, obj['proto']['HEAPPTR'])) + f.write('h%s,h%s\n' % (x, obj['proto']['HEAPPTR'])) + if add_props and obj.has_key('props'): + for p in obj['props']: + if p.has_key('key'): + emit(x, p['key']['HEAPPTR']) + if p.has_key('value') and isinstance(p['value'], dict) and p['value'].has_key('HEAPPTR'): + emit(x, p['value']['HEAPPTR']) + #f.write('}\n') if __name__ == '__main__': - main() + main() diff --git a/examples/alloc-logging/log2gnuplot.py b/examples/alloc-logging/log2gnuplot.py index 0528259d..2201f9d0 100644 --- a/examples/alloc-logging/log2gnuplot.py +++ b/examples/alloc-logging/log2gnuplot.py @@ -12,30 +12,30 @@ import os import sys def main(): - allocated = 0 + allocated = 0 - for line in sys.stdin: - line = line.strip() - parts = line.split(' ') + for line in sys.stdin: + line = line.strip() + parts = line.split(' ') - # A ptr/NULL/FAIL size - # F ptr/NULL size - # R ptr/NULL oldsize ptr/NULL/FAIL newsize + # A ptr/NULL/FAIL size + # F ptr/NULL size + # R ptr/NULL oldsize ptr/NULL/FAIL newsize - # Note: ajduk doesn't log oldsize (uses -1 instead) + # Note: ajduk doesn't log oldsize (uses -1 instead) - if parts[0] == 'A': - if parts[1] != 'NULL' and parts[1] != 'FAIL': - allocated += long(parts[2]) - elif parts[0] == 'F': - allocated -= long(parts[2]) - elif parts[0] == 'R': - allocated -= long(parts[2]) - if parts[3] != 'NULL' and parts[3] != 'FAIL': - allocated += long(parts[4]) - print(allocated) + if parts[0] == 'A': + if parts[1] != 'NULL' and parts[1] != 'FAIL': + allocated += long(parts[2]) + elif parts[0] == 'F': + allocated -= long(parts[2]) + elif parts[0] == 'R': + allocated -= long(parts[2]) + if parts[3] != 'NULL' and parts[3] != 'FAIL': + allocated += long(parts[4]) + print(allocated) - print(allocated) + print(allocated) if __name__ == '__main__': - main() + main() diff --git a/examples/alloc-logging/pool_simulator.py b/examples/alloc-logging/pool_simulator.py index 03032226..a0e2d7ef 100644 --- a/examples/alloc-logging/pool_simulator.py +++ b/examples/alloc-logging/pool_simulator.py @@ -31,42 +31,42 @@ import optparse # def dprint(x): - sys.stderr.write('%s\n' % x) - sys.stderr.flush() + sys.stderr.write('%s\n' % x) + sys.stderr.flush() def readJson(fn): - f = open(fn, 'rb') - d = f.read() - f.close() - return json.loads(d) + f = open(fn, 'rb') + d = f.read() + f.close() + return json.loads(d) def readFile(fn): - f = open(fn, 'rb') - d = f.read() - f.close() - return d + f = open(fn, 'rb') + d = f.read() + f.close() + return d def writeJson(fn, val): - f = open(fn, 'wb') - f.write(json.dumps(val, indent=4, ensure_ascii=True, sort_keys=True)) - f.close() + f = open(fn, 'wb') + f.write(json.dumps(val, indent=4, ensure_ascii=True, sort_keys=True)) + f.close() def writeFile(fn, val): - f = open(fn, 'wb') - f.write(val) - f.close() + f = open(fn, 'wb') + f.write(val) + f.close() # Clone a pool config (state), with all runtime fields intact def clonePool(pool): - return json.loads(json.dumps(pool)) + return json.loads(json.dumps(pool)) # Clone a pool config, but clean it of any runtime fields def clonePoolCleaned(pool): - p = json.loads(json.dumps(pool)) - for k in [ 'entries', 'ajs_use', 'ajs_hwm', 'ajs_min', 'ajs_max' ]: - if p.has_key(k): - del p[k] - return p + p = json.loads(json.dumps(pool)) + for k in [ 'entries', 'ajs_use', 'ajs_hwm', 'ajs_min', 'ajs_max' ]: + if p.has_key(k): + del p[k] + return p #--------------------------------------------------------------------------- # @@ -82,232 +82,232 @@ 
nextPtr = 1 HUGE = 0x100000000 # used for min() class AllocFailedException(Exception): - pass + pass class PoolSimulator: - state = None - config = None - allow_borrow = True # matches ajs_heap.c - auto_extend = True # for getting hwm w/o borrowing - ignore_zero_alloc = False # matches ajs_heap.c - - def __init__(self, config, borrow=True, extend=False): - global nextPtr - - self.allow_borrow = borrow - self.auto_extend = extend - self.state = { 'pools': [] } - self.config = json.loads(json.dumps(config)) # verify and clone - - for cfg in config['pools']: - st = json.loads(json.dumps(cfg)) - st['entries'] = [] - st['ajs_use'] = 0 # entries in use - st['ajs_hwm'] = 0 # max entries in use - #st['ajs_min'] = None # min alloc size - #st['ajs_max'] = None # max alloc size - st['heap_index'] = st.get('heap_index', 0) # ajs specific - for i in xrange(cfg['count']): - ent = { 'alloc_size': None, - 'entry_size': st['size'], - 'borrowed': False } # free - ent['pointer'] = nextPtr - nextPtr += 1 - st['entries'].append(ent) - self.state['pools'].append(st) - - def alloc(self, size): - global nextPtr - - #print('alloc %d' % size) - - if size == 0 and self.ignore_zero_alloc: - return nullPtr - - borrowed = False - - def alloc_match(e): - e['alloc_size'] = size - e['borrowed'] = borrowed - p['ajs_use'] += 1 - p['ajs_hwm'] = max(p['ajs_use'], p['ajs_hwm']) - p['ajs_min'] = min(p.get('ajs_min', HUGE), size) - p['ajs_max'] = max(p.get('ajs_max', 0), size) - return e['pointer'] - - for p in self.state['pools']: - if p['size'] < size: - continue - for e in p['entries']: - if e['alloc_size'] is not None: - continue - return alloc_match(e) - - # Auto extend for measuring pool hwm without borrowing - if self.auto_extend: - ent = { 'alloc_size': None, - 'entry_size': p['size'], - 'borrowed': False, - 'extended': True } - ent['pointer'] = nextPtr - nextPtr += 1 - p['entries'].append(ent) - return alloc_match(ent) - - if not self.allow_borrow or not p['borrow']: - raise AllocFailedException('alloc failure for size %d: pool full, no borrow' % size) - borrowed = True - - raise AllocFailedException('alloc failure for size %d: went through all pools, no space' % size) - - def realloc(self, ptr, size): - #print('realloc %d %d' % (ptr, size)) - - if ptr == nullPtr: - return self.alloc(size) - - if size == 0: - self.free(ptr) - return nullPtr - - # ptr != NULL and size != 0 here - - for idx in xrange(len(self.state['pools'])): - p = self.state['pools'][idx] - prev_p = None - if idx >= 0: - prev_p = self.state['pools'][idx - 1] - - for e in p['entries']: - if e['pointer'] == ptr: - if e['alloc_size'] is None: - raise AllocFailedException('realloc failure for pointer %d: entry not allocated (double free)' % ptr) - - fits_current = (size <= p['size']) - fits_previous = (prev_p is not None and size <= prev_p['size']) - - if fits_current and not fits_previous: - # New alloc size fits current pool and won't fit into - # previous pool (so it could be shrunk). - - p['ajs_max'] = max(p.get('ajs_max', 0), size) - return ptr - - # Reallocate entry (smaller or larger). - # Note: when shrinking, ajs_heap.c doesn't make sure - # there's actually a free entry in the smaller pool. - # This affects only some corner cases, but match - # that behavior here. 
- - newPtr = self.alloc(size) - self.free(ptr) - return newPtr - - raise AllocFailedException('free failure for pointer %d: cannot find pointer' % ptr) - - def free(self, ptr): - #print('free %d' % ptr) - - if ptr == nullPtr: - return - - for p in self.state['pools']: - for e in p['entries']: - if e['pointer'] == ptr: - if e['alloc_size'] is None: - raise AllocFailedException('free failure for pointer %d: entry not allocated (double free)' % ptr) - e['alloc_size'] = None - e['borrowed'] = False - p['ajs_use'] -= 1 - return - - raise AllocFailedException('free failure for pointer %d: cannot find pointer' % ptr) - - # Get a list of pool byte sizes. - def getSizes(self): - res = [] - for p in self.state['pools']: - res.append(p['size']) - return res - - # Get stats from current allocation state. - def stats(self): - alloc_bytes = 0 - waste_bytes = 0 - free_bytes = 0 - - ajs_hwm_bytes = 0 # these correspond to runtime values from ajs_heap.c - ajs_use_bytes = 0 # and are approximate - ajs_waste_bytes = 0 - - by_pool = [] - - for p in self.state['pools']: - alloc_bytes_pool = 0 - waste_bytes_pool = 0 - free_bytes_pool = 0 - - for e in p['entries']: - if e['alloc_size'] is None: - free_bytes_pool += e['entry_size'] - else: - alloc_bytes_pool += e['alloc_size'] - waste_bytes_pool += e['entry_size'] - e['alloc_size'] - - ajs_use_count_pool = p['ajs_use'] - ajs_hwm_count_pool = p['ajs_hwm'] - ajs_min_bytes_pool = p.get('ajs_min', 0) - ajs_max_bytes_pool = p.get('ajs_max', 0) - ajs_hwm_bytes_pool = p['ajs_hwm'] * p['size'] - ajs_use_bytes_pool = p['ajs_use'] * p['size'] - ajs_waste_bytes_pool = p['ajs_hwm'] * (p['size'] - p.get('ajs_max', 0)) - - by_pool.append({ - 'size': p['size'], - 'alloc': alloc_bytes_pool, - 'waste': waste_bytes_pool, - 'free': free_bytes_pool, - 'ajs_use_count': ajs_use_count_pool, - 'ajs_hwm_count': ajs_hwm_count_pool, - 'ajs_min_bytes': ajs_min_bytes_pool, - 'ajs_max_bytes': ajs_max_bytes_pool, - 'ajs_hwm_bytes': ajs_hwm_bytes_pool, - 'ajs_use_bytes': ajs_use_bytes_pool, - 'ajs_waste_bytes': ajs_waste_bytes_pool - }) - - alloc_bytes += alloc_bytes_pool - waste_bytes += waste_bytes_pool - free_bytes += free_bytes_pool - - ajs_hwm_bytes += ajs_hwm_bytes_pool - ajs_use_bytes += ajs_use_bytes_pool - ajs_waste_bytes += ajs_waste_bytes_pool - - return { - 'alloc_bytes': alloc_bytes, - 'waste_bytes': waste_bytes, - 'free_bytes': free_bytes, - - 'ajs_hwm_bytes': ajs_hwm_bytes, - 'ajs_use_bytes': ajs_use_bytes, - 'ajs_waste_bytes': ajs_waste_bytes, - - 'byPool': by_pool - } - - # Get "tight" pool config based on hwm of each pool size. 
- def getTightHwmConfig(self): - pools = [] - cfg = { 'pools': pools } - total_bytes = 0 - for p in self.state['pools']: - pool = clonePoolCleaned(p) - pool['count'] = p['ajs_hwm'] - pools.append(pool) - total_bytes += pool['size'] * pool['count'] - cfg['total_bytes'] = total_bytes - return cfg + state = None + config = None + allow_borrow = True # matches ajs_heap.c + auto_extend = True # for getting hwm w/o borrowing + ignore_zero_alloc = False # matches ajs_heap.c + + def __init__(self, config, borrow=True, extend=False): + global nextPtr + + self.allow_borrow = borrow + self.auto_extend = extend + self.state = { 'pools': [] } + self.config = json.loads(json.dumps(config)) # verify and clone + + for cfg in config['pools']: + st = json.loads(json.dumps(cfg)) + st['entries'] = [] + st['ajs_use'] = 0 # entries in use + st['ajs_hwm'] = 0 # max entries in use + #st['ajs_min'] = None # min alloc size + #st['ajs_max'] = None # max alloc size + st['heap_index'] = st.get('heap_index', 0) # ajs specific + for i in xrange(cfg['count']): + ent = { 'alloc_size': None, + 'entry_size': st['size'], + 'borrowed': False } # free + ent['pointer'] = nextPtr + nextPtr += 1 + st['entries'].append(ent) + self.state['pools'].append(st) + + def alloc(self, size): + global nextPtr + + #print('alloc %d' % size) + + if size == 0 and self.ignore_zero_alloc: + return nullPtr + + borrowed = False + + def alloc_match(e): + e['alloc_size'] = size + e['borrowed'] = borrowed + p['ajs_use'] += 1 + p['ajs_hwm'] = max(p['ajs_use'], p['ajs_hwm']) + p['ajs_min'] = min(p.get('ajs_min', HUGE), size) + p['ajs_max'] = max(p.get('ajs_max', 0), size) + return e['pointer'] + + for p in self.state['pools']: + if p['size'] < size: + continue + for e in p['entries']: + if e['alloc_size'] is not None: + continue + return alloc_match(e) + + # Auto extend for measuring pool hwm without borrowing + if self.auto_extend: + ent = { 'alloc_size': None, + 'entry_size': p['size'], + 'borrowed': False, + 'extended': True } + ent['pointer'] = nextPtr + nextPtr += 1 + p['entries'].append(ent) + return alloc_match(ent) + + if not self.allow_borrow or not p['borrow']: + raise AllocFailedException('alloc failure for size %d: pool full, no borrow' % size) + borrowed = True + + raise AllocFailedException('alloc failure for size %d: went through all pools, no space' % size) + + def realloc(self, ptr, size): + #print('realloc %d %d' % (ptr, size)) + + if ptr == nullPtr: + return self.alloc(size) + + if size == 0: + self.free(ptr) + return nullPtr + + # ptr != NULL and size != 0 here + + for idx in xrange(len(self.state['pools'])): + p = self.state['pools'][idx] + prev_p = None + if idx >= 0: + prev_p = self.state['pools'][idx - 1] + + for e in p['entries']: + if e['pointer'] == ptr: + if e['alloc_size'] is None: + raise AllocFailedException('realloc failure for pointer %d: entry not allocated (double free)' % ptr) + + fits_current = (size <= p['size']) + fits_previous = (prev_p is not None and size <= prev_p['size']) + + if fits_current and not fits_previous: + # New alloc size fits current pool and won't fit into + # previous pool (so it could be shrunk). + + p['ajs_max'] = max(p.get('ajs_max', 0), size) + return ptr + + # Reallocate entry (smaller or larger). + # Note: when shrinking, ajs_heap.c doesn't make sure + # there's actually a free entry in the smaller pool. + # This affects only some corner cases, but match + # that behavior here. 
+ + newPtr = self.alloc(size) + self.free(ptr) + return newPtr + + raise AllocFailedException('free failure for pointer %d: cannot find pointer' % ptr) + + def free(self, ptr): + #print('free %d' % ptr) + + if ptr == nullPtr: + return + + for p in self.state['pools']: + for e in p['entries']: + if e['pointer'] == ptr: + if e['alloc_size'] is None: + raise AllocFailedException('free failure for pointer %d: entry not allocated (double free)' % ptr) + e['alloc_size'] = None + e['borrowed'] = False + p['ajs_use'] -= 1 + return + + raise AllocFailedException('free failure for pointer %d: cannot find pointer' % ptr) + + # Get a list of pool byte sizes. + def getSizes(self): + res = [] + for p in self.state['pools']: + res.append(p['size']) + return res + + # Get stats from current allocation state. + def stats(self): + alloc_bytes = 0 + waste_bytes = 0 + free_bytes = 0 + + ajs_hwm_bytes = 0 # these correspond to runtime values from ajs_heap.c + ajs_use_bytes = 0 # and are approximate + ajs_waste_bytes = 0 + + by_pool = [] + + for p in self.state['pools']: + alloc_bytes_pool = 0 + waste_bytes_pool = 0 + free_bytes_pool = 0 + + for e in p['entries']: + if e['alloc_size'] is None: + free_bytes_pool += e['entry_size'] + else: + alloc_bytes_pool += e['alloc_size'] + waste_bytes_pool += e['entry_size'] - e['alloc_size'] + + ajs_use_count_pool = p['ajs_use'] + ajs_hwm_count_pool = p['ajs_hwm'] + ajs_min_bytes_pool = p.get('ajs_min', 0) + ajs_max_bytes_pool = p.get('ajs_max', 0) + ajs_hwm_bytes_pool = p['ajs_hwm'] * p['size'] + ajs_use_bytes_pool = p['ajs_use'] * p['size'] + ajs_waste_bytes_pool = p['ajs_hwm'] * (p['size'] - p.get('ajs_max', 0)) + + by_pool.append({ + 'size': p['size'], + 'alloc': alloc_bytes_pool, + 'waste': waste_bytes_pool, + 'free': free_bytes_pool, + 'ajs_use_count': ajs_use_count_pool, + 'ajs_hwm_count': ajs_hwm_count_pool, + 'ajs_min_bytes': ajs_min_bytes_pool, + 'ajs_max_bytes': ajs_max_bytes_pool, + 'ajs_hwm_bytes': ajs_hwm_bytes_pool, + 'ajs_use_bytes': ajs_use_bytes_pool, + 'ajs_waste_bytes': ajs_waste_bytes_pool + }) + + alloc_bytes += alloc_bytes_pool + waste_bytes += waste_bytes_pool + free_bytes += free_bytes_pool + + ajs_hwm_bytes += ajs_hwm_bytes_pool + ajs_use_bytes += ajs_use_bytes_pool + ajs_waste_bytes += ajs_waste_bytes_pool + + return { + 'alloc_bytes': alloc_bytes, + 'waste_bytes': waste_bytes, + 'free_bytes': free_bytes, + + 'ajs_hwm_bytes': ajs_hwm_bytes, + 'ajs_use_bytes': ajs_use_bytes, + 'ajs_waste_bytes': ajs_waste_bytes, + + 'byPool': by_pool + } + + # Get "tight" pool config based on hwm of each pool size. 
+ def getTightHwmConfig(self): + pools = [] + cfg = { 'pools': pools } + total_bytes = 0 + for p in self.state['pools']: + pool = clonePoolCleaned(p) + pool['count'] = p['ajs_hwm'] + pools.append(pool) + total_bytes += pool['size'] * pool['count'] + cfg['total_bytes'] = total_bytes + return cfg #--------------------------------------------------------------------------- # @@ -317,140 +317,140 @@ class PoolSimulator: xIndex = 0 def processAllocLog(ps, f_log, out_dir, throw_on_oom=True, emit_files=True): - # map native pointer to current simulator pointer - ptrmap = {} - - def writeFile(fn, line): - f = open(fn, 'ab') - f.write(line + '\n') - f.close() - - def emitStats(): - global xIndex - - if not emit_files: - return - - stats = ps.stats() - writeFile(os.path.join(out_dir, 'alloc_bytes_all.txt'), '%d %d' % (xIndex, stats['alloc_bytes'])) - writeFile(os.path.join(out_dir, 'waste_bytes_all.txt'), '%d %d' % (xIndex, stats['waste_bytes'])) - writeFile(os.path.join(out_dir, 'free_bytes_all.txt'), '%d %d' % (xIndex, stats['free_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_hwm_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_use_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_use_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_waste_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_waste_bytes'])) - - for p in stats['byPool']: - writeFile(os.path.join(out_dir, 'alloc_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['alloc'])) - writeFile(os.path.join(out_dir, 'waste_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['waste'])) - writeFile(os.path.join(out_dir, 'free_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['free'])) - writeFile(os.path.join(out_dir, 'ajs_use_count_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_use_count'])) - writeFile(os.path.join(out_dir, 'ajs_hwm_count_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_hwm_count'])) - writeFile(os.path.join(out_dir, 'ajs_min_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_min_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_max_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_max_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_hwm_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_use_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_use_bytes'])) - writeFile(os.path.join(out_dir, 'ajs_waste_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_waste_bytes'])) - xIndex += 1 - - def emitSnapshot(count): - if not emit_files: - return - - f = open(os.path.join(out_dir, 'state_%d.json' % count), 'wb') - f.write(json.dumps(ps.state, indent=4)) - f.close() - - stats = ps.stats() - for p in stats['byPool']: - logsize = math.log(p['size'], 2) - writeFile(os.path.join(out_dir, 'alloc_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['alloc'], p['size'])) - writeFile(os.path.join(out_dir, 'waste_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['waste'], p['size'])) - writeFile(os.path.join(out_dir, 'free_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['free'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_use_count_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_use_count'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_hwm_count_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_hwm_count'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_min_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_min_bytes'], p['size'])) - 
writeFile(os.path.join(out_dir, 'ajs_max_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_max_bytes'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_hwm_bytes'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_use_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_use_bytes'], p['size'])) - writeFile(os.path.join(out_dir, 'ajs_waste_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_waste_bytes'], p['size'])) - - sys.stdout.write('Simulating...') - sys.stdout.flush() - - success = False - - try: - count = 0 - for line in f_log: - count += 1 - if (count % 1000) == 0: - sys.stdout.write('.') - sys.stdout.flush() - emitSnapshot(count) - - emitStats() - - line = line.strip() - parts = line.split(' ') - - # A ptr/NULL/FAIL size - # F ptr/NULL size - # R ptr/NULL oldsize ptr/NULL/FAIL newsize - - if len(parts) < 1: - pass # ignore - elif parts[0] == 'A': - if parts[1] == 'FAIL': - pass - elif parts[1] == 'NULL': - ps.alloc(nullPtr) - else: - ptrmap[parts[1]] = ps.alloc(long(parts[2])) - elif parts[0] == 'F': - if parts[1] == 'NULL': - ps.free(nullPtr) - else: - ptr = ptrmap[parts[1]] - ps.free(ptr) - del ptrmap[parts[1]] - elif parts[0] == 'R': - # oldsize is not needed; don't use because e.g. ajduk - # log stats don't provide it - - if parts[1] == 'NULL': - oldptr = nullPtr - else: - oldptr = ptrmap[parts[1]] - - if parts[3] == 'FAIL': - pass - else: - newsize = long(parts[4]) - newptr = ps.realloc(oldptr, newsize) - if newptr == nullPtr and newsize > 0: - # Failed/freed, don't update pointers - pass - else: - if parts[1] != 'NULL' and ptrmap.has_key(parts[1]): - del ptrmap[parts[1]] - if parts[3] != 'NULL': - ptrmap[parts[3]] = newptr - else: - pass # ignore - - sys.stdout.write(' done\n') - sys.stdout.flush() - success = True - except AllocFailedException: - sys.stdout.write(' failed, out of memory\n') - sys.stdout.flush() - if throw_on_oom: - raise Exception('out of memory') - - emitSnapshot(count) - emitStats() - - return success + # map native pointer to current simulator pointer + ptrmap = {} + + def writeFile(fn, line): + f = open(fn, 'ab') + f.write(line + '\n') + f.close() + + def emitStats(): + global xIndex + + if not emit_files: + return + + stats = ps.stats() + writeFile(os.path.join(out_dir, 'alloc_bytes_all.txt'), '%d %d' % (xIndex, stats['alloc_bytes'])) + writeFile(os.path.join(out_dir, 'waste_bytes_all.txt'), '%d %d' % (xIndex, stats['waste_bytes'])) + writeFile(os.path.join(out_dir, 'free_bytes_all.txt'), '%d %d' % (xIndex, stats['free_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_hwm_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_use_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_use_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_waste_bytes_all.txt'), '%d %d' % (xIndex, stats['ajs_waste_bytes'])) + + for p in stats['byPool']: + writeFile(os.path.join(out_dir, 'alloc_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['alloc'])) + writeFile(os.path.join(out_dir, 'waste_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['waste'])) + writeFile(os.path.join(out_dir, 'free_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['free'])) + writeFile(os.path.join(out_dir, 'ajs_use_count_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_use_count'])) + writeFile(os.path.join(out_dir, 'ajs_hwm_count_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_hwm_count'])) + writeFile(os.path.join(out_dir, 
'ajs_min_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_min_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_max_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_max_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_hwm_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_use_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_use_bytes'])) + writeFile(os.path.join(out_dir, 'ajs_waste_bytes_%d.txt' % p['size']), '%d %d' % (xIndex, p['ajs_waste_bytes'])) + xIndex += 1 + + def emitSnapshot(count): + if not emit_files: + return + + f = open(os.path.join(out_dir, 'state_%d.json' % count), 'wb') + f.write(json.dumps(ps.state, indent=4)) + f.close() + + stats = ps.stats() + for p in stats['byPool']: + logsize = math.log(p['size'], 2) + writeFile(os.path.join(out_dir, 'alloc_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['alloc'], p['size'])) + writeFile(os.path.join(out_dir, 'waste_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['waste'], p['size'])) + writeFile(os.path.join(out_dir, 'free_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['free'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_use_count_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_use_count'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_hwm_count_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_hwm_count'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_min_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_min_bytes'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_max_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_max_bytes'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_hwm_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_hwm_bytes'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_use_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_use_bytes'], p['size'])) + writeFile(os.path.join(out_dir, 'ajs_waste_bytes_bypool_%d.txt' % count), '%f %d # size=%d' % (logsize, p['ajs_waste_bytes'], p['size'])) + + sys.stdout.write('Simulating...') + sys.stdout.flush() + + success = False + + try: + count = 0 + for line in f_log: + count += 1 + if (count % 1000) == 0: + sys.stdout.write('.') + sys.stdout.flush() + emitSnapshot(count) + + emitStats() + + line = line.strip() + parts = line.split(' ') + + # A ptr/NULL/FAIL size + # F ptr/NULL size + # R ptr/NULL oldsize ptr/NULL/FAIL newsize + + if len(parts) < 1: + pass # ignore + elif parts[0] == 'A': + if parts[1] == 'FAIL': + pass + elif parts[1] == 'NULL': + ps.alloc(nullPtr) + else: + ptrmap[parts[1]] = ps.alloc(long(parts[2])) + elif parts[0] == 'F': + if parts[1] == 'NULL': + ps.free(nullPtr) + else: + ptr = ptrmap[parts[1]] + ps.free(ptr) + del ptrmap[parts[1]] + elif parts[0] == 'R': + # oldsize is not needed; don't use because e.g. 
ajduk + # log stats don't provide it + + if parts[1] == 'NULL': + oldptr = nullPtr + else: + oldptr = ptrmap[parts[1]] + + if parts[3] == 'FAIL': + pass + else: + newsize = long(parts[4]) + newptr = ps.realloc(oldptr, newsize) + if newptr == nullPtr and newsize > 0: + # Failed/freed, don't update pointers + pass + else: + if parts[1] != 'NULL' and ptrmap.has_key(parts[1]): + del ptrmap[parts[1]] + if parts[3] != 'NULL': + ptrmap[parts[3]] = newptr + else: + pass # ignore + + sys.stdout.write(' done\n') + sys.stdout.flush() + success = True + except AllocFailedException: + sys.stdout.write(' failed, out of memory\n') + sys.stdout.flush() + if throw_on_oom: + raise Exception('out of memory') + + emitSnapshot(count) + emitStats() + + return success #--------------------------------------------------------------------------- # @@ -458,77 +458,77 @@ def processAllocLog(ps, f_log, out_dir, throw_on_oom=True, emit_files=True): # def gnuplotGraphs(ps, out_dir): - def plot(files, out_fn): - f = open('/tmp/gnuplot-commands', 'wb') - f.write('set terminal dumb\n') - for idx, fn in enumerate(files): - full_fn = os.path.join(out_dir, fn) - cmd = 'plot' - if idx > 0: - cmd = 'replot' - f.write('%s "%s" with lines\n' % (cmd, full_fn)) - #f.write('%s "%s" with boxes\n' % (cmd, full_fn)) - f.write('set terminal pngcairo size 1024,768\n') - f.write('set output "%s"\n' % os.path.join(out_dir, out_fn)) - f.write('replot\n') - f.close() - - os.system('gnuplot /dev/null 2>/dev/null') - - plot([ 'alloc_bytes_all.txt', - 'waste_bytes_all.txt', - 'free_bytes_all.txt' ], 'alloc_waste_free_all.png') - plot([ 'alloc_bytes_all.txt', - 'waste_bytes_all.txt', - 'free_bytes_all.txt', - 'ajs_hwm_bytes_all.txt', - 'ajs_use_bytes_all.txt', - 'ajs_waste_bytes_all.txt' ], 'alloc_waste_free_withajs_all.png') - plot([ 'alloc_bytes_all.txt', - 'waste_bytes_all.txt' ], 'alloc_waste_all.png') - plot([ 'alloc_bytes_all.txt', - 'waste_bytes_all.txt', - 'ajs_hwm_bytes_all.txt', - 'ajs_use_bytes_all.txt', - 'ajs_waste_bytes_all.txt' ], 'alloc_waste_withajs_all.png') - - for sz in ps.getSizes(): - plot([ 'alloc_bytes_%d.txt' % sz, - 'waste_bytes_%d.txt' % sz, - 'free_bytes_%d.txt' % sz ], 'alloc_waste_free_%d.png' % sz) - plot([ 'alloc_bytes_%d.txt' % sz, - 'waste_bytes_%d.txt' % sz, - 'free_bytes_%d.txt' % sz, - 'ajs_hwm_bytes_%d.txt' % sz, - 'ajs_use_bytes_%d.txt' % sz, - 'ajs_waste_bytes_%d.txt' % sz ], 'alloc_waste_free_withajs_%d.png' % sz) - plot([ 'alloc_bytes_%d.txt' % sz, - 'waste_bytes_%d.txt' % sz ], 'alloc_waste_%d.png' % sz) - plot([ 'alloc_bytes_%d.txt' % sz, - 'waste_bytes_%d.txt' % sz, - 'ajs_hwm_bytes_%d.txt' % sz, - 'ajs_use_bytes_%d.txt' % sz, - 'ajs_waste_bytes_%d.txt' % sz ], 'alloc_waste_withajs_%d.png' % sz) - - # plots containing all pool sizes in a timeline - for name in [ 'alloc', 'waste' ]: - files = [] - for sz in ps.getSizes(): - files.append('%s_bytes_%d.txt' % (name, sz)) - plot(files, '%s_bytes_allpools.png' % name) - - # autoplot for all data files - for fn in os.listdir(out_dir): - fn_txt = os.path.join(out_dir, fn) - if not fn_txt.endswith('.txt'): - continue - fn_png = os.path.splitext(fn_txt)[0] + '.png' - if os.path.exists(fn_png): - continue - - plot([ fn ], fn_png) - - # XXX: plots for snapshots + def plot(files, out_fn): + f = open('/tmp/gnuplot-commands', 'wb') + f.write('set terminal dumb\n') + for idx, fn in enumerate(files): + full_fn = os.path.join(out_dir, fn) + cmd = 'plot' + if idx > 0: + cmd = 'replot' + f.write('%s "%s" with lines\n' % (cmd, full_fn)) + #f.write('%s "%s" with boxes\n' 
% (cmd, full_fn)) + f.write('set terminal pngcairo size 1024,768\n') + f.write('set output "%s"\n' % os.path.join(out_dir, out_fn)) + f.write('replot\n') + f.close() + + os.system('gnuplot /dev/null 2>/dev/null') + + plot([ 'alloc_bytes_all.txt', + 'waste_bytes_all.txt', + 'free_bytes_all.txt' ], 'alloc_waste_free_all.png') + plot([ 'alloc_bytes_all.txt', + 'waste_bytes_all.txt', + 'free_bytes_all.txt', + 'ajs_hwm_bytes_all.txt', + 'ajs_use_bytes_all.txt', + 'ajs_waste_bytes_all.txt' ], 'alloc_waste_free_withajs_all.png') + plot([ 'alloc_bytes_all.txt', + 'waste_bytes_all.txt' ], 'alloc_waste_all.png') + plot([ 'alloc_bytes_all.txt', + 'waste_bytes_all.txt', + 'ajs_hwm_bytes_all.txt', + 'ajs_use_bytes_all.txt', + 'ajs_waste_bytes_all.txt' ], 'alloc_waste_withajs_all.png') + + for sz in ps.getSizes(): + plot([ 'alloc_bytes_%d.txt' % sz, + 'waste_bytes_%d.txt' % sz, + 'free_bytes_%d.txt' % sz ], 'alloc_waste_free_%d.png' % sz) + plot([ 'alloc_bytes_%d.txt' % sz, + 'waste_bytes_%d.txt' % sz, + 'free_bytes_%d.txt' % sz, + 'ajs_hwm_bytes_%d.txt' % sz, + 'ajs_use_bytes_%d.txt' % sz, + 'ajs_waste_bytes_%d.txt' % sz ], 'alloc_waste_free_withajs_%d.png' % sz) + plot([ 'alloc_bytes_%d.txt' % sz, + 'waste_bytes_%d.txt' % sz ], 'alloc_waste_%d.png' % sz) + plot([ 'alloc_bytes_%d.txt' % sz, + 'waste_bytes_%d.txt' % sz, + 'ajs_hwm_bytes_%d.txt' % sz, + 'ajs_use_bytes_%d.txt' % sz, + 'ajs_waste_bytes_%d.txt' % sz ], 'alloc_waste_withajs_%d.png' % sz) + + # plots containing all pool sizes in a timeline + for name in [ 'alloc', 'waste' ]: + files = [] + for sz in ps.getSizes(): + files.append('%s_bytes_%d.txt' % (name, sz)) + plot(files, '%s_bytes_allpools.png' % name) + + # autoplot for all data files + for fn in os.listdir(out_dir): + fn_txt = os.path.join(out_dir, fn) + if not fn_txt.endswith('.txt'): + continue + fn_png = os.path.splitext(fn_txt)[0] + '.png' + if os.path.exists(fn_png): + continue + + plot([ fn ], fn_png) + + # XXX: plots for snapshots #--------------------------------------------------------------------------- # @@ -537,190 +537,190 @@ def gnuplotGraphs(ps, out_dir): # Summary a pool config into a one-line string. def configOneLiner(cfg): - total_bytes = 0 - res = '' - for i in xrange(len(cfg['pools'])): - p1 = cfg['pools'][i] - total_bytes += p1['size'] * p1['count'] - res += ' %r=%r' % (p1['size'], p1['count']) + total_bytes = 0 + res = '' + for i in xrange(len(cfg['pools'])): + p1 = cfg['pools'][i] + total_bytes += p1['size'] * p1['count'] + res += ' %r=%r' % (p1['size'], p1['count']) - res = ('total %d:' % total_bytes) + res - return res + res = ('total %d:' % total_bytes) + res + return res # Convert a pool config into an ajs_heap.c AJS_HeapConfig initializer. 
def configToAjsHeader(cfg): - ind = ' ' - cfgName = 'heapConfig' - - res = [] - res.append('/* optimized using pool_simulator.py */') - res.append('static const AJS_HeapConfig %s[] = {' % cfgName) - res.append('%s/* %d bytes total */' % (ind, cfg['total_bytes'])) - for i in xrange(len(cfg['pools'])): - p = cfg['pools'][i] - if p['count'] == 0: - continue - borrow = '0' - if p.get('borrow', False): - borrow = 'AJS_POOL_BORROW' - comma = ',' # could remove, need to know which line is last (zero counts affect it) - res.append('%s{ %-7d, %-5d, %-16s, %d }%s /* %7d bytes */' % \ - (ind, p['size'], p['count'], borrow, p.get('heap_index', 0), comma, - p['size'] * p['count'])) - res.append('};') - return '\n'.join(res) + '\n' + ind = ' ' + cfgName = 'heapConfig' + + res = [] + res.append('/* optimized using pool_simulator.py */') + res.append('static const AJS_HeapConfig %s[] = {' % cfgName) + res.append('%s/* %d bytes total */' % (ind, cfg['total_bytes'])) + for i in xrange(len(cfg['pools'])): + p = cfg['pools'][i] + if p['count'] == 0: + continue + borrow = '0' + if p.get('borrow', False): + borrow = 'AJS_POOL_BORROW' + comma = ',' # could remove, need to know which line is last (zero counts affect it) + res.append('%s{ %-7d, %-5d, %-16s, %d }%s /* %7d bytes */' % \ + (ind, p['size'], p['count'], borrow, p.get('heap_index', 0), comma, + p['size'] * p['count'])) + res.append('};') + return '\n'.join(res) + '\n' # Recompute 'total_bytes' of the pool (useful after modifications). def recomputePoolTotal(cfg): - total_bytes = 0 - for i in xrange(len(cfg['pools'])): - p1 = cfg['pools'][i] - total_bytes += p1['size'] * p1['count'] - cfg['total_bytes'] = total_bytes - return cfg # in-place + total_bytes = 0 + for i in xrange(len(cfg['pools'])): + p1 = cfg['pools'][i] + total_bytes += p1['size'] * p1['count'] + cfg['total_bytes'] = total_bytes + return cfg # in-place # Create a new pool config with pool counts added together. def addPoolCounts(cfg1, cfg2): - pools = [] - cfg = { 'pools': pools } - - if len(cfg1['pools']) != len(cfg2['pools']): - raise Exception('incompatible pool configs') - for i in xrange(len(cfg1['pools'])): - p1 = cfg1['pools'][i] - p2 = cfg2['pools'][i] - if p1['size'] != p2['size']: - raise Exception('incompatible pool configs') - p3 = clonePoolCleaned(p1) - p3['count'] = p1['count'] + p2['count'] - pools.append(p3) - recomputePoolTotal(cfg) - return cfg + pools = [] + cfg = { 'pools': pools } + + if len(cfg1['pools']) != len(cfg2['pools']): + raise Exception('incompatible pool configs') + for i in xrange(len(cfg1['pools'])): + p1 = cfg1['pools'][i] + p2 = cfg2['pools'][i] + if p1['size'] != p2['size']: + raise Exception('incompatible pool configs') + p3 = clonePoolCleaned(p1) + p3['count'] = p1['count'] + p2['count'] + pools.append(p3) + recomputePoolTotal(cfg) + return cfg # Create a new pool config with pool counts subtracts (result = cfg1 - cfg2). 
def subtractPoolCounts(cfg1, cfg2): - pools = [] - cfg = { 'pools': pools } - - if len(cfg1['pools']) != len(cfg2['pools']): - raise Exception('incompatible pool configs') - for i in xrange(len(cfg1['pools'])): - p1 = cfg1['pools'][i] - p2 = cfg2['pools'][i] - if p1['size'] != p2['size']: - raise Exception('incompatible pool configs') - p3 = clonePoolCleaned(p1) - p3['count'] = p1['count'] - p2['count'] - if p3['count'] < 0: - print 'Warning: pool count went negative, replace with zero' - p3['count'] = 0 - #raise Exception('pool count went negative') - pools.append(p3) - recomputePoolTotal(cfg) - return cfg + pools = [] + cfg = { 'pools': pools } + + if len(cfg1['pools']) != len(cfg2['pools']): + raise Exception('incompatible pool configs') + for i in xrange(len(cfg1['pools'])): + p1 = cfg1['pools'][i] + p2 = cfg2['pools'][i] + if p1['size'] != p2['size']: + raise Exception('incompatible pool configs') + p3 = clonePoolCleaned(p1) + p3['count'] = p1['count'] - p2['count'] + if p3['count'] < 0: + print 'Warning: pool count went negative, replace with zero' + p3['count'] = 0 + #raise Exception('pool count went negative') + pools.append(p3) + recomputePoolTotal(cfg) + return cfg # Create a new pool config with pool count being the maximum of all input # configs (for each pool size). def maxPoolCounts(cfglist): - cfg1 = json.loads(json.dumps(cfglist[0])) # start from clone of first config - - for cfg2 in cfglist: - if len(cfg1['pools']) != len(cfg2['pools']): - raise Exception('incompatible pool configs') - for i in xrange(len(cfg1['pools'])): - p1 = cfg1['pools'][i] - p2 = cfg2['pools'][i] - if p1['size'] != p2['size']: - raise Exception('incompatible pool configs') - p1['count'] = max(p1['count'], p2['count']) - recomputePoolTotal(cfg1) - return cfg1 + cfg1 = json.loads(json.dumps(cfglist[0])) # start from clone of first config + + for cfg2 in cfglist: + if len(cfg1['pools']) != len(cfg2['pools']): + raise Exception('incompatible pool configs') + for i in xrange(len(cfg1['pools'])): + p1 = cfg1['pools'][i] + p2 = cfg2['pools'][i] + if p1['size'] != p2['size']: + raise Exception('incompatible pool configs') + p1['count'] = max(p1['count'], p2['count']) + recomputePoolTotal(cfg1) + return cfg1 # Scale pool counts with a factor, leaving pool counts fractional. def scalePoolCountsFractional(cfg1, factor): - pools = [] - cfg = { 'pools': pools } + pools = [] + cfg = { 'pools': pools } - for i in xrange(len(cfg1['pools'])): - p1 = cfg1['pools'][i] - p2 = clonePoolCleaned(p1) - p2['count'] = factor * p1['count'] # fractional - pools.append(p2) - recomputePoolTotal(cfg) - return cfg + for i in xrange(len(cfg1['pools'])): + p1 = cfg1['pools'][i] + p2 = clonePoolCleaned(p1) + p2['count'] = factor * p1['count'] # fractional + pools.append(p2) + recomputePoolTotal(cfg) + return cfg # Round pool counts to integer values with a configurable threshold. 
def roundPoolCounts(cfg1, threshold): - pools = [] - cfg = { 'pools': pools } - - for i in xrange(len(cfg1['pools'])): - p1 = cfg1['pools'][i] - count = math.floor(p1['count']) - if p1['count'] - count > threshold: - count += 1 - p2 = clonePoolCleaned(p1) - p2['count'] = int(count) - pools.append(p2) - recomputePoolTotal(cfg) - return cfg + pools = [] + cfg = { 'pools': pools } + + for i in xrange(len(cfg1['pools'])): + p1 = cfg1['pools'][i] + count = math.floor(p1['count']) + if p1['count'] - count > threshold: + count += 1 + p2 = clonePoolCleaned(p1) + p2['count'] = int(count) + pools.append(p2) + recomputePoolTotal(cfg) + return cfg def optimizePoolCountsForMemory(cfg_duktape, cfg_apps, target_memory): - print('Duktape baseline: %s' % configOneLiner(cfg_duktape)) - - # Subtract Duktape baseline from app memory usage - for i in xrange(len(cfg_apps)): - print('App with Duktape baseline: %s' % configOneLiner(cfg_apps[i])) - cfg = subtractPoolCounts(cfg_apps[i], cfg_duktape) - cfg_apps[i] = cfg - print('App minus Duktape baseline: %s' % configOneLiner(cfg)) - - # Normalize app memory usage - normalized_memory = 1024.0 * 1024.0 # number doesn't really matter, fractions used - for i in xrange(len(cfg_apps)): - cfg = cfg_apps[i] - factor = normalized_memory / cfg['total_bytes'] - cfg = scalePoolCountsFractional(cfg, factor) - cfg_apps[i] = cfg - print('Scaled app %d: %s' % (i, configOneLiner(cfg))) - - # Establish a representative profile over normalized application - # profiles (over Duktape baseline). - cfg_rep = maxPoolCounts(cfg_apps) - print('Representative: %s' % configOneLiner(cfg_rep)) - - # Scale (fractionally) to total bytes - factor = (target_memory - cfg_duktape['total_bytes']) / cfg_rep['total_bytes'] - cfg_res = scalePoolCountsFractional(cfg_rep, factor) - cfg_res = addPoolCounts(cfg_duktape, cfg_res) - print('Fractional result: %s' % configOneLiner(cfg_res)) - - # Round to integer pool counts with a sliding rounding - # threshold so that we meet target memory as closely - # as possible - round_threshold = 1.0 - round_step = 0.0001 - round_threshold += round_step - while True: - cfg_tmp = roundPoolCounts(cfg_res, round_threshold - round_step) - #print('rounding... %f -> %d total bytes' % (round_threshold, cfg_tmp['total_bytes'])) - if cfg_tmp['total_bytes'] > target_memory: - # previous value was good - break - - round_threshold -= round_step - if round_threshold < 0.0: - print('should not happen') - round_threshold = 0.0 - break - - print('Final round threshold: %f' % round_threshold) - cfg_final = roundPoolCounts(cfg_res, round_threshold) - - # XXX: negative pool counts - - print('Final pools: %s' % configOneLiner(cfg_final)) - return cfg_final + print('Duktape baseline: %s' % configOneLiner(cfg_duktape)) + + # Subtract Duktape baseline from app memory usage + for i in xrange(len(cfg_apps)): + print('App with Duktape baseline: %s' % configOneLiner(cfg_apps[i])) + cfg = subtractPoolCounts(cfg_apps[i], cfg_duktape) + cfg_apps[i] = cfg + print('App minus Duktape baseline: %s' % configOneLiner(cfg)) + + # Normalize app memory usage + normalized_memory = 1024.0 * 1024.0 # number doesn't really matter, fractions used + for i in xrange(len(cfg_apps)): + cfg = cfg_apps[i] + factor = normalized_memory / cfg['total_bytes'] + cfg = scalePoolCountsFractional(cfg, factor) + cfg_apps[i] = cfg + print('Scaled app %d: %s' % (i, configOneLiner(cfg))) + + # Establish a representative profile over normalized application + # profiles (over Duktape baseline). 
+ cfg_rep = maxPoolCounts(cfg_apps) + print('Representative: %s' % configOneLiner(cfg_rep)) + + # Scale (fractionally) to total bytes + factor = (target_memory - cfg_duktape['total_bytes']) / cfg_rep['total_bytes'] + cfg_res = scalePoolCountsFractional(cfg_rep, factor) + cfg_res = addPoolCounts(cfg_duktape, cfg_res) + print('Fractional result: %s' % configOneLiner(cfg_res)) + + # Round to integer pool counts with a sliding rounding + # threshold so that we meet target memory as closely + # as possible + round_threshold = 1.0 + round_step = 0.0001 + round_threshold += round_step + while True: + cfg_tmp = roundPoolCounts(cfg_res, round_threshold - round_step) + #print('rounding... %f -> %d total bytes' % (round_threshold, cfg_tmp['total_bytes'])) + if cfg_tmp['total_bytes'] > target_memory: + # previous value was good + break + + round_threshold -= round_step + if round_threshold < 0.0: + print('should not happen') + round_threshold = 0.0 + break + + print('Final round threshold: %f' % round_threshold) + cfg_final = roundPoolCounts(cfg_res, round_threshold) + + # XXX: negative pool counts + + print('Final pools: %s' % configOneLiner(cfg_final)) + return cfg_final #--------------------------------------------------------------------------- # @@ -729,26 +729,26 @@ def optimizePoolCountsForMemory(cfg_duktape, cfg_apps, target_memory): # Simulate an allocation log and write out a lot of statistics and graphs. def cmd_simulate(opts, args): - dprint('Init pool simulator') - ps = PoolSimulator(readJson(opts.pool_config), borrow=True, extend=False) + dprint('Init pool simulator') + ps = PoolSimulator(readJson(opts.pool_config), borrow=True, extend=False) - dprint('Process allocation log') - f = open(opts.alloc_log) - processAllocLog(ps, f, opts.out_dir) - f.close() + dprint('Process allocation log') + f = open(opts.alloc_log) + processAllocLog(ps, f, opts.out_dir) + f.close() - dprint('Write tight pool config based on hwm') - cfg = ps.getTightHwmConfig() - f = open(os.path.join(opts.out_dir, 'config_tight.json'), 'wb') - f.write(json.dumps(cfg, indent=4)) - f.close() + dprint('Write tight pool config based on hwm') + cfg = ps.getTightHwmConfig() + f = open(os.path.join(opts.out_dir, 'config_tight.json'), 'wb') + f.write(json.dumps(cfg, indent=4)) + f.close() - dprint('Plot graphs (gnuplot)') - gnuplotGraphs(ps, opts.out_dir) + dprint('Plot graphs (gnuplot)') + gnuplotGraphs(ps, opts.out_dir) - dprint('Finished, output is in: ' + str(opts.out_dir)) + dprint('Finished, output is in: ' + str(opts.out_dir)) - #print(json.dumps(ps.state)) + #print(json.dumps(ps.state)) # Simulate an allocation log and optimize pool counts to tight values. # @@ -761,116 +761,116 @@ def cmd_simulate(opts, args): # results in pool counts which should be close to minimum values when # borrowing behavior is taken into account. def cmd_tight_counts(opts, args, borrow_optimize): - # Get hwm profile with "autoextend", i.e. no borrowing + # Get hwm profile with "autoextend", i.e. 
no borrowing - print('Get hwm pool count profile with autoextend enabled (= no borrowing)') - ps = PoolSimulator(readJson(opts.pool_config), borrow=False, extend=True) - f = open(opts.alloc_log) - processAllocLog(ps, f, opts.out_dir, throw_on_oom=True, emit_files=False) - f.close() + print('Get hwm pool count profile with autoextend enabled (= no borrowing)') + ps = PoolSimulator(readJson(opts.pool_config), borrow=False, extend=True) + f = open(opts.alloc_log) + processAllocLog(ps, f, opts.out_dir, throw_on_oom=True, emit_files=False) + f.close() - cfg = ps.getTightHwmConfig() - print('Tight config based on hwm, no borrowing: %s' % configOneLiner(cfg)) - f = open(os.path.join(opts.out_dir, 'config_tight.json'), 'wb') - f.write(json.dumps(cfg, indent=4)) - f.close() + cfg = ps.getTightHwmConfig() + print('Tight config based on hwm, no borrowing: %s' % configOneLiner(cfg)) + f = open(os.path.join(opts.out_dir, 'config_tight.json'), 'wb') + f.write(json.dumps(cfg, indent=4)) + f.close() - if not borrow_optimize: - return cfg + if not borrow_optimize: + return cfg - # Optimize pool counts taking borrowing into account. Not very - # optimal but step resizing ensures there shouldn't be pathological - # cases (which might happen if step was -1). + # Optimize pool counts taking borrowing into account. Not very + # optimal but step resizing ensures there shouldn't be pathological + # cases (which might happen if step was -1). - print('Optimizing pool counts taking borrowing into account (takes a while)...') + print('Optimizing pool counts taking borrowing into account (takes a while)...') - for i in xrange(len(cfg['pools']) - 1, -1, -1): - p = cfg['pools'][i] + for i in xrange(len(cfg['pools']) - 1, -1, -1): + p = cfg['pools'][i] - step = 1 - while step < p['count']: - step *= 2 - highest_fail = -1 + step = 1 + while step < p['count']: + step *= 2 + highest_fail = -1 - while p['count'] > 0 and step > 0: - prev_count = p['count'] - p['count'] -= step - print('Reduce count for pool size %d bytes from %r to %r and resimulate' % (p['size'], prev_count, p['count'])) + while p['count'] > 0 and step > 0: + prev_count = p['count'] + p['count'] -= step + print('Reduce count for pool size %d bytes from %r to %r and resimulate' % (p['size'], prev_count, p['count'])) - # XXX: emits unused snapshots, optimize + # XXX: emits unused snapshots, optimize - if p['count'] <= highest_fail: - # we know this will fail - success = False - else: - ps = PoolSimulator(cfg, borrow=True, extend=False) - f = open(opts.alloc_log) - success = processAllocLog(ps, f, opts.out_dir, throw_on_oom=False, emit_files=False) - f.close() + if p['count'] <= highest_fail: + # we know this will fail + success = False + else: + ps = PoolSimulator(cfg, borrow=True, extend=False) + f = open(opts.alloc_log) + success = processAllocLog(ps, f, opts.out_dir, throw_on_oom=False, emit_files=False) + f.close() - if not success: - highest_fail = max(highest_fail, p['count']) - p['count'] = prev_count - step /= 2 + if not success: + highest_fail = max(highest_fail, p['count']) + p['count'] = prev_count + step /= 2 - print('Pool config after size %d: %s' % (p['size'], configOneLiner(cfg))) + print('Pool config after size %d: %s' % (p['size'], configOneLiner(cfg))) - print('Tight config based on hwm and optimizing borrowing: %s' % configOneLiner(cfg)) - return cfg + print('Tight config based on hwm and optimizing borrowing: %s' % configOneLiner(cfg)) + return cfg # Main entry point. 
def main(): - parser = optparse.OptionParser() - parser.add_option('--out-dir', dest='out_dir') - parser.add_option('--pool-config', dest='pool_config') - parser.add_option('--alloc-log', dest='alloc_log') - parser.add_option('--out-pool-config', dest='out_pool_config') - parser.add_option('--out-ajsheap-config', dest='out_ajsheap_config', default=None) - (opts, args) = parser.parse_args() - - if not os.path.isdir(opts.out_dir): - raise Exception('--out-dir argument is not a directory') - if len(os.listdir(opts.out_dir)) > 0: - raise Exception('--out-dir argument is not empty') - - def writeOutputs(cfg): - writeJson(opts.out_pool_config, cfg) - if opts.out_ajsheap_config is not None: - writeFile(opts.out_ajsheap_config, configToAjsHeader(cfg)) - - cmd = args[0] - if cmd == 'simulate': - cmd_simulate(opts, args) - elif cmd == 'tight_counts_noborrow': - cfg = cmd_tight_counts(opts, args, False) - writeOutputs(cfg) - elif cmd == 'tight_counts_borrow': - cfg = cmd_tight_counts(opts, args, True) - writeOutputs(cfg) - elif cmd == 'subtract_pool_counts': - # XXX: unused - cfg1 = readJson(args[1]) - cfg2 = readJson(args[2]) - cfg3 = subtractPoolCounts(cfg1, cfg2) - writeOutputs(cfg3) - elif cmd == 'max_pool_counts': - # XXX: unused - # Not very useful without normalization. - cfg = maxPoolCounts(args[1:]) - writeOutputs(cfg) - elif cmd == 'pool_counts_for_memory': - target_memory = long(args[1]) - cfg_duktape = readJson(args[2]) - print('Duktape baseline: %d bytes' % cfg_duktape['total_bytes']) - cfg_apps = [] - for arg in args[3:]: - cfg = readJson(arg) - cfg_apps.append(cfg) - print('Application: %d bytes' % cfg['total_bytes']) - cfg = optimizePoolCountsForMemory(cfg_duktape, cfg_apps, target_memory) - writeOutputs(cfg) - else: - raise Exception('invalid command ' + str(cmd)) + parser = optparse.OptionParser() + parser.add_option('--out-dir', dest='out_dir') + parser.add_option('--pool-config', dest='pool_config') + parser.add_option('--alloc-log', dest='alloc_log') + parser.add_option('--out-pool-config', dest='out_pool_config') + parser.add_option('--out-ajsheap-config', dest='out_ajsheap_config', default=None) + (opts, args) = parser.parse_args() + + if not os.path.isdir(opts.out_dir): + raise Exception('--out-dir argument is not a directory') + if len(os.listdir(opts.out_dir)) > 0: + raise Exception('--out-dir argument is not empty') + + def writeOutputs(cfg): + writeJson(opts.out_pool_config, cfg) + if opts.out_ajsheap_config is not None: + writeFile(opts.out_ajsheap_config, configToAjsHeader(cfg)) + + cmd = args[0] + if cmd == 'simulate': + cmd_simulate(opts, args) + elif cmd == 'tight_counts_noborrow': + cfg = cmd_tight_counts(opts, args, False) + writeOutputs(cfg) + elif cmd == 'tight_counts_borrow': + cfg = cmd_tight_counts(opts, args, True) + writeOutputs(cfg) + elif cmd == 'subtract_pool_counts': + # XXX: unused + cfg1 = readJson(args[1]) + cfg2 = readJson(args[2]) + cfg3 = subtractPoolCounts(cfg1, cfg2) + writeOutputs(cfg3) + elif cmd == 'max_pool_counts': + # XXX: unused + # Not very useful without normalization. 
+ cfg = maxPoolCounts(args[1:]) + writeOutputs(cfg) + elif cmd == 'pool_counts_for_memory': + target_memory = long(args[1]) + cfg_duktape = readJson(args[2]) + print('Duktape baseline: %d bytes' % cfg_duktape['total_bytes']) + cfg_apps = [] + for arg in args[3:]: + cfg = readJson(arg) + cfg_apps.append(cfg) + print('Application: %d bytes' % cfg['total_bytes']) + cfg = optimizePoolCountsForMemory(cfg_duktape, cfg_apps, target_memory) + writeOutputs(cfg) + else: + raise Exception('invalid command ' + str(cmd)) if __name__ == '__main__': - main() + main() diff --git a/misc/bin2img.py b/misc/bin2img.py index 7c5cdf9b..55dbe7c6 100644 --- a/misc/bin2img.py +++ b/misc/bin2img.py @@ -2,51 +2,50 @@ import sys from PIL import Image def main(): - f = open(sys.argv[1], 'rb') - data = f.read() - f.close() - - use_bits = True - - BYTESPERLINE = 128 - BITSPERLINE = BYTESPERLINE * 8 - - if use_bits: - width = BITSPERLINE - height = (len(data) * 8 + BITSPERLINE - 1) / BITSPERLINE - else: - width = BYTESPERLINE - height = (len(data) + BYTESPERLINE - 1) / BYTESPERLINE - - img = Image.new('RGBA', (width, height)) - for y in xrange(height): - if use_bits: - for x in xrange(width): - idx = y * BYTESPERLINE + (x / 8) - bitidx = x % 8 # 0 = topmost - - if idx >= len(data): - img.putpixel((x,y), (255, 255, 255, 255)) - else: - v = ord(data[idx]) - v = (v >> (7 - bitidx)) & 0x01 - if v > 0: - v = 0 - else: - v = 255 - img.putpixel((x,y), (v, v, v, 255)) - else: - for x in xrange(width): - idx = y * BYTESPERLINE + x - - if idx >= len(data): - img.putpixel((x,y), (255, 255, 255, 255)) - else: - v = ord(data[idx]) - img.putpixel((x,y), (v, v, v, 255)) - - img.save(sys.argv[2]) + f = open(sys.argv[1], 'rb') + data = f.read() + f.close() + + use_bits = True + + BYTESPERLINE = 128 + BITSPERLINE = BYTESPERLINE * 8 + + if use_bits: + width = BITSPERLINE + height = (len(data) * 8 + BITSPERLINE - 1) / BITSPERLINE + else: + width = BYTESPERLINE + height = (len(data) + BYTESPERLINE - 1) / BYTESPERLINE + + img = Image.new('RGBA', (width, height)) + for y in xrange(height): + if use_bits: + for x in xrange(width): + idx = y * BYTESPERLINE + (x / 8) + bitidx = x % 8 # 0 = topmost + + if idx >= len(data): + img.putpixel((x,y), (255, 255, 255, 255)) + else: + v = ord(data[idx]) + v = (v >> (7 - bitidx)) & 0x01 + if v > 0: + v = 0 + else: + v = 255 + img.putpixel((x,y), (v, v, v, 255)) + else: + for x in xrange(width): + idx = y * BYTESPERLINE + x + + if idx >= len(data): + img.putpixel((x,y), (255, 255, 255, 255)) + else: + v = ord(data[idx]) + img.putpixel((x,y), (v, v, v, 255)) + + img.save(sys.argv[2]) if __name__ == '__main__': - main() - + main() diff --git a/misc/c_overflow_test.py b/misc/c_overflow_test.py index 196f3a50..999de034 100644 --- a/misc/c_overflow_test.py +++ b/misc/c_overflow_test.py @@ -4,23 +4,23 @@ import math limit = (1 << 32) - 1 for i in xrange(65536 + 10): - if i == 0: - continue + if i == 0: + continue - temp = float(1 << 32) / float(i) - approx1 = int(math.floor(temp) - 3) - approx2 = int(math.floor(temp + 3)) - for j in xrange(approx1, approx2 + 1): - if i*j >= (1 << 32): - exact = True - else: - exact = False + temp = float(1 << 32) / float(i) + approx1 = int(math.floor(temp) - 3) + approx2 = int(math.floor(temp + 3)) + for j in xrange(approx1, approx2 + 1): + if i*j >= (1 << 32): + exact = True + else: + exact = False - if i > limit / j: - check = True - else: - check = False + if i > limit / j: + check = True + else: + check = False - #print(i, j, exact, check) - if exact != check: - 
print('inexact', i, j) + #print(i, j, exact, check) + if exact != check: + print('inexact', i, j) diff --git a/misc/chaos.py b/misc/chaos.py index 82d5d6f0..76b3ca50 100644 --- a/misc/chaos.py +++ b/misc/chaos.py @@ -10,4 +10,3 @@ data = sys.stdin.read() data = data.strip() data = data.decode('hex') sys.stdout.write(data) - diff --git a/src/dukutil.py b/src/dukutil.py deleted file mode 100644 index ef85ff6a..00000000 --- a/src/dukutil.py +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/bin/env python2 -# -# Python utilities shared by the build scripts. -# - -import datetime -import json - -class BitEncoder: - "Bitstream encoder." - - _bits = None - - def __init__(self): - self._bits = [] - - def bits(self, x, nbits): - if (x >> nbits) != 0: - raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits)) - for i in xrange(nbits): - t = (x >> (nbits - i - 1)) & 0x01 - self._bits.append(t) - - def string(self, x): - nbits = len(x) * 8 - - for i in xrange(nbits): - byteidx = i / 8 - bitidx = i % 8 - if byteidx < 0 or byteidx >= len(x): - self._bits.append(0) - else: - t = (ord(x[byteidx]) >> (7 - bitidx)) & 0x01 - self._bits.append(t) - - def getNumBits(self): - "Get current number of encoded bits." - return len(self._bits) - - def getNumBytes(self): - "Get current number of encoded bytes, rounded up." - nbits = len(self._bits) - while (nbits % 8) != 0: - nbits += 1 - return nbits / 8 - - def getBytes(self): - "Get current bitstream as a byte sequence, padded with zero bits." - bytes = [] - - for i in xrange(self.getNumBytes()): - t = 0 - for j in xrange(8): - off = i*8 + j - if off >= len(self._bits): - t = (t << 1) - else: - t = (t << 1) + self._bits[off] - bytes.append(t) - - return bytes - - def getByteString(self): - "Get current bitstream as a string." - return ''.join([chr(i) for i in self.getBytes()]) - -class GenerateC: - "Helper for generating C source and header files." - - _data = None - wrap_col = 76 - - def __init__(self): - self._data = [] - - def emitRaw(self, text): - "Emit raw text (without automatic newline)." - self._data.append(text) - - def emitLine(self, text): - "Emit a raw line (with automatic newline)." - self._data.append(text + '\n') - - def emitHeader(self, autogen_by): - "Emit file header comments." - - # Note: a timestamp would be nice but it breaks incremental building - self.emitLine('/*') - self.emitLine(' * Automatically generated by %s, do not edit!' % autogen_by) - self.emitLine(' */') - self.emitLine('') - - def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True): - "Emit an array as a C array." 
- - # lenient input - if isinstance(data, unicode): - data = data.encode('utf-8') - if isinstance(data, str): - tmp = [] - for i in xrange(len(data)): - tmp.append(ord(data[i])) - data = tmp - - size_spec = '' - if size is not None: - size_spec = '%d' % size - visib_qual = '' - if visibility is not None: - visib_qual = visibility + ' ' - const_qual = '' - if const: - const_qual = 'const ' - self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec)) - - line = '' - for i in xrange(len(data)): - if intvalues: - suffix = '' - if data[i] < -32768 or data[i] > 32767: - suffix = 'L' - t = "%d%s," % (data[i], suffix) - else: - t = "(%s)'\\x%02x', " % (typename, data[i]) - if len(line) + len(t) >= self.wrap_col: - self.emitLine(line) - line = t - else: - line += t - if line != '': - self.emitLine(line) - self.emitLine('};') - - def emitDefine(self, name, value, comment=None): - "Emit a C define with an optional comment." - - # XXX: there is no escaping right now (for comment or value) - if comment is not None: - self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment)) - else: - self.emitLine('#define %-60s %s' % (name, value)) - - def getString(self): - "Get the entire file as a string." - return ''.join(self._data) - -def json_encode(x): - "JSON encode a value." - try: - return json.dumps(x) - except AttributeError: - pass - - # for older library versions - return json.write(x) - -def json_decode(x): - "JSON decode a value." - try: - return json.loads(x) - except AttributeError: - pass - - # for older library versions - return json.read(x) - -# Compute a byte hash identical to duk_util_hashbytes(). -DUK__MAGIC_M = 0x5bd1e995 -DUK__MAGIC_R = 24 -def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian): - h = (str_seed ^ nbytes) & 0xffffffff - - while nbytes >= 4: - # 4-byte fetch byte order: - # - native (endian dependent) if unaligned accesses allowed - # - little endian if unaligned accesses not allowed - - if big_endian: - k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \ - (ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24) - else: - k = ord(x[off]) + (ord(x[off + 1]) << 8) + \ - (ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24) - - k = (k * DUK__MAGIC_M) & 0xffffffff - k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff - k = (k * DUK__MAGIC_M) & 0xffffffff - h = (h * DUK__MAGIC_M) & 0xffffffff - h = (h ^ k) & 0xffffffff - - off += 4 - nbytes -= 4 - - if nbytes >= 3: - h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff - if nbytes >= 2: - h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff - if nbytes >= 1: - h = (h ^ ord(x[off])) & 0xffffffff - h = (h * DUK__MAGIC_M) & 0xffffffff - - h = (h ^ (h >> 13)) & 0xffffffff - h = (h * DUK__MAGIC_M) & 0xffffffff - h = (h ^ (h >> 15)) & 0xffffffff - - return h - -# Compute a string hash identical to duk_heap_hashstring() when dense -# hashing is enabled. 
-DUK__STRHASH_SHORTSTRING = 4096 -DUK__STRHASH_MEDIUMSTRING = 256 * 1024 -DUK__STRHASH_BLOCKSIZE = 256 -def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False): - str_seed = (hash_seed ^ len(x)) & 0xffffffff - - if len(x) <= DUK__STRHASH_SHORTSTRING: - res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian) - else: - if len(x) <= DUK__STRHASH_MEDIUMSTRING: - skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE - else: - skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE - - res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian) - off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256 - - while off < len(x): - left = len(x) - off - now = left - if now > DUK__STRHASH_BLOCKSIZE: - now = DUK__STRHASH_BLOCKSIZE - res = (res ^ duk_util_hashbytes(str, off, now, str_seed, big_endian)) & 0xffffffff - off += skip - - if strhash16: - res &= 0xffff - - return res - -# Compute a string hash identical to duk_heap_hashstring() when sparse -# hashing is enabled. -DUK__STRHASH_SKIP_SHIFT = 5 # XXX: assumes default value -def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False): - res = (hash_seed ^ len(x)) & 0xffffffff - - step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1 - off = len(x) - while off >= step: - assert(off >= 1) - res = ((res * 33) + ord(x[off - 1])) & 0xffffffff - off -= step - - if strhash16: - res &= 0xffff - - return res - -# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length(). -def duk_unicode_unvalidated_utf8_length(x): - assert(isinstance(x, str)) - clen = 0 - for c in x: - t = ord(c) - if t < 0x80 or t >= 0xc0: # 0x80...0xbf are continuation chars, not counted - clen += 1 - return clen diff --git a/src/extract_caseconv.py b/src/extract_caseconv.py deleted file mode 100644 index a5a67f17..00000000 --- a/src/extract_caseconv.py +++ /dev/null @@ -1,444 +0,0 @@ -#!/usr/bin/env python2 -# -# Extract rules for Unicode case conversion, specifically the behavior -# required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19. The -# bitstream encoded rules are used for the slow path at run time, so -# compactness is favored over speed. -# -# There is no support for context or locale sensitive rules, as they -# are handled directly in C code before consulting tables generated -# here. Ecmascript requires case conversion both with and without -# locale/language specific rules (e.g. String.prototype.toLowerCase() -# and String.prototype.toLocaleLowerCase()), so they are best handled -# in C anyway. -# -# Case conversion rules for ASCII are also excluded as they are -# handled by C fast path. Rules for non-BMP characters (codepoints -# above U+FFFF) are omitted as they're not required for standard -# Ecmascript. -# - -import os, sys, math -import optparse -import dukutil - -class UnicodeData: - "Read UnicodeData.txt into an internal representation." 
- - def __init__(self, filename): - self.data = self.read_unicode_data(filename) - print 'read %d unicode data entries' % len(self.data) - - def read_unicode_data(self, filename): - res = [] - f = open(filename, 'rb') - for line in f: - if line.startswith('#'): - continue - line = line.strip() - if line == '': - continue - parts = line.split(';') - if len(parts) != 15: - raise Exception('invalid unicode data line') - res.append(parts) - f.close() - - # Sort based on Unicode codepoint - def mycmp(a,b): - return cmp(long(a[0], 16), long(b[0], 16)) - - res.sort(cmp=mycmp) - return res - -class SpecialCasing: - "Read SpecialCasing.txt into an internal representation." - - def __init__(self, filename): - self.data = self.read_special_casing_data(filename) - print 'read %d special casing entries' % len(self.data) - - def read_special_casing_data(self, filename): - res = [] - f = open(filename, 'rb') - for line in f: - try: - idx = line.index('#') - line = line[:idx] - except ValueError: - pass - line = line.strip() - if line == '': - continue - parts = line.split(';') - parts = [i.strip() for i in parts] - while len(parts) < 6: - parts.append('') - res.append(parts) - f.close() - return res - -def parse_unicode_sequence(x): - res = '' - for i in x.split(' '): - i = i.strip() - if i == '': - continue - res += unichr(long(i, 16)) - return res - -def get_base_conversion_maps(unicode_data): - "Create case conversion tables without handling special casing yet." - - uc = {} # codepoint (number) -> string - lc = {} - tc = {} # titlecase - - for x in unicode_data.data: - c1 = long(x[0], 16) - - # just 16-bit support needed - if c1 >= 0x10000: - continue - - if x[12] != '': - # field 12: simple uppercase mapping - c2 = parse_unicode_sequence(x[12]) - uc[c1] = c2 - tc[c1] = c2 # titlecase default == uppercase, overridden below if necessary - if x[13] != '': - # field 13: simple lowercase mapping - c2 = parse_unicode_sequence(x[13]) - lc[c1] = c2 - if x[14] != '': - # field 14: simple titlecase mapping - c2 = parse_unicode_sequence(x[14]) - tc[c1] = c2 - - return uc, lc, tc - -def update_special_casings(uc, lc, tc, special_casing): - "Update case conversion tables with special case conversion rules." - - for x in special_casing.data: - c1 = long(x[0], 16) - - if x[4] != '': - # conditions - continue - - lower = parse_unicode_sequence(x[1]) - title = parse_unicode_sequence(x[2]) - upper = parse_unicode_sequence(x[3]) - - if len(lower) > 1: - lc[c1] = lower - if len(upper) > 1: - uc[c1] = upper - if len(title) > 1: - tc[c1] = title - - print 'special case: %d %d %d' % (len(lower), len(upper), len(title)) - -def remove_ascii_part(convmap): - "Remove ASCII case conversion parts (handled by C fast path)." - - for i in xrange(128): - if convmap.has_key(i): - del convmap[i] - -def scan_range_with_skip(convmap, start_idx, skip): - "Scan for a range of continuous case conversion with a certain 'skip'." 
- - conv_i = start_idx - if not convmap.has_key(conv_i): - return None, None, None - elif len(convmap[conv_i]) > 1: - return None, None, None - else: - conv_o = ord(convmap[conv_i]) - - start_i = conv_i - start_o = conv_o - - while True: - new_i = conv_i + skip - new_o = conv_o + skip - - if not convmap.has_key(new_i): - break - if len(convmap[new_i]) > 1: - break - if ord(convmap[new_i]) != new_o: - break - - conv_i = new_i - conv_o = new_o - - # [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char - count = (conv_i - start_i) / skip + 1 - if count <= 1: - return None, None, None - - # we have an acceptable range, remove them from the convmap here - for i in xrange(start_i, conv_i + skip, skip): - del convmap[i] - - return start_i, start_o, count - -def find_first_range_with_skip(convmap, skip): - "Find first range with a certain 'skip' value." - - for i in xrange(65536): - start_i, start_o, count = scan_range_with_skip(convmap, i, skip) - if start_i is None: - continue - return start_i, start_o, count - - return None, None, None - -def generate_tables(convmap): - "Generate bit-packed case conversion table for a given conversion map." - - # The bitstream encoding is based on manual inspection for whatever - # regularity the Unicode case conversion rules have. - # - # Start with a full description of case conversions which does not - # cover all codepoints; unmapped codepoints convert to themselves. - # Scan for range-to-range mappings with a range of skips starting from 1. - # Whenever a valid range is found, remove it from the map. Finally, - # output the remaining case conversions (1:1 and 1:n) on a per codepoint - # basis. - # - # This is very slow because we always scan from scratch, but its the - # most reliable and simple way to scan - - ranges = [] # range mappings (2 or more consecutive mappings with a certain skip) - singles = [] # 1:1 character mappings - complex = [] # 1:n character mappings - - # Ranges with skips - - for skip in xrange(1,6+1): # skips 1...6 are useful - while True: - start_i, start_o, count = find_first_range_with_skip(convmap, skip) - if start_i is None: - break - print 'skip %d: %d %d %d' % (skip, start_i, start_o, count) - ranges.append([start_i, start_o, count, skip]) - - # 1:1 conversions - - k = convmap.keys() - k.sort() - for i in k: - if len(convmap[i]) > 1: - continue - singles.append([i, ord(convmap[i])]) # codepoint, codepoint - del convmap[i] - - # There are many mappings to 2-char sequences with latter char being U+0399. - # These could be handled as a special case, but we don't do that right now. - # - # [8064L, u'\u1f08\u0399'] - # [8065L, u'\u1f09\u0399'] - # [8066L, u'\u1f0a\u0399'] - # [8067L, u'\u1f0b\u0399'] - # [8068L, u'\u1f0c\u0399'] - # [8069L, u'\u1f0d\u0399'] - # [8070L, u'\u1f0e\u0399'] - # [8071L, u'\u1f0f\u0399'] - # ... - # - # tmp = {} - # k = convmap.keys() - # k.sort() - # for i in k: - # if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399': - # tmp[i] = convmap[i][0] - # del convmap[i] - # print repr(tmp) - # - # skip = 1 - # while True: - # start_i, start_o, count = find_first_range_with_skip(tmp, skip) - # if start_i is None: - # break - # print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count) - # print len(tmp.keys()) - # print repr(tmp) - # XXX: need to put 12 remaining mappings back to convmap... 
- - # 1:n conversions - - k = convmap.keys() - k.sort() - for i in k: - complex.append([i, convmap[i]]) # codepoint, string - del convmap[i] - - for t in singles: - print repr(t) - - for t in complex: - print repr(t) - - print 'range mappings: %d' % len(ranges) - print 'single character mappings: %d' % len(singles) - print 'complex mappings (1:n): %d' % len(complex) - print 'remaining (should be zero): %d' % len(convmap.keys()) - - # XXX: opportunities for diff encoding skip=3 ranges? - prev = None - for t in ranges: - # range: [start_i, start_o, count, skip] - if t[3] != 3: - continue - if prev is not None: - print '%d %d' % (t[0] - prev[0], t[1] - prev[1]) - else: - print 'start: %d %d' % (t[0], t[1]) - prev = t - - # bit packed encoding - - be = dukutil.BitEncoder() - - for curr_skip in xrange(1, 7): # 1...6 - count = 0 - for r in ranges: - start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] - if skip != curr_skip: - continue - count += 1 - be.bits(count, 6) - print 'encode: skip=%d, count=%d' % (curr_skip, count) - - for r in ranges: - start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] - if skip != curr_skip: - continue - be.bits(start_i, 16) - be.bits(start_o, 16) - be.bits(r_count, 7) - be.bits(0x3f, 6) # maximum count value = end of skips - - count = len(singles) - be.bits(count, 6) - for t in singles: - cp_i, cp_o = t[0], t[1] - be.bits(cp_i, 16) - be.bits(cp_o, 16) - - count = len(complex) - be.bits(count, 7) - for t in complex: - cp_i, str_o = t[0], t[1] - be.bits(cp_i, 16) - be.bits(len(str_o), 2) - for i in xrange(len(str_o)): - be.bits(ord(str_o[i]), 16) - - return be.getBytes(), be.getNumBits() - -def generate_regexp_canonicalize_lookup(convmap): - res = [] - - highest_nonid = -1 - - for cp in xrange(65536): - res_cp = cp # default to as is - if convmap.has_key(cp): - tmp = convmap[cp] - if len(tmp) == 1: - # Multiple codepoints from input, ignore - res_cp = ord(tmp[0]) - if cp >= 0x80 and res_cp < 0x80: - res_cp = cp # non-ASCII mapped to ASCII, ignore - - if cp != res_cp: - highest_nonid = cp - - res.append(res_cp) - - # At the moment this is 65370, which means there's very little - # gain in assuming 1:1 mapping above a certain BMP codepoint. - print('HIGHEST NON-ID MAPPING: %d' % highest_nonid) - return res - -def clonedict(x): - "Shallow clone of input dict." 
- res = {} - for k in x.keys(): - res[k] = x[k] - return res - -def main(): - parser = optparse.OptionParser() - parser.add_option('--command', dest='command', default='caseconv_bitpacked') - parser.add_option('--unicode-data', dest='unicode_data') - parser.add_option('--special-casing', dest='special_casing') - parser.add_option('--out-source', dest='out_source') - parser.add_option('--out-header', dest='out_header') - parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc') - parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc') - parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup') - (opts, args) = parser.parse_args() - - unicode_data = UnicodeData(opts.unicode_data) - special_casing = SpecialCasing(opts.special_casing) - - uc, lc, tc = get_base_conversion_maps(unicode_data) - update_special_casings(uc, lc, tc, special_casing) - - if opts.command == 'caseconv_bitpacked': - # XXX: ASCII and non-BMP filtering could be an option but is now hardcoded - - # ascii is handled with 'fast path' so not needed here - t = clonedict(uc) - remove_ascii_part(t) - uc_bytes, uc_nbits = generate_tables(t) - - t = clonedict(lc) - remove_ascii_part(t) - lc_bytes, lc_nbits = generate_tables(t) - - # Generate C source and header files - genc = dukutil.GenerateC() - genc.emitHeader('extract_caseconv.py') - genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True) - genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True) - f = open(opts.out_source, 'wb') - f.write(genc.getString()) - f.close() - - genc = dukutil.GenerateC() - genc.emitHeader('extract_caseconv.py') - genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes))) - genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes))) - f = open(opts.out_header, 'wb') - f.write(genc.getString()) - f.close() - elif opts.command == 're_canon_lookup': - # direct canonicalization lookup for case insensitive regexps, includes ascii part - t = clonedict(uc) - re_canon_lookup = generate_regexp_canonicalize_lookup(t) - - genc = dukutil.GenerateC() - genc.emitHeader('extract_caseconv.py') - genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True) - f = open(opts.out_source, 'wb') - f.write(genc.getString()) - f.close() - - genc = dukutil.GenerateC() - genc.emitHeader('extract_caseconv.py') - genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup))) - f = open(opts.out_header, 'wb') - f.write(genc.getString()) - f.close() - else: - raise Exception('invalid command: %r' % opts.command) - -if __name__ == '__main__': - main() diff --git a/src/extract_chars.py b/src/extract_chars.py deleted file mode 100644 index 3e4f1641..00000000 --- a/src/extract_chars.py +++ /dev/null @@ -1,353 +0,0 @@ -#!/usr/bin/env python2 -# -# Select a set of Unicode characters (based on included/excluded categories -# etc) and write out a compact bitstream for matching a character against -# the set at runtime. This is for the slow path, where we're especially -# concerned with compactness. A C source file with the table is written, -# together with a matching C header. -# -# Unicode categories (such as 'Z') can be used. 
Two pseudo-categories -# are also available for exclusion only: ASCII and NONBMP. "ASCII" -# category excludes ASCII codepoints which is useful because C code -# typically contains an ASCII fast path so ASCII characters don't need -# to be considered in the Unicode tables. "NONBMP" excludes codepoints -# above U+FFFF which is useful because such codepoints don't need to be -# supported in standard Ecmascript. -# - -import os, sys, math -import optparse -import dukutil - -def read_unicode_data(unidata, catsinc, catsexc, filterfunc): - "Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc." - res = [] - f = open(unidata, 'rb') - for line in f: - line = line.strip() - parts = line.split(';') - codepoint = parts[0] - category = parts[2] - - if filterfunc is not None and not filterfunc(long(codepoint, 16)): - continue - - excluded = False - for cat in catsexc: - if category.startswith(cat) or codepoint == cat: - excluded = True - if excluded: - continue - - for cat in catsinc: - if category.startswith(cat) or codepoint == cat: - res.append(line) - f.close() - - # Sort based on Unicode codepoint - def mycmp(a,b): - t1 = a.split(';') - t2 = b.split(';') - n1 = long(t1[0], 16) - n2 = long(t2[0], 16) - return cmp(n1, n2) - - res.sort(cmp=mycmp) - - return res - -def scan_ranges(lines): - "Scan continuous ranges from (filtered) UnicodeData.txt lines." - ranges = [] - range_start = None - prev = None - - for line in lines: - t = line.split(';') - n = long(t[0], 16) - if range_start is None: - range_start = n - else: - if n == prev + 1: - # continue range - pass - else: - ranges.append((range_start, prev)) - range_start = n - prev = n - - if range_start is not None: - ranges.append((range_start, prev)) - - return ranges - -def generate_png(lines, fname): - "Generate an illustrative PNG of the character set." - from PIL import Image - - m = {} - for line in lines: - t = line.split(';') - n = long(t[0], 16) - m[n] = 1 - - codepoints = 0x10ffff + 1 - width = int(256) - height = int(math.ceil(float(codepoints) / float(width))) - im = Image.new('RGB', (width, height)) - black = (0,0,0) - white = (255,255,255) - for cp in xrange(codepoints): - y = cp / width - x = cp % width - - if m.has_key(long(cp)): - im.putpixel((x,y), black) - else: - im.putpixel((x,y), white) - - im.save(fname) - -def generate_match_table1(ranges): - "Unused match table format." - - # This is an earlier match table format which is no longer used. - # IdentifierStart-UnicodeLetter has 445 ranges and generates a - # match table of 2289 bytes. 
- - data = [] - prev_re = None - - def genrange(rs, re): - if (rs > re): - raise Exception('assumption failed: rs=%d re=%d' % (rs, re)) - - while True: - now = re - rs + 1 - if now > 255: - now = 255 - data.append(now) # range now - data.append(0) # skip 0 - rs = rs + now - else: - data.append(now) # range now - break - - def genskip(ss, se): - if (ss > se): - raise Exception('assumption failed: ss=%d se=%s' % (ss, se)) - - while True: - now = se - ss + 1 - if now > 255: - now = 255 - data.append(now) # skip now - data.append(0) # range 0 - ss = ss + now - else: - data.append(now) # skip now - break - - for rs, re in ranges: - if prev_re is not None: - genskip(prev_re + 1, rs - 1) - genrange(rs, re) - prev_re = re - - num_entries = len(data) - - # header: start of first range - # num entries - hdr = [] - hdr.append(ranges[0][0] >> 8) # XXX: check that not 0x10000 or over - hdr.append(ranges[0][1] & 0xff) - hdr.append(num_entries >> 8) - hdr.append(num_entries & 0xff) - - return hdr + data - -def generate_match_table2(ranges): - "Unused match table format." - - # Another attempt at a match table which is also unused. - # Total tables for all current classes is now 1472 bytes. - - data = [] - - def enc(x): - while True: - if x < 0x80: - data.append(x) - break - data.append(0x80 + (x & 0x7f)) - x = x >> 7 - - prev_re = 0 - - for rs, re in ranges: - r1 = rs - prev_re # 1 or above (no unjoined ranges) - r2 = re - rs # 0 or above - enc(r1) - enc(r2) - prev_re = re - - enc(0) # end marker - - return data - -def generate_match_table3(ranges): - "Current match table format." - - # Yet another attempt, similar to generate_match_table2 except - # in packing format. - # - # Total match size now (at time of writing): 1194 bytes. - # - # This is the current encoding format used in duk_lexer.c. 
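For reference, the enc() helper just below packs each range delta with a variable-length scheme: values 0x00..0x0e fit directly in 4 bits, and larger values fall through to 12-, 24- or 36-bit escape forms (4-bit marker 0x0f, then second-byte values 0x00..0xfd, 0xfe or 0xff). A minimal round-trip sketch of that scheme, using (value, nbits) tuples in place of dukutil.BitEncoder; the helper names here are illustrative only, not part of the Duktape tooling:

#!/usr/bin/env python
# Round-trip sketch of the variable-length number encoding emitted by enc().

def encode_num(out, x):
    if x <= 0x0e:
        out.append((x, 4))                            # 4-bit form: 0x00..0x0e
        return
    x -= 0x0e + 1
    if x <= 0xfd:
        out.extend([(0x0f, 4), (x, 8)])               # 12-bit form
        return
    x -= 0xfd + 1
    if x <= 0xfff:
        out.extend([(0x0f, 4), (0xfe, 8), (x, 12)])   # 24-bit form
        return
    x -= 0xfff + 1
    out.extend([(0x0f, 4), (0xff, 8), (x, 24)])       # 36-bit form

def decode_num(fields):
    # 'fields' is an iterator over the (value, nbits) tuples from encode_num().
    t = next(fields)[0]
    if t <= 0x0e:
        return t
    t2 = next(fields)[0]
    if t2 <= 0xfd:
        return 0x0f + t2                              # 15 + t2
    if t2 == 0xfe:
        return 0x0f + 0xfe + next(fields)[0]          # 269 + t3
    return 0x0f + 0xfe + 0x1000 + next(fields)[0]     # 4365 + t4

if __name__ == '__main__':
    for x in [0, 14, 15, 268, 269, 4364, 4365, 100000]:
        out = []
        encode_num(out, x)
        assert decode_num(iter(out)) == x
    print('round trip ok')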
- - be = dukutil.BitEncoder() - - freq = [0] * (0x10ffff + 1) # informative - - def enc(x): - freq[x] += 1 - - if x <= 0x0e: - # 4-bit encoding - be.bits(x, 4) - return - x -= 0x0e + 1 - if x <= 0xfd: - # 12-bit encoding - be.bits(0x0f, 4) - be.bits(x, 8) - return - x -= 0xfd + 1 - if x <= 0xfff: - # 24-bit encoding - be.bits(0x0f, 4) - be.bits(0xfe, 8) - be.bits(x, 12) - return - x -= 0xfff + 1 - if True: - # 36-bit encoding - be.bits(0x0f, 4) - be.bits(0xff, 8) - be.bits(x, 24) - return - - raise Exception('cannot encode') - - prev_re = 0 - - for rs, re in ranges: - r1 = rs - prev_re # 1 or above (no unjoined ranges) - r2 = re - rs # 0 or above - enc(r1) - enc(r2) - prev_re = re - - enc(0) # end marker - - data, nbits = be.getBytes(), be.getNumBits() - return data, freq - -def main(): - parser = optparse.OptionParser() - parser.add_option('--unicode-data', dest='unicode_data') # UnicodeData.txt - parser.add_option('--special-casing', dest='special_casing') # SpecialCasing.txt - parser.add_option('--include-categories', dest='include_categories') - parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE') - parser.add_option('--out-source', dest='out_source') - parser.add_option('--out-header', dest='out_header') - parser.add_option('--out-png', dest='out_png') - parser.add_option('--table-name', dest='table_name', default='match_table') - (opts, args) = parser.parse_args() - - unidata = opts.unicode_data - catsinc = [] - if opts.include_categories != '': - catsinc = opts.include_categories.split(',') - catsexc = [] - if opts.exclude_categories != 'NONE': - catsexc = opts.exclude_categories.split(',') - - print 'CATSEXC: %s' % repr(catsexc) - print 'CATSINC: %s' % repr(catsinc) - - # pseudocategories - filter_ascii = ('ASCII' in catsexc) - filter_nonbmp = ('NONBMP' in catsexc) - - # Read raw result - def filter1(x): - if filter_ascii and x <= 0x7f: - # exclude ascii - return False - if filter_nonbmp and x >= 0x10000: - # exclude non-bmp - return False - return True - - res = read_unicode_data(unidata, catsinc, catsexc, filter1) - - # Raw output - print('RAW OUTPUT:') - print('===========') - print('\n'.join(res)) - - # Scan ranges - print('') - print('RANGES:') - print('=======') - ranges = scan_ranges(res) - for i in ranges: - if i[0] == i[1]: - print('0x%04x' % i[0]) - else: - print('0x%04x ... 
0x%04x' % (i[0], i[1])) - print('') - print('%d ranges total' % len(ranges)) - - # Generate match table - print('') - print('MATCH TABLE:') - print('============') - #matchtable1 = generate_match_table1(ranges) - #matchtable2 = generate_match_table2(ranges) - matchtable3, freq = generate_match_table3(ranges) - print 'match table: %s' % repr(matchtable3) - print 'match table length: %d bytes' % len(matchtable3) - print 'encoding freq:' - for i in xrange(len(freq)): - if freq[i] == 0: - continue - print ' %6d: %d' % (i, freq[i]) - - print('') - print('MATCH C TABLE -> file %s' % repr(opts.out_header)) - - # Create C source and header files - genc = dukutil.GenerateC() - genc.emitHeader('extract_chars.py') - genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True) - if opts.out_source is not None: - f = open(opts.out_source, 'wb') - f.write(genc.getString()) - f.close() - - genc = dukutil.GenerateC() - genc.emitHeader('extract_chars.py') - genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3))) - if opts.out_header is not None: - f = open(opts.out_header, 'wb') - f.write(genc.getString()) - f.close() - - # Image (for illustrative purposes only) - if opts.out_png is not None: - generate_png(res, opts.out_png) - -if __name__ == '__main__': - main() diff --git a/src/genbuildparams.py b/src/genbuildparams.py deleted file mode 100644 index d78ffe0e..00000000 --- a/src/genbuildparams.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python2 -# -# Generate build parameter files based on build information. -# A C header is generated for C code, and a JSON file for -# build scripts etc which need to know the build config. -# - -import os -import sys -import json -import optparse - -import dukutil - -if __name__ == '__main__': - parser = optparse.OptionParser() - parser.add_option('--version', dest='version') - parser.add_option('--git-commit', dest='git_commit') - parser.add_option('--git-describe', dest='git_describe') - parser.add_option('--git-branch', dest='git_branch') - parser.add_option('--out-json', dest='out_json') - parser.add_option('--out-header', dest='out_header') - (opts, args) = parser.parse_args() - - t = { - 'version': opts.version, - 'git_commit': opts.git_commit, - 'git_describe': opts.git_describe, - 'git_branch': opts.git_branch, - } - - f = open(opts.out_json, 'wb') - f.write(dukutil.json_encode(t).encode('ascii')) - f.close() - - f = open(opts.out_header, 'wb') - f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n') - f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n') - f.write('/* automatically generated by genbuildparams.py, do not edit */\n') - f.write('\n') - f.write('/* DUK_VERSION is defined in duktape.h */') - f.write('\n') - f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n') - f.close() diff --git a/src/genbuiltins.py b/src/genbuiltins.py deleted file mode 100644 index 8e3d964f..00000000 --- a/src/genbuiltins.py +++ /dev/null @@ -1,2983 +0,0 @@ -#!/usr/bin/env python2 -# -# Generate initialization data for built-in strings and objects. -# -# Supports two different initialization approaches: -# -# 1. Bit-packed format for unpacking strings and objects during -# heap or thread init into RAM-based structures. This is the -# default behavior. -# -# 2. Embedding strings and/or objects into a read-only data section -# at compile time. This is useful for low memory targets to reduce -# memory usage. Objects in data section will be immutable. 
-# -# Both of these have practical complications like endianness differences, -# pointer compression variants, object property table layout variants, -# and so on. Multiple #ifdef'd initializer sections are emitted to cover -# all supported alternatives. -# - -import os -import sys -import re -import traceback -import json -import yaml -import math -import struct -import optparse -import copy - -import dukutil - -# Fixed seed for ROM strings, must match src/duk_heap_alloc.c. -DUK__FIXED_HASH_SEED = 0xabcd1234 - -# Base value for compressed ROM pointers, used range is [ROMPTR_FIRST,0xffff]. -# Must match DUK_USE_ROM_PTRCOMP_FIRST (generated header checks). -ROMPTR_FIRST = 0xf800 # 2048 should be enough; now around ~1000 used - -# -# Miscellaneous helpers -# - -# Convert Unicode to bytes, identifying Unicode U+0000 to U+00FF as bytes. -# This representation is used in YAML metadata and allows invalid UTF-8 to -# be represented exactly (which is necessary). -def unicode_to_bytes(x): - if isinstance(x, str): - return x - tmp = '' - for c in x: - if ord(c) > 0xff: - raise Exception('invalid codepoint: %r' % x) - tmp += chr(ord(c)) - assert(isinstance(tmp, str)) - return tmp - -# Convert bytes to Unicode, identifying bytes as U+0000 to U+00FF. -def bytes_to_unicode(x): - if isinstance(x, unicode): - return x - tmp = u'' - for c in x: - tmp += unichr(ord(c)) - assert(isinstance(tmp, unicode)) - return tmp - -# Convert all strings in an object to bytes recursively. Useful for -# normalizing all strings in a YAML document. -def recursive_strings_to_bytes(doc): - def f(x): - if isinstance(x, unicode): - return unicode_to_bytes(x) - if isinstance(x, dict): - res = {} - for k in x.keys(): - res[f(k)] = f(x[k]) - return res - if isinstance(x, list): - res = [] - for e in x: - res.append(f(e)) - return res - return x - - return f(doc) - -# Convert all strings in an object to from bytes to Unicode recursively. -# Useful for writing back JSON/YAML dumps. -def recursive_bytes_to_strings(doc): - def f(x): - if isinstance(x, str): - return bytes_to_unicode(x) - if isinstance(x, dict): - res = {} - for k in x.keys(): - res[f(k)] = f(x[k]) - return res - if isinstance(x, list): - res = [] - for e in x: - res.append(f(e)) - return res - return x - - return f(doc) - -# Check if string is an "array index" in Ecmascript terms. -def string_is_arridx(v): - is_arridx = False - try: - ival = int(v) - if ival >= 0 and ival <= 0xfffffffe and ('%d' % ival == v): - is_arridx = True - except ValueError: - pass - - return is_arridx - -# -# Metadata loading, merging, and other preprocessing -# -# Final metadata object contains merged and normalized objects and strings. -# Keys added include (see more below): -# -# strings_stridx: string objects which have a stridx, matches stridx index order -# objects_bidx: objects which have a bidx, matches bidx index order -# objects_ram_toplevel: objects which are top level for RAM init -# -# Various helper keys are also added, containing auxiliary object/string -# lists, lookup maps, etc. See code below for details of these. 
-# - -def metadata_lookup_object(meta, obj_id): - return meta['_objid_to_object'][obj_id] - -def metadata_lookup_object_and_index(meta, obj_id): - for i,t in enumerate(meta['objects']): - if t['id'] == obj_id: - return t, i - return None, None - -def metadata_lookup_property(obj, key): - for p in obj['properties']: - if p['key'] == key: - return p - return None - -def metadata_lookup_property_and_index(obj, key): - for i,t in enumerate(obj['properties']): - if t['key'] == key: - return t, i - return None, None - -# Remove disabled objects and properties. -def metadata_remove_disabled(meta): - objlist = [] - for o in meta['objects']: - if o.get('disable', False): - print('Remove disabled object: %s' % o['id']) - else: - objlist.append(o) - - props = [] - for p in o['properties']: - if p.get('disable', False): - print('Remove disabled property: %s, object: %s' % (p['key'], o['id'])) - else: - props.append(p) - - o['properties'] = props - - meta['objects'] = objlist - -# Delete dangling references to removed/missing objects. -def metadata_delete_dangling_references_to_object(meta, obj_id): - for o in meta['objects']: - new_p = [] - for p in o['properties']: - v = p['value'] - ptype = None - if isinstance(v, dict): - ptype = p['value']['type'] - delprop = False - if ptype == 'object' and v['id'] == obj_id: - delprop = True - if ptype == 'accessor' and v.get('getter_id') == obj_id: - p['getter_id'] = None - if ptype == 'accessor' and v.get('setter_id') == obj_id: - p['setter_id'] = None - # XXX: Should empty accessor (= no getter, no setter) be deleted? - # If so, beware of shorthand. - if delprop: - print('Deleted property %s of object %s, points to deleted object %s' % \ - (p['key'], o['id'], obj_id)) - else: - new_p.append(p) - o['properties'] = new_p - -# Merge a user YAML file into current metadata. 
-def metadata_merge_user_objects(meta, user_meta): - if user_meta.has_key('add_objects'): - raise Exception('"add_objects" removed, use "objects" with "add: True"') - if user_meta.has_key('replace_objects'): - raise Exception('"replace_objects" removed, use "objects" with "replace: True"') - if user_meta.has_key('modify_objects'): - raise Exception('"modify_objects" removed, use "objects" with "modify: True"') - - for o in user_meta.get('objects', []): - if o.get('disable', False): - print('Skip disabled object: %s' % o['id']) - continue - targ, targ_idx = metadata_lookup_object_and_index(meta, o['id']) - - if o.get('delete', False): - print('Delete object: %s' % targ['id']) - if targ is None: - raise Exception('Cannot delete object %s which doesn\'t exist' % o['id']) - meta['objects'].pop(targ_idx) - metadata_delete_dangling_references_to_object(meta, targ['id']) - continue - - if o.get('replace', False): - print('Replace object %s' % o['id']) - if targ is None: - print('WARNING: object to be replaced doesn\'t exist, append new object') - meta['objects'].append(o) - else: - meta['objects'][targ_idx] = o - continue - - if o.get('add', False) or not o.get('modify', False): # 'add' is the default - print('Add object %s' % o['id']) - if targ is not None: - raise Exception('Cannot add object %s which already exists' % o['id']) - meta['objects'].append(o) - continue - - assert(o.get('modify', False)) # modify handling - if targ is None: - raise Exception('Cannot modify object %s which doesn\'t exist' % o['id']) - - for k in sorted(o.keys()): - # Merge top level keys by copying over, except 'properties' - if k == 'properties': - continue - targ[k] = o[k] - for p in o.get('properties', []): - if p.get('disable', False): - print('Skip disabled property: %s' % p['key']) - continue - prop = None - prop_idx = None - prop, prop_idx = metadata_lookup_property_and_index(targ, p['key']) - if prop is not None: - if p.get('delete', False): - print('Delete property %s of %s' % (p['key'], o['id'])) - targ['properties'].pop(prop_idx) - else: - print('Replace property %s of %s' % (p['key'], o['id'])) - targ['properties'][prop_idx] = p - else: - if p.get('delete', False): - print('Deleting property %s of %s: doesn\'t exist, nop' % (p['key'], o['id'])) - else: - print('Add property %s of %s' % (p['key'], o['id'])) - targ['properties'].append(p) - -# Normalize nargs for top level functions by defaulting 'nargs' from 'length'. -def metadata_normalize_nargs_length(meta): - # Default 'nargs' from 'length' for top level function objects. - for o in meta['objects']: - if o.has_key('nargs'): - continue - if not o.get('callable', False): - continue - for p in o['properties']: - if p['key'] != 'length': - continue - #print('Default nargs for top level: %r' % p) - assert(isinstance(p['value'], int)) - o['nargs'] = p['value'] - break - assert(o.has_key('nargs')) - - # Default 'nargs' from 'length' for function property shorthand. - for o in meta['objects']: - for p in o['properties']: - if not (isinstance(p['value'], dict) and p['value']['type'] == 'function'): - continue - pval = p['value'] - if not pval.has_key('length'): - print('Default length for function shorthand: %r' % p) - pval['length'] = 0 - if not pval.has_key('nargs'): - #print('Default nargs for function shorthand: %r' % p) - pval['nargs'] = pval['length'] - -# Prepare a list of built-in objects which need a runtime 'bidx'. 
-def metadata_prepare_objects_bidx(meta): - objlist = meta['objects'] - meta['objects'] = [] - meta['objects_bidx'] = [] - objid_map = {} # temp map - - # Build helper index. - for o in objlist: - objid_map[o['id']] = o - - # Use 'builtins' as the bidx list with no filtering for now. - # Ideally we'd scan the actually needed indices from the source. - for o in meta['builtins']: - # No filtering now, just use list as is - obj = objid_map[o['id']] - obj['bidx_used'] = True - meta['objects'].append(obj) - meta['objects_bidx'].append(obj) - - # Append remaining objects. - for o in objlist: - if o.get('bidx_used', False): - # Already in meta['objects']. - pass - else: - meta['objects'].append(o) - -# Normalize metadata property shorthand. For example, if a proprety value -# is a shorthand function, create a function object and change the property -# to point to that function object. -def metadata_normalize_shorthand(meta): - # Gather objects through the top level built-ins list. - objs = [] - subobjs = [] - - def getSubObject(): - obj = {} - obj['id'] = 'subobj_%d' % len(subobjs) # synthetic ID - obj['properties'] = [] - obj['auto_generated'] = True # mark as autogenerated (just FYI) - subobjs.append(obj) - return obj - - def decodeFunctionShorthand(funprop): - # Convert the built-in function property "shorthand" into an actual - # object for ROM built-ins. - assert(funprop['value']['type'] == 'function') - val = funprop['value'] - obj = getSubObject() - props = obj['properties'] - obj['native'] = val['native'] - obj['nargs'] = val.get('nargs', val['length']) - obj['varargs'] = val.get('varargs', False) - obj['magic'] = val.get('magic', 0) - obj['internal_prototype'] = 'bi_function_prototype' - obj['class'] = 'Function' - obj['callable'] = True - obj['constructable'] = val.get('constructable', False) - props.append({ 'key': 'length', 'value': val['length'], 'attributes': '' }) - props.append({ 'key': 'name', 'value': funprop['key'], 'attributes': '' }) - return obj - - def addAccessor(funprop, magic, nargs, length, name, native_func): - assert(funprop['value']['type'] == 'accessor') - obj = getSubObject() - props = obj['properties'] - obj['native'] = native_func - obj['nargs'] = nargs - obj['varargs'] = False - obj['magic'] = magic - obj['internal_prototype'] = 'bi_function_prototype' - obj['class'] = 'Function' - obj['callable'] = True - obj['constructable'] = False - # Shorthand accessors are minimal and have no .length or .name - # right now. Use longhand if these matter. 
- #props.append({ 'key': 'length', 'value': length, 'attributes': '' }) - #props.append({ 'key': 'name', 'value': name, 'attributes': '' }) - return obj - - def decodeGetterShorthand(key, funprop): - assert(funprop['value']['type'] == 'accessor') - val = funprop['value'] - return addAccessor(funprop, - val['getter_magic'], - val['getter_nargs'], - val.get('getter_length', 0), - key, - val['getter']) - - def decodeSetterShorthand(key, funprop): - assert(funprop['value']['type'] == 'accessor') - val = funprop['value'] - return addAccessor(funprop, - val['setter_magic'], - val['setter_nargs'], - val.get('setter_length', 0), - key, - val['setter']) - - def decodeStructuredValue(val): - #print('Decode structured value: %r' % val) - if isinstance(val, (int, long, float, str)): - return val # as is - elif isinstance(val, (dict)): - # Object: decode recursively - obj = decodeStructuredObject(val) - return { 'type': 'object', 'id': obj['id'] } - elif isinstance(val, (list)): - raise Exception('structured shorthand does not yet support array literals') - else: - raise Exception('unsupported value in structured shorthand: %r' % v) - - def decodeStructuredObject(val): - # XXX: We'd like to preserve dict order from YAML source but - # Python doesn't do that. Use sorted order to make the result - # deterministic. User can always use longhand for exact - # property control. - - #print('Decode structured object: %r' % val) - obj = getSubObject() - obj['class'] = 'Object' - obj['internal_prototype'] = 'bi_object_prototype' - - props = obj['properties'] - keys = sorted(val.keys()) - for k in keys: - #print('Decode property %s' % k) - prop = { 'key': k, 'value': decodeStructuredValue(val[k]), 'attributes': 'wec' } - props.append(prop) - - return obj - - def decodeStructuredShorthand(structprop): - assert(structprop['value']['type'] == 'structured') - val = structprop['value']['value'] - return decodeStructuredValue(val) - - def clonePropShared(prop): - res = {} - for k in [ 'key', 'attributes', 'autoLightfunc' ]: - if prop.has_key(k): - res[k] = prop[k] - return res - - for idx,obj in enumerate(meta['objects']): - props = [] - repl_props = [] - - for val in obj['properties']: - # Date.prototype.toGMTString must point to the same Function object - # as Date.prototype.toUTCString, so special case hack it here. - if obj['id'] == 'bi_date_prototype' and val['key'] == 'toGMTString': - #print('Skip Date.prototype.toGMTString') - continue - - if isinstance(val['value'], dict) and val['value']['type'] == 'function': - # Function shorthand. - subfun = decodeFunctionShorthand(val) - prop = clonePropShared(val) - prop['value'] = { 'type': 'object', 'id': subfun['id'] } - repl_props.append(prop) - elif isinstance(val['value'], dict) and val['value']['type'] == 'accessor' and \ - (val['value'].has_key('getter') or val['value'].has_key('setter')): - # Accessor normal and shorthand forms both use the type 'accessor', - # but are differentiated by properties. - sub_getter = decodeGetterShorthand(val['key'], val) - sub_setter = decodeSetterShorthand(val['key'], val) - prop = clonePropShared(val) - prop['value'] = { 'type': 'accessor', 'getter_id': sub_getter['id'], 'setter_id': sub_setter['id'] } - assert('a' in prop['attributes']) # If missing, weird things happen runtime - #print('Expand accessor shorthand: %r -> %r' % (val, prop)) - repl_props.append(prop) - elif isinstance(val['value'], dict) and val['value']['type'] == 'structured': - # Structured shorthand. 
- subval = decodeStructuredShorthand(val) - prop = clonePropShared(val) - prop['value'] = subval - repl_props.append(prop) - print('Decoded structured shorthand for object %s, property %s' % (obj['id'], val['key'])) - elif isinstance(val['value'], dict) and val['value']['type'] == 'buffer': - # Duktape buffer type not yet supported. - raise Exception('Buffer type not yet supported for builtins: %r' % val) - elif isinstance(val['value'], dict) and val['value']['type'] == 'pointer': - # Duktape pointer type not yet supported. - raise Exception('Pointer type not yet supported for builtins: %r' % val) - else: - # Property already in normalized form. - repl_props.append(val) - - if obj['id'] == 'bi_date_prototype' and val['key'] == 'toUTCString': - #print('Clone Date.prototype.toUTCString to Date.prototype.toGMTString') - prop2 = copy.deepcopy(repl_props[-1]) - prop2['key'] = 'toGMTString' - repl_props.append(prop2) - - # Replace properties with a variant where function properties - # point to built-ins rather than using an inline syntax. - obj['properties'] = repl_props - - len_before = len(meta['objects']) - meta['objects'] += subobjs - len_after = len(meta['objects']) - - print('Normalized metadata shorthand, %d objects -> %d final objects' % (len_before, len_after)) - -# Normalize property attribute order, default attributes, etc. -def metadata_normalize_property_attributes(meta): - for o in meta['objects']: - for p in o['properties']: - orig_attrs = p.get('attributes', None) - is_accessor = (isinstance(p['value'], dict) and p['value']['type'] == 'accessor') - - # If missing, set default attributes. - attrs = orig_attrs - if attrs is None: - if is_accessor: - attrs = 'ca' # accessor default is configurable - else: - attrs = 'wc' # default is writable, configurable - #print('Defaulted attributes of %s/%s to %s' % (o['id'], p['key'], attrs)) - - # Decode flags to normalize their order in the end. - writable = 'w' in attrs - enumerable = 'e' in attrs - configurable = 'c' in attrs - accessor = 'a' in attrs - - # Force 'accessor' attribute for accessors. - if is_accessor and not accessor: - #print('Property %s is accessor but has no "a" attribute, add attribute' % p['key']) - accessor = True - - # Normalize order and write back. - attrs = '' - if writable: - attrs += 'w' - if enumerable: - attrs += 'e' - if configurable: - attrs += 'c' - if accessor: - attrs += 'a' - p['attributes'] = attrs - - if orig_attrs != attrs: - #print('Updated attributes of %s/%s from %r to %r' % (o['id'], p['key'], orig_attrs, attrs)) - pass - -# Normalize ROM property attributes. -def metadata_normalize_rom_property_attributes(meta): - for o in meta['objects']: - for p in o['properties']: - # ROM properties must not be configurable (runtime code - # depends on this). Writability is kept so that instance - # objects can override parent properties. - p['attributes'] = p['attributes'].replace('c', '') - -# Add a 'name' property for all top level functions; expected by RAM -# initialization code. -def metadata_normalize_ram_function_names(meta): - for o in meta['objects']: - if not o.get('callable', False): - continue - name_prop = None - for p in o['properties']: - if p['key'] == 'name': - name_prop = p - break - if name_prop is None: - print('Adding missing "name" property for top level function %s' % o['id']) - o['properties'].append({ 'key': 'name', 'value': '', 'attributes': '' }) - -# Add a built-in objects list for RAM initialization. 
-def metadata_add_ram_filtered_object_list(meta): - # For RAM init data to support user objects, we need to prepare a - # filtered top level object list, containing only those objects which - # need a value stack index during duk_hthread_builtins.c init process. - # - # Objects in meta['objects'] which are covered by inline property - # notation in the init data (this includes e.g. member functions like - # Math.cos) must not be present. - - objlist = [] - for o in meta['objects']: - keep = o.get('bidx_used', False) - if o.has_key('native') and not o.has_key('bidx'): - # Handled inline by run-time init code - pass - else: - # Top level object - keep = True - if keep: - objlist.append(o) - - print('Filtered RAM object list: %d objects with bidx, %d total top level objects' % \ - (len(meta['objects_bidx']), len(objlist))) - - meta['objects_ram_toplevel'] = objlist - -# Add missing strings into strings metadata. For example, if an object -# property key is not part of the strings list, append it there. This -# is critical for ROM builtins because all property keys etc must also -# be in ROM. -def metadata_normalize_missing_strings(meta, user_meta): - # We just need plain strings here. - strs_have = {} - for s in meta['strings']: - strs_have[s['str']] = True - - # For ROM builtins all the strings must be in the strings list, - # so scan objects for any strings not explicitly listed in metadata. - for idx, obj in enumerate(meta['objects']): - for prop in obj['properties']: - key = prop['key'] - if not strs_have.get(key): - #print('Add missing string: %r' % key) - meta['strings'].append({ 'str': key, '_auto_add_ref': True }) - strs_have[key] = True - if prop.has_key('value') and isinstance(prop['value'], (str, unicode)): - val = unicode_to_bytes(prop['value']) # XXX: should already be - if not strs_have.get(val): - #print('Add missing string: %r' % val) - meta['strings'].append({ 'str': val, '_auto_add_ref': True }) - strs_have[val] = True - - # Force user strings to be in ROM data. - for s in user_meta.get('add_forced_strings', []): - if not strs_have.get(s['str']): - #print('Add user string: %r' % s['str']) - s['_auto_add_user'] = True - meta['strings'].append(s) - -# Convert built-in function properties into lightfuncs where applicable. -def metadata_convert_lightfuncs(meta): - num_converted = 0 - num_skipped = 0 - - for o in meta['objects']: - for p in o['properties']: - v = p['value'] - ptype = None - if isinstance(v, dict): - ptype = p['value']['type'] - if ptype != 'object': - continue - targ, targ_idx = metadata_lookup_object_and_index(meta, p['value']['id']) - - reasons = [] - if not targ.get('callable', False): - reasons.append('not-callable') - #if targ.get('constructable', False): - # reasons.append('constructable') - - lf_len = 0 - for p2 in targ['properties']: - # Don't convert if function has more properties than - # we're willing to sacrifice. - #print(' - Check %r . 
%s' % (o.get('id', None), p2['key'])) - if p2['key'] == 'length' and isinstance(p2['value'], (int, long)): - lf_len = p2['value'] - if p2['key'] not in [ 'length', 'name' ]: - reasons.append('nonallowed-property') - - if not p.get('autoLightfunc', True): - print('Automatic lightfunc conversion rejected for key %s, explicitly requested in metadata' % p['key']) - reasons.append('no-auto-lightfunc') - - # lf_len comes from actual property table (after normalization) - if targ.has_key('magic'): - try: - # Magic values which resolve to 'bidx' indices cannot - # be resolved here yet, because the bidx map is not - # yet ready. If so, reject the lightfunc conversion - # for now. In practice this doesn't matter. - lf_magic = resolve_magic(targ.get('magic'), {}) # empty map is a "fake" bidx map - #print('resolved magic ok -> %r' % lf_magic) - except Exception, e: - #print('Failed to resolve magic for %r: %r' % (p['key'], e)) - reasons.append('magic-resolve-failed') - lf_magic = 0xffffffff # dummy, will be out of bounds - else: - lf_magic = 0 - if targ.get('varargs', True): - lf_nargs = None - lf_varargs = True - else: - lf_nargs = targ['nargs'] - lf_varargs = False - - if lf_len < 0 or lf_len > 15: - #print('lf_len out of bounds: %r' % lf_len) - reasons.append('len-bounds') - if lf_magic < -0x80 or lf_magic > 0x7f: - #print('lf_magic out of bounds: %r' % lf_magic) - reasons.append('magic-bounds') - if not lf_varargs and (lf_nargs < 0 or lf_nargs > 14): - #print('lf_nargs out of bounds: %r' % lf_nargs) - reasons.append('nargs-bounds') - - if len(reasons) > 0: - #print('Don\'t convert to lightfunc: %r %r (%r): %r' % (o.get('id', None), p.get('key', None), p['value']['id'], reasons)) - num_skipped += 1 - continue - - p_id = p['value']['id'] - p['value'] = { - 'type': 'lightfunc', - 'native': targ['native'], - 'length': lf_len, - 'magic': lf_magic, - 'nargs': lf_nargs, - 'varargs': lf_varargs - } - #print(' - Convert to lightfunc: %r %r (%r) -> %r' % (o.get('id', None), p.get('key', None), p_id, p['value'])) - - num_converted += 1 - - print('Converted %d built-in function properties to lightfuncs, %d skipped as non-eligible' % (num_converted, num_skipped)) - -# Detect objects not reachable from any object with a 'bidx'. This is usually -# a user error because such objects can't be reached at runtime so they're -# useless in RAM or ROM init data. -def metadata_remove_orphan_objects(meta): - reachable = {} - - for o in meta['objects']: - if o.get('bidx_used', False): - reachable[o['id']] = True - - while True: - reachable_count = len(reachable.keys()) - - def _markId(obj_id): - if obj_id is None: - return - reachable[obj_id] = True - - for o in meta['objects']: - if not reachable.has_key(o['id']): - continue - for p in o['properties']: - # Shorthand has been normalized so no need - # to support it here. 
- v = p['value'] - ptype = None - if isinstance(v, dict): - ptype = p['value']['type'] - if ptype == 'object': - _markId(v['id']) - if ptype == 'accessor': - _markId(v.get('getter_id')) - _markId(v.get('setter_id')) - - print('Mark reachable: reachable count initially %d, now %d' % \ - (reachable_count, len(reachable.keys()))) - if reachable_count == len(reachable.keys()): - break - - num_deleted = 0 - deleted = True - while deleted: - deleted = False - for i,o in enumerate(meta['objects']): - if not reachable.has_key(o['id']): - #print('WARNING: object %s not reachable, dropping' % o['id']) - meta['objects'].pop(i) - deleted = True - num_deleted += 1 - break - - print('Deleted %d unreachable objects' % num_deleted) - -# Add C define names for builtin strings. These defines are added to all -# strings, even when they won't get a stridx because the define names are -# used to autodetect referenced strings. -def metadata_add_string_define_names(strlist, special_defs): - for s in strlist: - v = s['str'] - - if special_defs.has_key(v): - s['define'] = 'DUK_STRIDX_' + special_defs[v] - continue - - if len(v) >= 1 and v[0] == '\xff': - pfx = 'DUK_STRIDX_INT_' - v = v[1:] - else: - pfx = 'DUK_STRIDX_' - - t = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', v) # add underscores: aB -> a_B - s['define'] = pfx + t.upper() - -# Add a 'stridx_used' flag for strings which need a stridx. -def metadata_add_string_used_stridx(strlist, used_stridx_meta): - defs_needed = {} - defs_found = {} - for s in used_stridx_meta['used_stridx_defines']: - defs_needed[s] = True - - # strings whose define is referenced - for s in strlist: - if s.has_key('define') and defs_needed.has_key(s['define']): - s['stridx_used'] = True - defs_found[s['define']] = True - - # duk_lexer.h needs all reserved words - for s in strlist: - if s.get('reserved_word', False): - s['stridx_used'] = True - - # ensure all needed defines are provided - defs_found['DUK_STRIDX_START_RESERVED'] = True # special defines provided automatically - defs_found['DUK_STRIDX_START_STRICT_RESERVED'] = True - defs_found['DUK_STRIDX_END_RESERVED'] = True - defs_found['DUK_STRIDX_TO_TOK'] = True - for k in sorted(defs_needed.keys()): - if not defs_found.has_key(k): - raise Exception('source code needs define %s not provided by strings' % repr(k)) - -# Merge duplicate strings in string metadata. -def metadata_merge_string_entries(strlist): - # The raw string list may contain duplicates so merge entries. - # The list is processed in reverse because the last entry should - # "win" and keep its place (this matters for reserved words). - - strs = [] - str_map = {} # plain string -> object in strs[] - tmp = copy.deepcopy(strlist) - tmp.reverse() - for s in tmp: - prev = str_map.get(s['str']) - if prev is not None: - for k in s.keys(): - if prev.has_key(k) and prev[k] != s[k]: - raise Exception('fail to merge string entry, conflicting keys: %r <-> %r' % (prev, s)) - prev[k] = s[k] - else: - strs.append(s) - str_map[s['str']] = s - strs.reverse() - return strs - -# Order builtin strings (strings with a stridx) into an order satisfying -# multiple constraints. -def metadata_order_builtin_strings(input_strlist, keyword_list, strip_unused_stridx=False): - # Strings are ordered in the result as follows: - # 1. Non-reserved words requiring 8-bit indices - # 2. Non-reserved words not requiring 8-bit indices - # 3. Reserved words in non-strict mode only - # 4. 
Reserved words in strict mode - # - # Reserved words must follow an exact order because they are - # translated to/from token numbers by addition/subtraction. - # Some strings require an 8-bit index and must be in the - # beginning. - - tmp_strs = [] - for s in copy.deepcopy(input_strlist): - if not s.get('stridx_used', False): - # Drop strings which are not actually needed by src/*.(c|h). - # Such strings won't be in heap->strs[] or ROM legacy list. - pass - else: - tmp_strs.append(s) - - # The reserved word list must match token order in duk_lexer.h - # exactly, so pluck them out first. - - str_index = {} - kw_index = {} - keywords = [] - strs = [] - for idx,s in enumerate(tmp_strs): - str_index[s['str']] = s - for idx,s in enumerate(keyword_list): - keywords.append(str_index[s]) - kw_index[s] = True - for idx,s in enumerate(tmp_strs): - if not kw_index.has_key(s['str']): - strs.append(s) - - # Sort the strings by category number; within category keep - # previous order. - - for idx,s in enumerate(strs): - s['_idx'] = idx # for ensuring stable sort - - def req8Bit(s): - return s.get('class_name', False) # currently just class names - - def getCat(s): - req8 = req8Bit(s) - if s.get('reserved_word', False): - # XXX: unused path now, because keywords are "plucked out" - # explicitly. - assert(not req8) - if s.get('future_reserved_word_strict', False): - return 4 - else: - return 3 - elif req8: - return 1 - else: - return 2 - - def sortCmp(a,b): - return cmp( (getCat(a),a['_idx']), (getCat(b),b['_idx']) ) - - strs.sort(cmp=sortCmp) - - for idx,s in enumerate(strs): - # Remove temporary _idx properties - del s['_idx'] - - for idx,s in enumerate(strs): - if req8Bit(s) and i >= 256: - raise Exception('8-bit string index not satisfied: ' + repr(s)) - - return strs + keywords - -# Dump metadata into a JSON file. -def dump_metadata(meta, fn): - tmp = json.dumps(recursive_bytes_to_strings(meta), indent=4) - with open(fn, 'wb') as f: - f.write(tmp) - print('Wrote metadata dump to %s' % fn) - -# Main metadata loading function: load metadata from multiple sources, -# merge and normalize, prepare various indexes etc. -def load_metadata(opts, rom=False, build_info=None): - # Load built-in strings and objects. - with open(opts.strings_metadata, 'rb') as f: - strings_metadata = recursive_strings_to_bytes(yaml.load(f)) - with open(opts.objects_metadata, 'rb') as f: - objects_metadata = recursive_strings_to_bytes(yaml.load(f)) - - # Merge strings and objects metadata as simple top level key merge. - meta = {} - for k in objects_metadata.keys(): - meta[k] = objects_metadata[k] - for k in strings_metadata.keys(): - meta[k] = strings_metadata[k] - - # Add user objects. - user_meta = {} - for fn in opts.user_builtin_metadata: - print('Merging user builtin metadata file %s' % fn) - with open(fn, 'rb') as f: - user_meta = recursive_strings_to_bytes(yaml.load(f)) - metadata_merge_user_objects(meta, user_meta) - - # Remove disabled objects and properties. - metadata_remove_disabled(meta) - - # Normalize 'nargs' and 'length' defaults. - metadata_normalize_nargs_length(meta) - - # Normalize property attributes. - metadata_normalize_property_attributes(meta) - - # Normalize property shorthand into full objects. - metadata_normalize_shorthand(meta) - - # RAM top-level functions must have a 'name'. - if not rom: - metadata_normalize_ram_function_names(meta) - - # Add Duktape.version and (Duktape.env for ROM case). 
- for o in meta['objects']: - if o['id'] == 'bi_duktape': - o['properties'].insert(0, { 'key': 'version', 'value': int(build_info['version']), 'attributes': '' }) - if rom: - # Use a fixed (quite dummy for now) Duktape.env - # when ROM builtins are in use. In the RAM case - # this is added during global object initialization - # based on config options in use. - o['properties'].insert(0, { 'key': 'env', 'value': 'ROM', 'attributes': '' }) - - # Normalize property attributes (just in case shorthand handling - # didn't add attributes to all properties). - metadata_normalize_property_attributes(meta) - - # For ROM objects, mark all properties non-configurable. - if rom: - metadata_normalize_rom_property_attributes(meta) - - # Convert built-in function properties automatically into - # lightfuncs if requested and function is eligible. - if rom and opts.rom_auto_lightfunc: - metadata_convert_lightfuncs(meta) - - # Create a list of objects needing a 'bidx'. This is now just - # based on the 'builtins' metadata list but could be dynamically - # scanned somehow. Ensure 'objects' and 'objects_bidx' match - # in order for shared length. - metadata_prepare_objects_bidx(meta) - - # Merge duplicate strings. - meta['strings'] = metadata_merge_string_entries(meta['strings']) - - # Prepare an ordered list of strings with 'stridx': - # - Add a 'stridx_used' flag for strings which need an index in current code base - # - Add a C define (DUK_STRIDX_xxx) for such strings - # - Compute a stridx string order satisfying current runtime constraints - # - # The meta['strings_stridx'] result will be in proper order and stripped of - # any strings which don't need a stridx. - metadata_add_string_define_names(meta['strings'], meta['special_define_names']) - with open(opts.used_stridx_metadata, 'rb') as f: - metadata_add_string_used_stridx(meta['strings'], json.loads(f.read())) - meta['strings_stridx'] = metadata_order_builtin_strings(meta['strings'], meta['reserved_word_token_order']) - - # For the ROM build: add any strings referenced by built-in objects - # into the string list (not the 'stridx' list though): all strings - # referenced by ROM objects must also be in ROM. - if rom: - for fn in opts.user_builtin_metadata: - # XXX: awkward second pass - with open(fn, 'rb') as f: - user_meta = recursive_strings_to_bytes(yaml.load(f)) - metadata_normalize_missing_strings(meta, user_meta) - metadata_normalize_missing_strings(meta, {}) # in case no files - - # Check for orphan objects and remove them. - metadata_remove_orphan_objects(meta) - - # Add final stridx and bidx indices to metadata objects and strings. - idx = 0 - for o in meta['objects']: - if o.get('bidx_used', False): - o['bidx'] = idx - idx += 1 - idx = 0 - for s in meta['strings']: - if s.get('stridx_used', False): - s['stridx'] = idx - idx += 1 - - # Prepare a filtered RAM top level object list, needed for technical - # reasons during RAM init handling. - if not rom: - metadata_add_ram_filtered_object_list(meta) - - # Sanity check: object index must match 'bidx' for all objects - # which have a runtime 'bidx'. This is assumed by e.g. RAM - # thread init. - for i,o in enumerate(meta['objects']): - if i < len(meta['objects_bidx']): - assert(meta['objects_bidx'][i] == meta['objects'][i]) - if o.has_key('bidx'): - assert(o['bidx'] == i) - - # Create a set of helper lists and maps now that the metadata is - # in its final form. 
- meta['_strings_plain'] = [] - meta['_strings_stridx_plain'] = [] - meta['_stridx_to_string'] = {} - meta['_idx_to_string'] = {} - meta['_stridx_to_plain'] = {} - meta['_idx_to_plain'] = {} - meta['_string_to_stridx'] = {} - meta['_plain_to_stridx'] = {} - meta['_string_to_idx'] = {} - meta['_plain_to_idx'] = {} - meta['_define_to_stridx'] = {} - meta['_stridx_to_define'] = {} - meta['_is_plain_reserved_word'] = {} - meta['_is_plain_strict_reserved_word'] = {} - meta['_objid_to_object'] = {} - meta['_objid_to_bidx'] = {} - meta['_objid_to_idx'] = {} - meta['_objid_to_ramidx'] = {} - meta['_bidx_to_objid'] = {} - meta['_idx_to_objid'] = {} - meta['_bidx_to_object'] = {} - meta['_idx_to_object'] = {} - - for i,s in enumerate(meta['strings']): - assert(s['str'] not in meta['_strings_plain']) - meta['_strings_plain'].append(s['str']) - if s.get('reserved_word', False): - meta['_is_plain_reserved_word'][s['str']] = True # includes also strict reserved words - if s.get('future_reserved_word_strict', False): - meta['_is_plain_strict_reserved_word'][s['str']] = True - meta['_idx_to_string'][i] = s - meta['_idx_to_plain'][i] = s['str'] - meta['_plain_to_idx'][s['str']] = i - #meta['_string_to_idx'][s] = i - for i,s in enumerate(meta['strings_stridx']): - assert(s.get('stridx_used', False) == True) - meta['_strings_stridx_plain'].append(s['str']) - meta['_stridx_to_string'][i] = s - meta['_stridx_to_plain'][i] = s['str'] - #meta['_string_to_stridx'][s] = i - meta['_plain_to_stridx'][s['str']] = i - meta['_define_to_stridx'][s['define']] = i - meta['_stridx_to_define'][i] = s['define'] - for i,o in enumerate(meta['objects']): - meta['_objid_to_object'][o['id']] = o - meta['_objid_to_idx'][o['id']] = i - meta['_idx_to_objid'][i] = o['id'] - meta['_idx_to_object'][i] = o - for i,o in enumerate(meta['objects_bidx']): - assert(o.get('bidx_used', False) == True) - meta['_objid_to_bidx'][o['id']] = i - meta['_bidx_to_objid'][i] = o['id'] - meta['_bidx_to_object'][i] = o - if meta.has_key('objects_ram_toplevel'): - for i,o in enumerate(meta['objects_ram_toplevel']): - meta['_objid_to_ramidx'][o['id']] = i - - # Dump stats. - - if rom: - meta_name = 'ROM' - else: - meta_name = 'RAM' - - count_add_ref = 0 - count_add_user = 0 - for s in meta['strings']: - if s.get('_auto_add_ref', False): - count_add_ref += 1 - if s.get('_auto_add_user', False): - count_add_user += 1 - count_add = count_add_ref + count_add_user - - print(('Prepared %s metadata: %d objects, %d objects with bidx, ' + \ - '%d strings, %d strings with stridx, %d strings added ' + \ - '(%d property key references, %d user strings)') % \ - (meta_name, len(meta['objects']), len(meta['objects_bidx']), \ - len(meta['strings']), len(meta['strings_stridx']), \ - count_add, count_add_ref, count_add_user)) - - return meta - -# -# Metadata helpers -# - -# Magic values for Math built-in. -math_onearg_magic = { - 'fabs': 0, # BI_MATH_FABS_IDX - 'acos': 1, # BI_MATH_ACOS_IDX - 'asin': 2, # BI_MATH_ASIN_IDX - 'atan': 3, # BI_MATH_ATAN_IDX - 'ceil': 4, # BI_MATH_CEIL_IDX - 'cos': 5, # BI_MATH_COS_IDX - 'exp': 6, # BI_MATH_EXP_IDX - 'floor': 7, # BI_MATH_FLOOR_IDX - 'log': 8, # BI_MATH_LOG_IDX - 'round': 9, # BI_MATH_ROUND_IDX - 'sin': 10, # BI_MATH_SIN_IDX - 'sqrt': 11, # BI_MATH_SQRT_IDX - 'tan': 12 # BI_MATH_TAN_IDX -} -math_twoarg_magic = { - 'atan2': 0, # BI_MATH_ATAN2_IDX - 'pow': 1 # BI_MATH_POW_IDX -} - -# Magic values for Array built-in. 
-array_iter_magic = { - 'every': 0, # BI_ARRAY_ITER_EVERY - 'some': 1, # BI_ARRAY_ITER_SOME - 'forEach': 2, # BI_ARRAY_ITER_FOREACH - 'map': 3, # BI_ARRAY_ITER_MAP - 'filter': 4 # BI_ARRAY_ITER_FILTER -} - -# Magic value for typedarray/node.js buffer read field operations. -def magic_readfield(elem, signed=None, bigendian=None, typedarray=None): - # Must match duk__FLD_xxx in duk_bi_buffer.c - elemnum = { - '8bit': 0, - '16bit': 1, - '32bit': 2, - 'float': 3, - 'double': 4, - 'varint': 5 - }[elem] - if signed == True: - signednum = 1 - elif signed == False: - signednum = 0 - else: - raise Exception('missing "signed"') - if bigendian == True: - bigendiannum = 1 - elif bigendian == False: - bigendiannum = 0 - else: - raise Exception('missing "bigendian"') - if typedarray == True: - typedarraynum = 1 - elif typedarray == False: - typedarraynum = 0 - else: - raise Exception('missing "typedarray"') - return elemnum + (signednum << 4) + (bigendiannum << 3) + (typedarraynum << 5) - -# Magic value for typedarray/node.js buffer write field operations. -def magic_writefield(elem, signed=None, bigendian=None, typedarray=None): - return magic_readfield(elem, signed=signed, bigendian=bigendian, typedarray=typedarray) - -# Magic value for typedarray constructors. -def magic_typedarray_constructor(elem, shift): - # Must match duk_hbufobj.h header - elemnum = { - 'uint8': 0, - 'uint8clamped': 1, - 'int8': 2, - 'uint16': 3, - 'int16': 4, - 'uint32': 5, - 'int32': 6, - 'float32': 7, - 'float64': 8 - }[elem] - return (elemnum << 2) + shift - -# Resolve a magic value from a YAML metadata element into an integer. -def resolve_magic(elem, objid_to_bidx): - if elem is None: - return 0 - if isinstance(elem, (int, long)): - v = int(elem) - if not (v >= -0x8000 and v <= 0x7fff): - raise Exception('invalid plain value for magic: %s' % repr(v)) - return v - if not isinstance(elem, dict): - raise Exception('invalid magic: %r' % elem) - - assert(elem.has_key('type')) - if elem['type'] == 'bidx': - # Maps to thr->builtins[]. - v = elem['id'] - return objid_to_bidx[v] - elif elem['type'] == 'plain': - v = elem['value'] - if not (v >= -0x8000 and v <= 0x7fff): - raise Exception('invalid plain value for magic: %s' % repr(v)) - return v - elif elem['type'] == 'math_onearg': - return math_onearg_magic[elem['funcname']] - elif elem['type'] == 'math_twoarg': - return math_twoarg_magic[elem['funcname']] - elif elem['type'] == 'array_iter': - return array_iter_magic[elem['funcname']] - elif elem['type'] == 'typedarray_constructor': - return magic_typedarray_constructor(elem['elem'], elem['shift']) - elif elem['type'] == 'buffer_readfield': - return magic_readfield(elem['elem'], elem['signed'], elem['bigendian'], elem['typedarray']) - elif elem['type'] == 'buffer_writefield': - return magic_writefield(elem['elem'], elem['signed'], elem['bigendian'], elem['typedarray']) - else: - raise Exception('invalid magic type: %r' % elem) - -# Helper to find a property from a property list, remove it from the -# property list, and return the removed property. -def steal_prop(props, key): - for idx,prop in enumerate(props): - if prop['key'] == key: - return props.pop(idx) - return None - -# -# RAM initialization data -# -# Init data for built-in strings and objects. The init data for both -# strings and objects is a bit-packed stream tailored to match the decoders -# in duk_heap_alloc.c (strings) and duk_hthread_builtins.c (objects). 
-# Various bitfield sizes are used to minimize the bitstream size without -# resorting to actual, expensive compression. The goal is to minimize the -# overall size of the init code and the init data. -# -# The built-in data created here is used to set up initial RAM versions -# of the strings and objects. References to these objects are tracked in -# heap->strs[] and thr->builtins[] which allows Duktape internals to refer -# to built-ins e.g. as thr->builtins[DUK_BIDX_STRING_PROTOTYPE]. -# -# Not all strings and objects need to be reachable through heap->strs[] -# or thr->builtins[]: the strings/objects that need to be in these arrays -# is determined based on metadata and source code scanning. -# - -# XXX: Reserved word stridxs could be made to match token numbers -# directly so that a duk_stridx2token[] would not be needed. - -# Default property attributes, see E5 Section 15 beginning. -LENGTH_PROPERTY_ATTRIBUTES = '' -ACCESSOR_PROPERTY_ATTRIBUTES = 'c' -DEFAULT_DATA_PROPERTY_ATTRIBUTES = 'wc' - -# Encoding constants (must match duk_hthread_builtins.c). -CLASS_BITS = 5 -BIDX_BITS = 7 -STRIDX_BITS = 9 # would be nice to optimize to 8 -NATIDX_BITS = 8 -NUM_NORMAL_PROPS_BITS = 6 -NUM_FUNC_PROPS_BITS = 6 -PROP_FLAGS_BITS = 3 -STRING_LENGTH_BITS = 8 -STRING_CHAR_BITS = 7 -LENGTH_PROP_BITS = 3 -NARGS_BITS = 3 -PROP_TYPE_BITS = 3 -MAGIC_BITS = 16 - -NARGS_VARARGS_MARKER = 0x07 -NO_CLASS_MARKER = 0x00 # 0 = DUK_HOBJECT_CLASS_NONE -NO_BIDX_MARKER = 0x7f -NO_STRIDX_MARKER = 0xff - -PROP_TYPE_DOUBLE = 0 -PROP_TYPE_STRING = 1 -PROP_TYPE_STRIDX = 2 -PROP_TYPE_BUILTIN = 3 -PROP_TYPE_UNDEFINED = 4 -PROP_TYPE_BOOLEAN_TRUE = 5 -PROP_TYPE_BOOLEAN_FALSE = 6 -PROP_TYPE_ACCESSOR = 7 - -# must match duk_hobject.h -PROPDESC_FLAG_WRITABLE = (1 << 0) -PROPDESC_FLAG_ENUMERABLE = (1 << 1) -PROPDESC_FLAG_CONFIGURABLE = (1 << 2) -PROPDESC_FLAG_ACCESSOR = (1 << 3) # unused now - -# Class names, numeric indices must match duk_hobject.h class numbers. -class_names = [ - 'Unused', - 'Arguments', - 'Array', - 'Boolean', - 'Date', - 'Error', - 'Function', - 'JSON', - 'Math', - 'Number', - 'Object', - 'RegExp', - 'String', - 'global', - 'ObjEnv', - 'DecEnv', - 'Buffer', - 'Pointer', - 'Thread', -] -class2num = {} -for i,v in enumerate(class_names): - class2num[v] = i - -# Map class name to a class number. -def class_to_number(x): - return class2num[x] - -# Generate bit-packed RAM string init data. -def gen_ramstr_initdata_bitpacked(meta): - be = dukutil.BitEncoder() - - # Strings are encoded as follows: a string begins in lowercase - # mode and recognizes the following 5-bit symbols: - # - # 0-25 'a' ... 'z' - # 26 '_' - # 27 0x00 (actually decoded to 0xff, internal marker) - # 28 reserved - # 29 switch to uppercase for one character - # (next 5-bit symbol must be in range 0-25) - # 30 switch to uppercase - # 31 read a 7-bit character verbatim - # - # Uppercase mode is the same except codes 29 and 30 switch to - # lowercase. - - UNDERSCORE = 26 - ZERO = 27 - SWITCH1 = 29 - SWITCH = 30 - SEVENBIT = 31 - - maxlen = 0 - n_optimal = 0 - n_switch1 = 0 - n_switch = 0 - n_sevenbit = 0 - - for s_obj in meta['strings_stridx']: - s = s_obj['str'] - - be.bits(len(s), 5) - - if len(s) > maxlen: - maxlen = len(s) - - # 5-bit character, mode specific - mode = 'lowercase' - - for idx, c in enumerate(s): - # This encoder is not that optimal, but good enough for now. 
- - islower = (ord(c) >= ord('a') and ord(c) <= ord('z')) - isupper = (ord(c) >= ord('A') and ord(c) <= ord('Z')) - islast = (idx == len(s) - 1) - isnextlower = False - isnextupper = False - if not islast: - c2 = s[idx+1] - isnextlower = (ord(c2) >= ord('a') and ord(c2) <= ord('z')) - isnextupper = (ord(c2) >= ord('A') and ord(c2) <= ord('Z')) - - if c == '_': - be.bits(UNDERSCORE, 5) - n_optimal += 1 - elif c == '\xff': - # A 0xff prefix (never part of valid UTF-8) is used for internal properties. - # It is encoded as 0x00 in generated init data for technical reasons: it - # keeps lookup table elements 7 bits instead of 8 bits. - be.bits(ZERO, 5) - n_optimal += 1 - elif islower and mode == 'lowercase': - be.bits(ord(c) - ord('a'), 5) - n_optimal += 1 - elif isupper and mode == 'uppercase': - be.bits(ord(c) - ord('A'), 5) - n_optimal += 1 - elif islower and mode == 'uppercase': - if isnextlower: - be.bits(SWITCH, 5) - be.bits(ord(c) - ord('a'), 5) - mode = 'lowercase' - n_switch += 1 - else: - be.bits(SWITCH1, 5) - be.bits(ord(c) - ord('a'), 5) - n_switch1 += 1 - elif isupper and mode == 'lowercase': - if isnextupper: - be.bits(SWITCH, 5) - be.bits(ord(c) - ord('A'), 5) - mode = 'uppercase' - n_switch += 1 - else: - be.bits(SWITCH1, 5) - be.bits(ord(c) - ord('A'), 5) - n_switch1 += 1 - else: - assert(ord(c) >= 0 and ord(c) <= 127) - be.bits(SEVENBIT, 5) - be.bits(ord(c), 7) - n_sevenbit += 1 - #print('sevenbit for: %r' % c) - - # end marker not necessary, C code knows length from define - - res = be.getByteString() - - print('%d ram strings, %d bytes of string init data, %d maximum string length, ' + \ - 'encoding: optimal=%d,switch1=%d,switch=%d,sevenbit=%d') % \ - (len(meta['strings_stridx']), len(res), maxlen, \ - n_optimal, n_switch1, n_switch, n_sevenbit) - - return res, maxlen - -# Functions to emit string-related source/header parts. - -def emit_ramstr_source_strinit_data(genc, strdata): - genc.emitArray(strdata, 'duk_strings_data', visibility='DUK_INTERNAL', typename='duk_uint8_t', intvalues=True, const=True, size=len(strdata)) - -def emit_ramstr_header_strinit_defines(genc, meta, strdata, strmaxlen): - genc.emitLine('#if !defined(DUK_SINGLE_FILE)') - genc.emitLine('DUK_INTERNAL_DECL const duk_uint8_t duk_strings_data[%d];' % len(strdata)) - genc.emitLine('#endif /* !DUK_SINGLE_FILE */') - genc.emitDefine('DUK_STRDATA_MAX_STRLEN', strmaxlen) - genc.emitDefine('DUK_STRDATA_DATA_LENGTH', len(strdata)) - -# This is used for both RAM and ROM strings. 
-def emit_header_stridx_defines(genc, meta): - strlist = meta['strings_stridx'] - - for idx,s in enumerate(strlist): - genc.emitDefine(s['define'], idx, repr(s['str'])) - defname = s['define'].replace('_STRIDX','_HEAP_STRING') - genc.emitDefine(defname + '(heap)', 'DUK_HEAP_GET_STRING((heap),%s)' % s['define']) - defname = s['define'].replace('_STRIDX', '_HTHREAD_STRING') - genc.emitDefine(defname + '(thr)', 'DUK_HTHREAD_GET_STRING((thr),%s)' % s['define']) - - idx_start_reserved = None - idx_start_strict_reserved = None - for idx,s in enumerate(strlist): - if idx_start_reserved is None and s.get('reserved_word', False): - idx_start_reserved = idx - if idx_start_strict_reserved is None and s.get('future_reserved_word_strict', False): - idx_start_strict_reserved = idx - assert(idx_start_reserved is not None) - assert(idx_start_strict_reserved is not None) - - genc.emitLine('') - genc.emitDefine('DUK_HEAP_NUM_STRINGS', len(strlist)) - genc.emitDefine('DUK_STRIDX_START_RESERVED', idx_start_reserved) - genc.emitDefine('DUK_STRIDX_START_STRICT_RESERVED', idx_start_strict_reserved) - genc.emitDefine('DUK_STRIDX_END_RESERVED', len(strlist), comment='exclusive endpoint') - genc.emitLine('') - genc.emitLine('/* To convert a heap stridx to a token number, subtract') - genc.emitLine(' * DUK_STRIDX_START_RESERVED and add DUK_TOK_START_RESERVED.') - genc.emitLine(' */') - -# Encode property flags for RAM initializers. -def encode_property_flags(flags): - # Note: must match duk_hobject.h - - res = 0 - nflags = 0 - if 'w' in flags: - nflags += 1 - res = res | PROPDESC_FLAG_WRITABLE - if 'e' in flags: - nflags += 1 - res = res | PROPDESC_FLAG_ENUMERABLE - if 'c' in flags: - nflags += 1 - res = res | PROPDESC_FLAG_CONFIGURABLE - if 'a' in flags: - nflags += 1 - res = res | PROPDESC_FLAG_ACCESSOR - - if nflags != len(flags): - raise Exception('unsupported flags: %s' % repr(flags)) - - return res - -# Generate RAM object initdata for an object (but not its properties). -def gen_ramobj_initdata_for_object(meta, be, bi, string_to_stridx, natfunc_name_to_natidx, objid_to_bidx): - def _stridx(strval): - stridx = string_to_stridx[strval] - be.bits(stridx, STRIDX_BITS) - def _stridx_or_string(strval): - # XXX: could share the built-in strings decoder, would save ~200 bytes. - stridx = string_to_stridx.get(strval) - if stridx is not None: - be.bits(0, 1) # marker: stridx - be.bits(stridx, STRIDX_BITS) - else: - be.bits(1, 1) # marker: raw bytes - be.bits(len(strval), STRING_LENGTH_BITS) - for i in xrange(len(strval)): - be.bits(ord(strval[i]), STRING_CHAR_BITS) - def _natidx(native_name): - natidx = natfunc_name_to_natidx[native_name] - be.bits(natidx, NATIDX_BITS) - - class_num = class_to_number(bi['class']) - be.bits(class_num, CLASS_BITS) - - props = [x for x in bi['properties']] # clone - - prop_proto = steal_prop(props, 'prototype') - prop_constr = steal_prop(props, 'constructor') - prop_name = steal_prop(props, 'name') - prop_length = steal_prop(props, 'length') - - length = -1 # default value -1 signifies varargs - if prop_length is not None: - assert(isinstance(prop_length['value'], int)) - length = prop_length['value'] - be.bits(1, 1) # flag: have length - be.bits(length, LENGTH_PROP_BITS) - else: - be.bits(0, 1) # flag: no length - - # The attributes for 'length' are standard ("none") except for - # Array.prototype.length which must be writable (this is handled - # separately in duk_hthread_builtins.c). 
- - len_attrs = LENGTH_PROPERTY_ATTRIBUTES - if prop_length is not None: - len_attrs = prop_length['attributes'] - - if len_attrs != LENGTH_PROPERTY_ATTRIBUTES: - # Attributes are assumed to be the same, except for Array.prototype. - if bi['class'] != 'Array': # Array.prototype is the only one with this class - raise Exception('non-default length attribute for unexpected object') - - # For 'Function' classed objects, emit the native function stuff. - # Unfortunately this is more or less a copy of what we do for - # function properties now. This should be addressed if a rework - # on the init format is done. - - if bi['class'] == 'Function': - _natidx(bi['native']) - - if bi.get('varargs', False): - be.bits(1, 1) # flag: non-default nargs - be.bits(NARGS_VARARGS_MARKER, NARGS_BITS) - elif bi.has_key('nargs') and bi['nargs'] != length: - be.bits(1, 1) # flag: non-default nargs - be.bits(bi['nargs'], NARGS_BITS) - else: - assert(length is not None) - be.bits(0, 1) # flag: default nargs OK - - # All Function-classed global level objects are callable - # (have [[Call]]) but not all are constructable (have - # [[Construct]]). Flag that. - - assert(bi.has_key('callable')) - assert(bi['callable'] == True) - - assert(prop_name is not None) - assert(isinstance(prop_name['value'], str)) - _stridx_or_string(prop_name['value']) - - if bi.get('constructable', False): - be.bits(1, 1) # flag: constructable - else: - be.bits(0, 1) # flag: not constructable - - # Convert signed magic to 16-bit unsigned for encoding - magic = resolve_magic(bi.get('magic'), objid_to_bidx) & 0xffff - if magic != 0: - assert(magic >= 0) - assert(magic < (1 << MAGIC_BITS)) - be.bits(1, 1) - be.bits(magic, MAGIC_BITS) - else: - be.bits(0, 1) - -# Generate RAM object initdata for an object's properties. -def gen_ramobj_initdata_for_props(meta, be, bi, string_to_stridx, natfunc_name_to_natidx, objid_to_bidx, double_byte_order): - count_normal_props = 0 - count_function_props = 0 - - def _bidx(bi_id): - if bi_id is None: - be.bits(NO_BIDX_MARKER, BIDX_BITS) - else: - be.bits(objid_to_bidx[bi_id], BIDX_BITS) - def _stridx(strval): - stridx = string_to_stridx[strval] - be.bits(stridx, STRIDX_BITS) - def _stridx_or_string(strval): - # XXX: could share the built-in strings decoder, would save ~200 bytes. 
- stridx = string_to_stridx.get(strval) - if stridx is not None: - be.bits(0, 1) # marker: stridx - be.bits(stridx, STRIDX_BITS) - else: - be.bits(1, 1) # marker: raw bytes - be.bits(len(strval), STRING_LENGTH_BITS) - for i in xrange(len(strval)): - be.bits(ord(strval[i]), STRING_CHAR_BITS) - def _natidx(native_name): - natidx = natfunc_name_to_natidx[native_name] - be.bits(natidx, NATIDX_BITS) - - props = [x for x in bi['properties']] # clone - - # internal prototype: not an actual property so not in property list - if bi.has_key('internal_prototype'): - _bidx(bi['internal_prototype']) - else: - _bidx(None) - - # external prototype: encoded specially, steal from property list - prop_proto = steal_prop(props, 'prototype') - if prop_proto is not None: - assert(prop_proto['value']['type'] == 'object') - assert(prop_proto['attributes'] == '') - _bidx(prop_proto['value']['id']) - else: - _bidx(None) - - # external constructor: encoded specially, steal from property list - prop_constr = steal_prop(props, 'constructor') - if prop_constr is not None: - assert(prop_constr['value']['type'] == 'object') - assert(prop_constr['attributes'] == 'wc') - _bidx(prop_constr['value']['id']) - else: - _bidx(None) - - # name: encoded specially for function objects, so steal and ignore here - if bi['class'] == 'Function': - prop_name = steal_prop(props, 'name') - assert(prop_name is not None) - assert(isinstance(prop_name['value'], str)) - # Function.prototype.name has special handling in duk_hthread_builtins.c - assert((bi['id'] != 'bi_function_prototype' and prop_name['attributes'] == '') or \ - (bi['id'] == 'bi_function_prototype' and prop_name['attributes'] == 'w')) - - # length: encoded specially, so steal and ignore - prop_proto = steal_prop(props, 'length') - - # Date.prototype.toGMTString needs special handling and is handled - # directly in duk_hthread_builtins.c; so steal and ignore here. - if bi['id'] == 'bi_date_prototype': - prop_togmtstring = steal_prop(props, 'toGMTString') - assert(prop_togmtstring is not None) - #print('Stole Date.prototype.toGMTString') - - # Split properties into non-builtin functions and other properties. - # This split is a bit arbitrary, but is used to reduce flag bits in - # the bit stream. - values = [] - functions = [] - for prop in props: - if isinstance(prop['value'], dict) and \ - prop['value']['type'] == 'object' and \ - metadata_lookup_object(meta, prop['value']['id']).has_key('native') and \ - not metadata_lookup_object(meta, prop['value']['id']).has_key('bidx'): - functions.append(prop) - else: - values.append(prop) - - be.bits(len(values), NUM_NORMAL_PROPS_BITS) - - for valspec in values: - count_normal_props += 1 - - val = valspec['value'] - - _stridx_or_string(valspec['key']) - - # Attribute check doesn't check for accessor flag; that is now - # automatically set by C code when value is an accessor type. - # Accessors must not have 'writable', so they'll always have - # non-default attributes (less footprint than adding a different - # default). 
- default_attrs = DEFAULT_DATA_PROPERTY_ATTRIBUTES - - attrs = valspec.get('attributes', default_attrs) - attrs = attrs.replace('a', '') # ram bitstream doesn't encode 'accessor' attribute - if attrs != default_attrs: - #print('non-default attributes: %s -> %r (default %r)' % (valspec['key'], attrs, default_attrs)) - be.bits(1, 1) # flag: have custom attributes - be.bits(encode_property_flags(attrs), PROP_FLAGS_BITS) - else: - be.bits(0, 1) # flag: no custom attributes - - if val is None: - print('WARNING: RAM init data format doesn\'t support "null" now, value replaced with "undefined": %r' % valspec) - #raise Exception('RAM init format doesn\'t support a "null" value now') - be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) - elif isinstance(val, bool): - if val == True: - be.bits(PROP_TYPE_BOOLEAN_TRUE, PROP_TYPE_BITS) - else: - be.bits(PROP_TYPE_BOOLEAN_FALSE, PROP_TYPE_BITS) - elif isinstance(val, (float, int)) or isinstance(val, dict) and val['type'] == 'double': - # Avoid converting a manually specified NaN temporarily into - # a float to avoid risk of e.g. NaN being replaced by another. - if isinstance(val, dict): - val = val['bytes'].decode('hex') - assert(len(val) == 8) - else: - val = struct.pack('>d', float(val)) - - be.bits(PROP_TYPE_DOUBLE, PROP_TYPE_BITS) - - # encoding of double must match target architecture byte order - indexlist = { - 'big': [ 0, 1, 2, 3, 4, 5, 6, 7 ], - 'little': [ 7, 6, 5, 4, 3, 2, 1, 0 ], - 'mixed': [ 3, 2, 1, 0, 7, 6, 5, 4 ] # some arm platforms - }[double_byte_order] - - data = ''.join([ val[indexlist[idx]] for idx in xrange(8) ]) - - #print('DOUBLE: %s -> %s' % (val.encode('hex'), data.encode('hex'))) - - if len(data) != 8: - raise Exception('internal error') - be.string(data) - elif isinstance(val, str) or isinstance(val, unicode): - if isinstance(val, unicode): - # Note: non-ASCII characters will not currently work, - # because bits/char is too low. - val = val.encode('utf-8') - - if string_to_stridx.has_key(val): - # String value is in built-in string table -> encode - # using a string index. This saves some space, - # especially for the 'name' property of errors - # ('EvalError' etc). 
- - be.bits(PROP_TYPE_STRIDX, PROP_TYPE_BITS) - _stridx(val) - else: - # Not in string table -> encode as raw 7-bit value - - be.bits(PROP_TYPE_STRING, PROP_TYPE_BITS) - be.bits(len(val), STRING_LENGTH_BITS) - for i in xrange(len(val)): - be.bits(ord(val[i]), STRING_CHAR_BITS) - elif isinstance(val, dict): - if val['type'] == 'object': - be.bits(PROP_TYPE_BUILTIN, PROP_TYPE_BITS) - _bidx(val['id']) - elif val['type'] == 'undefined': - be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) - elif val['type'] == 'accessor': - be.bits(PROP_TYPE_ACCESSOR, PROP_TYPE_BITS) - getter_fn = metadata_lookup_object(meta, val['getter_id']) - setter_fn = metadata_lookup_object(meta, val['setter_id']) - _natidx(getter_fn['native']) - _natidx(setter_fn['native']) - assert(getter_fn['nargs'] == 0) - assert(setter_fn['nargs'] == 1) - assert(getter_fn['magic'] == 0) - assert(setter_fn['magic'] == 0) - elif val['type'] == 'lightfunc': - print('WARNING: RAM init data format doesn\'t support "lightfunc" now, value replaced with "undefined": %r' % valspec) - be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) - else: - raise Exception('unsupported value: %s' % repr(val)) - else: - raise Exception('unsupported value: %s' % repr(val)) - - be.bits(len(functions), NUM_FUNC_PROPS_BITS) - - for funprop in functions: - count_function_props += 1 - - funobj = metadata_lookup_object(meta, funprop['value']['id']) - prop_len = metadata_lookup_property(funobj, 'length') - assert(prop_len is not None) - assert(isinstance(prop_len['value'], (int))) - length = prop_len['value'] - - _stridx_or_string(funprop['key']) - _natidx(funobj['native']) - be.bits(length, LENGTH_PROP_BITS) - - if funobj.get('varargs', False): - be.bits(1, 1) # flag: non-default nargs - be.bits(NARGS_VARARGS_MARKER, NARGS_BITS) - elif funobj.has_key('nargs') and funobj['nargs'] != length: - be.bits(1, 1) # flag: non-default nargs - be.bits(funobj['nargs'], NARGS_BITS) - else: - be.bits(0, 1) # flag: default nargs OK - - # XXX: make this check conditional to minimize bit count - # (there are quite a lot of function properties) - # Convert signed magic to 16-bit unsigned for encoding - magic = resolve_magic(funobj.get('magic'), objid_to_bidx) & 0xffff - if magic != 0: - assert(magic >= 0) - assert(magic < (1 << MAGIC_BITS)) - be.bits(1, 1) - be.bits(magic, MAGIC_BITS) - else: - be.bits(0, 1) - - return count_normal_props, count_function_props - -# Get helper maps for RAM objects. -def get_ramobj_native_func_maps(meta): - # Native function list and index - native_funcs_found = {} - native_funcs = [] - natfunc_name_to_natidx = {} - - for o in meta['objects']: - if o.has_key('native'): - native_funcs_found[o['native']] = True - for v in o['properties']: - val = v['value'] - if isinstance(val, dict): - if val['type'] == 'accessor': - getter = metadata_lookup_object(meta, val['getter_id']) - native_funcs_found[getter['native']] = True - setter = metadata_lookup_object(meta, val['setter_id']) - native_funcs_found[setter['native']] = True - if val['type'] == 'object': - target = metadata_lookup_object(meta, val['id']) - if target.has_key('native'): - native_funcs_found[target['native']] = True - if val['type'] == 'lightfunc': - # No lightfunc support for RAM initializer now. - pass - - for idx,k in enumerate(sorted(native_funcs_found.keys())): - native_funcs.append(k) # native func names - natfunc_name_to_natidx[k] = idx - - return native_funcs, natfunc_name_to_natidx - -# Generate bit-packed RAM object init data. 
-def gen_ramobj_initdata_bitpacked(meta, native_funcs, natfunc_name_to_natidx, double_byte_order): - # RAM initialization is based on a specially filtered list of top - # level objects which includes objects with 'bidx' and objects - # which aren't handled as inline values in the init bitstream. - objlist = meta['objects_ram_toplevel'] - objid_to_idx = meta['_objid_to_ramidx'] - objid_to_object = meta['_objid_to_object'] # This index is valid even for filtered object list - string_index = meta['_plain_to_stridx'] - - # Generate bitstream - be = dukutil.BitEncoder() - count_builtins = 0 - count_normal_props = 0 - count_function_props = 0 - for o in objlist: - count_builtins += 1 - gen_ramobj_initdata_for_object(meta, be, o, string_index, natfunc_name_to_natidx, objid_to_idx) - for o in objlist: - count_obj_normal, count_obj_func = gen_ramobj_initdata_for_props(meta, be, o, string_index, natfunc_name_to_natidx, objid_to_idx, double_byte_order) - count_normal_props += count_obj_normal - count_function_props += count_obj_func - - romobj_init_data = be.getByteString() - #print(repr(romobj_init_data)) - #print(len(romobj_init_data)) - - print('%d ram builtins, %d normal properties, %d function properties, %d bytes of object init data' % \ - (count_builtins, count_normal_props, count_function_props, len(romobj_init_data))) - - return romobj_init_data - -# Functions to emit object-related source/header parts. - -def emit_ramobj_source_nativefunc_array(genc, native_func_list): - genc.emitLine('/* native functions: %d */' % len(native_func_list)) - genc.emitLine('DUK_INTERNAL const duk_c_function duk_bi_native_functions[%d] = {' % len(native_func_list)) - for i in native_func_list: - # The function pointer cast here makes BCC complain about - # "initializer too complicated", so omit the cast. - #genc.emitLine('\t(duk_c_function) %s,' % i) - genc.emitLine('\t%s,' % i) - genc.emitLine('};') - -def emit_ramobj_source_objinit_data(genc, init_data): - genc.emitArray(init_data, 'duk_builtins_data', visibility='DUK_INTERNAL', typename='duk_uint8_t', intvalues=True, const=True, size=len(init_data)) - -def emit_ramobj_header_nativefunc_array(genc, native_func_list): - genc.emitLine('#if !defined(DUK_SINGLE_FILE)') - genc.emitLine('DUK_INTERNAL_DECL const duk_c_function duk_bi_native_functions[%d];' % len(native_func_list)) - genc.emitLine('#endif /* !DUK_SINGLE_FILE */') - -def emit_ramobj_header_objects(genc, meta): - objlist = meta['objects_bidx'] - for idx,o in enumerate(objlist): - defname = 'DUK_BIDX_' + '_'.join(o['id'].upper().split('_')[1:]) # bi_foo_bar -> FOO_BAR - genc.emitDefine(defname, idx) - genc.emitDefine('DUK_NUM_BUILTINS', len(objlist)) - genc.emitDefine('DUK_NUM_BIDX_BUILTINS', len(objlist)) # Objects with 'bidx' - genc.emitDefine('DUK_NUM_ALL_BUILTINS', len(meta['objects_ram_toplevel'])) # Objects with 'bidx' + temps needed in init - -def emit_ramobj_header_initdata(genc, init_data): - genc.emitLine('#if !defined(DUK_SINGLE_FILE)') - genc.emitLine('DUK_INTERNAL_DECL const duk_uint8_t duk_builtins_data[%d];' % len(init_data)) - genc.emitLine('#endif /* !DUK_SINGLE_FILE */') - genc.emitDefine('DUK_BUILTINS_DATA_LENGTH', len(init_data)) - -# -# ROM init data -# -# Compile-time initializers for ROM strings and ROM objects. This involves -# a lot of small details: -# -# - Several variants are needed for different options: unpacked vs. -# packed duk_tval, endianness, string hash in use, etc). -# -# - Static initializers must represent objects of different size. 
For -# example, separate structs are needed for property tables of different -# size or value typing. -# -# - Union initializers cannot be used portable because they're only -# available in C99 and above. -# -# - Initializers must use 'const' correctly to ensure that the entire -# initialization data will go into ROM (read-only data section). -# Const pointers etc will need to be cast into non-const pointers at -# some point to properly mix with non-const RAM pointers, so a portable -# const losing cast is needed. -# -# - C++ doesn't allow forward declaration of "static const" structures -# which is problematic because there are cyclical const structures. -# - -# Get string hash initializers; need to compute possible string hash variants -# which will match runtime values. -def rom_get_strhash16_macro(val): - hash16le = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=False, strhash16=True) - hash16be = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=True, strhash16=True) - hash16sparse = dukutil.duk_heap_hashstring_sparse(val, DUK__FIXED_HASH_SEED, strhash16=True) - return 'DUK__STRHASH16(%dU,%dU,%dU)' % (hash16le, hash16be, hash16sparse) -def rom_get_strhash32_macro(val): - hash32le = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=False, strhash16=False) - hash32be = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=True, strhash16=False) - hash32sparse = dukutil.duk_heap_hashstring_sparse(val, DUK__FIXED_HASH_SEED, strhash16=False) - return 'DUK__STRHASH32(%dUL,%dUL,%dUL)' % (hash32le, hash32be, hash32sparse) - -# Get string character .length; must match runtime .length computation. -def rom_charlen(x): - return dukutil.duk_unicode_unvalidated_utf8_length(x) - -# Get an initializer type and initializer literal for a specified value -# (expressed in YAML metadata format). The types and initializers depend -# on declarations emitted before the initializers, and in several cases -# use a macro to hide the selection between several initializer variants. -def rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map): - def double_bytes_initializer(val): - # Portable and exact float initializer. 
- assert(isinstance(val, str) and len(val) == 16) # hex encoded bytes - val = val.decode('hex') - tmp = [] - for i in xrange(8): - t = ord(val[i]) - if t >= 128: - tmp.append('%dU' % t) - else: - tmp.append('%d' % t) - return 'DUK__DBLBYTES(' + ','.join(tmp) + ')' - - def tval_number_initializer(val): - return 'DUK__TVAL_NUMBER(%s)' % double_bytes_initializer(val) - - v = val['value'] - if v is None: - init_type = 'duk_rom_tval_null' - init_lit = 'DUK__TVAL_NULL()' - elif isinstance(v, (bool)): - init_type = 'duk_rom_tval_boolean' - bval = 0 - if v: - bval = 1 - init_lit = 'DUK__TVAL_BOOLEAN(%d)' % bval - elif isinstance(v, (int, float)): - fval = struct.pack('>d', float(v)).encode('hex') - init_type = 'duk_rom_tval_number' - init_lit = tval_number_initializer(fval) - elif isinstance(v, (str, unicode)): - init_type = 'duk_rom_tval_string' - init_lit = 'DUK__TVAL_STRING(&%s)' % bi_str_map[v] - elif isinstance(v, (dict)): - if v['type'] == 'double': - init_type = 'duk_rom_tval_number' - init_lit = tval_number_initializer(v['bytes']) - elif v['type'] == 'undefined': - init_type = 'duk_rom_tval_undefined' - init_lit = 'DUK__TVAL_UNDEFINED()' - elif v['type'] == 'null': - init_type = 'duk_rom_tval_null' - init_lit = 'DUK__TVAL_UNDEFINED()' - elif v['type'] == 'object': - init_type = 'duk_rom_tval_object' - init_lit = 'DUK__TVAL_OBJECT(&%s)' % bi_obj_map[v['id']] - elif v['type'] == 'accessor': - getter_object = metadata_lookup_object(meta, v['getter_id']) - setter_object = metadata_lookup_object(meta, v['setter_id']) - init_type = 'duk_rom_tval_accessor' - init_lit = 'DUK__TVAL_ACCESSOR(&%s, &%s)' % (bi_obj_map[getter_object['id']], bi_obj_map[setter_object['id']]) - - elif v['type'] == 'lightfunc': - # Match DUK_LFUNC_FLAGS_PACK() in duk_tval.h. - if v.has_key('length'): - assert(v['length'] >= 0 and v['length'] <= 15) - lf_length = v['length'] - else: - lf_length = 0 - if v.get('varargs', True): - lf_nargs = 15 # varargs marker - else: - assert(v['nargs'] >= 0 and v['nargs'] <= 14) - lf_nargs = v['nargs'] - if v.has_key('magic'): - assert(v['magic'] >= -0x80 and v['magic'] <= 0x7f) - lf_magic = v['magic'] & 0xff - else: - lf_magic = 0 - lf_flags = (lf_magic << 8) + (lf_length << 4) + lf_nargs - init_type = 'duk_rom_tval_lightfunc' - init_lit = 'DUK__TVAL_LIGHTFUNC(%s, %dL)' % (v['native'], lf_flags) - else: - raise Exception('unhandled value: %r' % val) - else: - raise Exception('internal error: %r' % val) - return init_type, init_lit - -# Helpers to get either initializer type or value only (not both). -def rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map): - init_type, init_lit = rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map) - return init_type -def rom_get_value_initializer_literal(meta, val, bi_str_map, bi_obj_map): - init_type, init_lit = rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map) - return init_lit - -# Emit ROM strings source: structs/typedefs and their initializers. -# Separate initialization structs are needed for strings of different -# length. -def rom_emit_strings_source(genc, meta): - # Write built-in strings as code section initializers. - - strs = meta['_strings_plain'] # all strings, plain versions - reserved_words = meta['_is_plain_reserved_word'] - strict_reserved_words = meta['_is_plain_strict_reserved_word'] - strs_needing_stridx = meta['strings_stridx'] - - # Sort used lengths and declare per-length initializers. 
- lens = [] - for v in strs: - strlen = len(v) - if strlen not in lens: - lens.append(strlen) - lens.sort() - for strlen in lens: - genc.emitLine('typedef struct duk_romstr_%d duk_romstr_%d; ' % (strlen, strlen) + - 'struct duk_romstr_%d { duk_hstring hdr; duk_uint8_t data[%d]; };' % (strlen, strlen + 1)) - genc.emitLine('') - - # String hash values depend on endianness and other factors, - # use an initializer macro to select the appropriate hash. - genc.emitLine('/* When unaligned access possible, 32-bit values are fetched using host order.') - genc.emitLine(' * When unaligned access not possible, always simulate little endian order.') - genc.emitLine(' * See: src/duk_util_hashbytes.c:duk_util_hashbytes().') - genc.emitLine(' */') - genc.emitLine('#if defined(DUK_USE_STRHASH_DENSE)') - genc.emitLine('#if defined(DUK_USE_HASHBYTES_UNALIGNED_U32_ACCESS)') # XXX: config option to be reworked - genc.emitLine('#if defined(DUK_USE_INTEGER_BE)') - genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16be)') - genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32be)') - genc.emitLine('#else') - genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16le)') - genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32le)') - genc.emitLine('#endif') - genc.emitLine('#else') - genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16le)') - genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32le)') - genc.emitLine('#endif') - genc.emitLine('#else /* DUK_USE_STRHASH_DENSE */') - genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16sparse)') - genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32sparse)') - genc.emitLine('#endif /* DUK_USE_STRHASH_DENSE */') - - # String header initializer macro, takes into account lowmem etc. - genc.emitLine('#if defined(DUK_USE_HEAPPTR16)') - genc.emitLine('#if !defined(DUK_USE_REFCOUNT16)') - genc.emitLine('#error currently assumes DUK_USE_HEAPPTR16 and DUK_USE_REFCOUNT16 are both defined') - genc.emitLine('#endif') - genc.emitLine('#if defined(DUK_USE_HSTRING_CLEN)') - genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') - genc.emitLine('\t{ { (heaphdr_flags) | ((hash16) << 16), (refcount), (blen) }, (clen) }') - genc.emitLine('#else /* DUK_USE_HSTRING_CLEN */') - genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') - genc.emitLine('\t{ { (heaphdr_flags) | ((hash16) << 16), (refcount), (blen) } }') - genc.emitLine('#endif /* DUK_USE_HSTRING_CLEN */') - genc.emitLine('#else /* DUK_USE_HEAPPTR16 */') - genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') - genc.emitLine('\t{ { (heaphdr_flags), (refcount) }, (hash32), (blen), (clen) }') - genc.emitLine('#endif /* DUK_USE_HEAPPTR16 */') - - # Emit string initializers. 
- genc.emitLine('') - bi_str_map = {} # string -> initializer variable name - for str_index,v in enumerate(strs): - bi_str_map[v] = 'duk_str_%d' % str_index - - tmp = 'DUK_INTERNAL const duk_romstr_%d duk_str_%d = {' % (len(v), str_index) - flags = [ 'DUK_HTYPE_STRING', 'DUK_HEAPHDR_FLAG_READONLY' ] - is_arridx = string_is_arridx(v) - - blen = len(v) - clen = rom_charlen(v) - - if blen == clen: - flags.append('DUK_HSTRING_FLAG_ASCII') - if is_arridx: - #print('%r is arridx' % v) - flags.append('DUK_HSTRING_FLAG_ARRIDX') - if len(v) >= 1 and v[0] == '\xff': - flags.append('DUK_HSTRING_FLAG_INTERNAL') - if v in [ 'eval', 'arguments' ]: - flags.append('DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS') - if reserved_words.has_key(v): - flags.append('DUK_HSTRING_FLAG_RESERVED_WORD') - if strict_reserved_words.has_key(v): - flags.append('DUK_HSTRING_FLAG_STRICT_RESERVED_WORD') - - tmp += 'DUK__STRINIT(%s,%d,%s,%s,%d,%d),' % \ - ('|'.join(flags), 1, rom_get_strhash32_macro(v), \ - rom_get_strhash16_macro(v), blen, clen) - - tmpbytes = [] - for c in v: - if ord(c) < 128: - tmpbytes.append('%d' % ord(c)) - else: - tmpbytes.append('%dU' % ord(c)) - tmpbytes.append('%d' % 0) # NUL term - tmp += '{' + ','.join(tmpbytes) + '}' - tmp += '};' - genc.emitLine(tmp) - - # Emit an array of ROM strings, used for string interning. - # - # XXX: String interning now simply walks through the list checking if - # an incoming string is present in ROM. It would be better to use - # binary search (or perhaps even a perfect hash) for this lookup. - # To support binary search we could emit the list in string hash - # order, but because there are multiple different hash variants - # there would need to be multiple lists. We could also order the - # strings based on the string data which is independent of the string - # hash and still possible to binary search relatively efficiently. - # - # cdecl> explain const int * const foo; - # declare foo as const pointer to const int - genc.emitLine('') - genc.emitLine('DUK_INTERNAL const duk_hstring * const duk_rom_strings[%d] = {'% len(strs)) - tmp = [] - linecount = 0 - for str_index,v in enumerate(strs): - if str_index > 0: - tmp.append(', ') - if linecount >= 6: - linecount = 0 - tmp.append('\n') - tmp.append('(const duk_hstring *) &duk_str_%d' % str_index) - linecount += 1 - for line in ''.join(tmp).split('\n'): - genc.emitLine(line) - genc.emitLine('};') - - # Emit an array of duk_hstring pointers indexed using DUK_STRIDX_xxx. - # This will back e.g. DUK_HTHREAD_STRING_XYZ(thr) directly, without - # needing an explicit array in thr/heap->strs[]. - # - # cdecl > explain const int * const foo; - # declare foo as const pointer to const int - genc.emitLine('') - genc.emitLine('DUK_INTERNAL const duk_hstring * const duk_rom_strings_stridx[%d] = {' % len(strs_needing_stridx)) - for s in strs_needing_stridx: - genc.emitLine('\t(const duk_hstring *) &%s,' % bi_str_map[s['str']]) # strs_needing_stridx is a list of objects, not plain strings - genc.emitLine('};') - - return bi_str_map - -# Emit ROM strings header. -def rom_emit_strings_header(genc, meta): - genc.emitLine('#if !defined(DUK_SINGLE_FILE)') # C++ static const workaround - genc.emitLine('DUK_INTERNAL_DECL const duk_hstring * const duk_rom_strings[%d];'% len(meta['strings'])) - genc.emitLine('DUK_INTERNAL_DECL const duk_hstring * const duk_rom_strings_stridx[%d];' % len(meta['strings_stridx'])) - genc.emitLine('#endif') - -# Emit ROM objects initialized types and macros. 
-def rom_emit_object_initializer_types_and_macros(genc): - # Objects and functions are straightforward because they just use the - # RAM structure which has no dynamic or variable size parts. - genc.emitLine('typedef struct duk_romobj duk_romobj; ' + \ - 'struct duk_romobj { duk_hobject hdr; };') - genc.emitLine('typedef struct duk_romarr duk_romarr; ' + \ - 'struct duk_romarr { duk_harray hdr; };') - genc.emitLine('typedef struct duk_romfun duk_romfun; ' + \ - 'struct duk_romfun { duk_hnatfunc hdr; };') - - # For ROM pointer compression we'd need a -compile time- variant. - # The current portable solution is to just assign running numbers - # to ROM compressed pointers, and provide the table for user pointer - # compression function. Much better solutions would be possible, - # but such solutions are often compiler/platform specific. - - # Emit object/function initializer which is aware of options affecting - # the header. Heap next/prev pointers are always NULL. - genc.emitLine('#if defined(DUK_USE_HEAPPTR16)') - genc.emitLine('#if !defined(DUK_USE_REFCOUNT16) || defined(DUK_USE_HOBJECT_HASH_PART)') - genc.emitLine('#error currently assumes DUK_USE_HEAPPTR16 and DUK_USE_REFCOUNT16 are both defined and DUK_USE_HOBJECT_HASH_PART is undefined') - genc.emitLine('#endif') - #genc.emitLine('#if !defined(DUK_USE_HEAPPTR_ENC16_STATIC)') - #genc.emitLine('#error need DUK_USE_HEAPPTR_ENC16_STATIC which provides compile-time pointer compression') - #genc.emitLine('#endif') - genc.emitLine('#define DUK__ROMOBJ_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize) \\') - genc.emitLine('\t{ { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) } }') - genc.emitLine('#define DUK__ROMARR_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,length) \\') - genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) }, (length), 0 /*length_nonwritable*/ } }') - genc.emitLine('#define DUK__ROMFUN_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,nativefunc,nargs,magic) \\') - genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) }, (nativefunc), (duk_int16_t) (nargs), (duk_int16_t) (magic) } }') - genc.emitLine('#else /* DUK_USE_HEAPPTR16 */') - genc.emitLine('#define DUK__ROMOBJ_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize) \\') - genc.emitLine('\t{ { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) } }') - genc.emitLine('#define DUK__ROMARR_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,length) \\') - genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) }, (length), 0 /*length_nonwritable*/ } }') - genc.emitLine('#define DUK__ROMFUN_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,nativefunc,nargs,magic) \\') - genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) }, (nativefunc), (duk_int16_t) (nargs), (duk_int16_t) (magic) } }') - 
genc.emitLine('#endif /* DUK_USE_HEAPPTR16 */') - - # Initializer typedef for a dummy function pointer. ROM support assumes - # function pointers are 32 bits. Using a dummy function pointer type - # avoids function pointer to normal pointer cast which emits warnings. - genc.emitLine('typedef void (*duk_rom_funcptr)(void);') - - # Emit duk_tval structs. This gets a bit messier with packed/unpacked - # duk_tval, endianness variants, pointer sizes, etc. - genc.emitLine('#if defined(DUK_USE_PACKED_TVAL)') - genc.emitLine('typedef struct duk_rom_tval_undefined duk_rom_tval_undefined;') - genc.emitLine('typedef struct duk_rom_tval_null duk_rom_tval_null;') - genc.emitLine('typedef struct duk_rom_tval_lightfunc duk_rom_tval_lightfunc;') - genc.emitLine('typedef struct duk_rom_tval_boolean duk_rom_tval_boolean;') - genc.emitLine('typedef struct duk_rom_tval_number duk_rom_tval_number;') - genc.emitLine('typedef struct duk_rom_tval_object duk_rom_tval_object;') - genc.emitLine('typedef struct duk_rom_tval_string duk_rom_tval_string;') - genc.emitLine('typedef struct duk_rom_tval_accessor duk_rom_tval_accessor;') - genc.emitLine('struct duk_rom_tval_number { duk_uint8_t bytes[8]; };') - genc.emitLine('struct duk_rom_tval_accessor { const duk_hobject *get; const duk_hobject *set; };') - genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') - genc.emitLine('struct duk_rom_tval_object { const void *ptr; duk_uint32_t hiword; };') - genc.emitLine('struct duk_rom_tval_string { const void *ptr; duk_uint32_t hiword; };') - genc.emitLine('struct duk_rom_tval_undefined { const void *ptr; duk_uint32_t hiword; };') - genc.emitLine('struct duk_rom_tval_null { const void *ptr; duk_uint32_t hiword; };') - genc.emitLine('struct duk_rom_tval_lightfunc { duk_rom_funcptr ptr; duk_uint32_t hiword; };') - genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t dummy; duk_uint32_t hiword; };') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') - genc.emitLine('struct duk_rom_tval_object { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_string { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_undefined { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_null { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_lightfunc { duk_uint32_t hiword; duk_rom_funcptr ptr; };') - genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t hiword; duk_uint32_t dummy; };') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') - genc.emitLine('struct duk_rom_tval_object { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_string { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_undefined { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_null { duk_uint32_t hiword; const void *ptr; };') - genc.emitLine('struct duk_rom_tval_lightfunc { duk_uint32_t hiword; duk_rom_funcptr ptr; };') - genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t hiword; duk_uint32_t dummy; };') - genc.emitLine('#else') - genc.emitLine('#error invalid endianness defines') - genc.emitLine('#endif') - genc.emitLine('#else /* DUK_USE_PACKED_TVAL */') - # Unpacked initializers are written assuming normal struct alignment - # rules so that sizeof(duk_tval) == 16. 32-bit pointers need special - # handling to ensure the individual initializers pad to 16 bytes as - # necessary. - # XXX: 32-bit unpacked duk_tval is not yet supported. 
- genc.emitLine('#if defined(DUK_UINTPTR_MAX)') - genc.emitLine('#if (DUK_UINTPTR_MAX <= 0xffffffffUL)') - genc.emitLine('#error ROM initializer with unpacked duk_tval does not currently work on 32-bit targets') - genc.emitLine('#endif') - genc.emitLine('#endif') - genc.emitLine('typedef struct duk_rom_tval_undefined duk_rom_tval_undefined;') - genc.emitLine('struct duk_rom_tval_undefined { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') - genc.emitLine('typedef struct duk_rom_tval_null duk_rom_tval_null;') - genc.emitLine('struct duk_rom_tval_null { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') - genc.emitLine('typedef struct duk_rom_tval_boolean duk_rom_tval_boolean;') - genc.emitLine('struct duk_rom_tval_boolean { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint32_t val; duk_uint32_t unused; };') - genc.emitLine('typedef struct duk_rom_tval_number duk_rom_tval_number;') - genc.emitLine('struct duk_rom_tval_number { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') - genc.emitLine('typedef struct duk_rom_tval_object duk_rom_tval_object;') - genc.emitLine('struct duk_rom_tval_object { duk_small_uint_t tag; duk_small_uint_t extra; const duk_heaphdr *val; };') - genc.emitLine('typedef struct duk_rom_tval_string duk_rom_tval_string;') - genc.emitLine('struct duk_rom_tval_string { duk_small_uint_t tag; duk_small_uint_t extra; const duk_heaphdr *val; };') - genc.emitLine('typedef struct duk_rom_tval_lightfunc duk_rom_tval_lightfunc;') - genc.emitLine('struct duk_rom_tval_lightfunc { duk_small_uint_t tag; duk_small_uint_t extra; duk_rom_funcptr ptr; };') - genc.emitLine('typedef struct duk_rom_tval_accessor duk_rom_tval_accessor;') - genc.emitLine('struct duk_rom_tval_accessor { const duk_hobject *get; const duk_hobject *set; };') - genc.emitLine('#endif /* DUK_USE_PACKED_TVAL */') - genc.emitLine('') - - # Double initializer byte shuffle macro to handle byte orders - # without duplicating the entire initializers. - genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') - genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (h), (g), (f), (e), (d), (c), (b), (a) }') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') - genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (a), (b), (c), (d), (e), (f), (g), (h) }') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') - genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (d), (c), (b), (a), (h), (g), (f), (e) }') - genc.emitLine('#else') - genc.emitLine('#error invalid endianness defines') - genc.emitLine('#endif') - genc.emitLine('') - - # Emit duk_tval initializer literal macros. 
- genc.emitLine('#if defined(DUK_USE_PACKED_TVAL)') - genc.emitLine('#define DUK__TVAL_NUMBER(hostbytes) { hostbytes }') # bytes already in host order - genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') - genc.emitLine('#define DUK__TVAL_UNDEFINED() { (const void *) NULL, (DUK_TAG_UNDEFINED << 16) }') - genc.emitLine('#define DUK__TVAL_NULL() { (const void *) NULL, (DUK_TAG_NULL << 16) }') - genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (duk_rom_funcptr) (func), (DUK_TAG_LIGHTFUNC << 16) + (flags) }') - genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { 0, (DUK_TAG_BOOLEAN << 16) + (bval) }') - genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (const void *) (ptr), (DUK_TAG_OBJECT << 16) }') - genc.emitLine('#define DUK__TVAL_STRING(ptr) { (const void *) (ptr), (DUK_TAG_STRING << 16) }') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') - genc.emitLine('#define DUK__TVAL_UNDEFINED() { (DUK_TAG_UNDEFINED << 16), (const void *) NULL }') - genc.emitLine('#define DUK__TVAL_NULL() { (DUK_TAG_NULL << 16), (const void *) NULL }') - genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (DUK_TAG_LIGHTFUNC << 16) + (flags), (duk_rom_funcptr) (func) }') - genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { (DUK_TAG_BOOLEAN << 16) + (bval), 0 }') - genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (DUK_TAG_OBJECT << 16), (const void *) (ptr) }') - genc.emitLine('#define DUK__TVAL_STRING(ptr) { (DUK_TAG_STRING << 16), (const void *) (ptr) }') - genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') - genc.emitLine('#define DUK__TVAL_UNDEFINED() { (DUK_TAG_UNDEFINED << 16), (const void *) NULL }') - genc.emitLine('#define DUK__TVAL_NULL() { (DUK_TAG_NULL << 16), (const void *) NULL }') - genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (DUK_TAG_LIGHTFUNC << 16) + (flags), (duk_rom_funcptr) (func) }') - genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { (DUK_TAG_BOOLEAN << 16) + (bval), 0 }') - genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (DUK_TAG_OBJECT << 16), (const void *) (ptr) }') - genc.emitLine('#define DUK__TVAL_STRING(ptr) { (DUK_TAG_STRING << 16), (const void *) (ptr) }') - genc.emitLine('#else') - genc.emitLine('#error invalid endianness defines') - genc.emitLine('#endif') - genc.emitLine('#else /* DUK_USE_PACKED_TVAL */') - genc.emitLine('#define DUK__TVAL_NUMBER(hostbytes) { DUK__TAG_NUMBER, 0, hostbytes }') # bytes already in host order - genc.emitLine('#define DUK__TVAL_UNDEFINED() { DUK_TAG_UNDEFINED, 0, {0,0,0,0,0,0,0,0} }') - genc.emitLine('#define DUK__TVAL_NULL() { DUK_TAG_NULL, 0, {0,0,0,0,0,0,0,0} }') - genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { DUK_TAG_BOOLEAN, 0, (bval), 0 }') - genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { DUK_TAG_OBJECT, 0, (const duk_heaphdr *) (ptr) }') - genc.emitLine('#define DUK__TVAL_STRING(ptr) { DUK_TAG_STRING, 0, (const duk_heaphdr *) (ptr) }') - genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { DUK_TAG_LIGHTFUNC, (flags), (duk_rom_funcptr) (func) }') - genc.emitLine('#endif /* DUK_USE_PACKED_TVAL */') - genc.emitLine('#define DUK__TVAL_ACCESSOR(getter,setter) { (const duk_hobject *) (getter), (const duk_hobject *) (setter) }') - -# Emit ROM objects source: the object/function headers themselves, property -# table structs for different property table sizes/types, and property table -# initializers. -def rom_emit_objects(genc, meta, bi_str_map): - objs = meta['objects'] - id_to_bidx = meta['_objid_to_bidx'] - - # Table for compressed ROM pointers; reserve high range of compressed pointer - # values for this purpose. 
This must contain all ROM pointers that might be - # referenced (all objects, strings, and property tables at least). - romptr_compress_list = [] - def compress_rom_ptr(x): - if x == 'NULL': - return 0 - try: - idx = romptr_compress_list.index(x) - res = ROMPTR_FIRST + idx - except ValueError: - romptr_compress_list.append(x) - res = ROMPTR_FIRST + len(romptr_compress_list) - 1 - assert(res <= 0xffff) - return res - - # Need string and object maps (id -> C symbol name) early. - bi_obj_map = {} # object id -> initializer variable name - for idx,obj in enumerate(objs): - bi_obj_map[obj['id']] = 'duk_obj_%d' % idx - - # Add built-in strings and objects to compressed ROM pointers first. - for k in sorted(bi_str_map.keys()): - compress_rom_ptr('&%s' % bi_str_map[k]) - for k in sorted(bi_obj_map.keys()): - compress_rom_ptr('&%s' % bi_obj_map[k]) - - # Property attributes lookup, map metadata attribute string into a - # C initializer. - attr_lookup = { - '': 'DUK_PROPDESC_FLAGS_NONE', - 'w': 'DUK_PROPDESC_FLAGS_W', - 'e': 'DUK_PROPDESC_FLAGS_E', - 'c': 'DUK_PROPDESC_FLAGS_C', - 'we': 'DUK_PROPDESC_FLAGS_WE', - 'wc': 'DUK_PROPDESC_FLAGS_WC', - 'ec': 'DUK_PROPDESC_FLAGS_EC', - 'wec': 'DUK_PROPDESC_FLAGS_WEC', - 'a': 'DUK_PROPDESC_FLAGS_NONE|DUK_PROPDESC_FLAG_ACCESSOR', - 'ea': 'DUK_PROPDESC_FLAGS_E|DUK_PROPDESC_FLAG_ACCESSOR', - 'ca': 'DUK_PROPDESC_FLAGS_C|DUK_PROPDESC_FLAG_ACCESSOR', - 'eca': 'DUK_PROPDESC_FLAGS_EC|DUK_PROPDESC_FLAG_ACCESSOR', - } - - # Emit property table structs. These are very complex because - # property count *and* individual property type affect the fields - # in the initializer, properties can be data properties or accessor - # properties or different duk_tval types. There are also several - # property table memory layouts, each with a different ordering of - # keys, values, etc. Union initializers would make things a bit - # easier but they're not very portable (being C99). - # - # The easy solution is to use a separate initializer type for each - # property type. Could also cache and reuse identical initializers - # but there'd be very few of them so it's more straightforward to - # not reuse the structs. - # - # NOTE: naming is a bit inconsistent here, duk_tval is used also - # to refer to property value initializers like a getter/setter pair. 
- - genc.emitLine('#if defined(DUK_USE_HOBJECT_LAYOUT_1)') - for idx,obj in enumerate(objs): - numprops = len(obj['properties']) - if numprops == 0: - continue - tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) - tmp += 'struct duk_romprops_%d { ' % idx - for idx,val in enumerate(obj['properties']): - tmp += 'const duk_hstring *key%d; ' % idx - for idx,val in enumerate(obj['properties']): - # XXX: fastint support - tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) - for idx,val in enumerate(obj['properties']): - tmp += 'duk_uint8_t flags%d; ' % idx - tmp += '};' - genc.emitLine(tmp) - genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_2)') - for idx,obj in enumerate(objs): - numprops = len(obj['properties']) - if numprops == 0: - continue - tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) - tmp += 'struct duk_romprops_%d { ' % idx - for idx,val in enumerate(obj['properties']): - # XXX: fastint support - tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) - for idx,val in enumerate(obj['properties']): - tmp += 'const duk_hstring *key%d; ' % idx - for idx,val in enumerate(obj['properties']): - tmp += 'duk_uint8_t flags%d; ' % idx - # Padding follows for flags, but we don't need to emit it - # (at the moment there is never an array or hash part). - tmp += '};' - genc.emitLine(tmp) - genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_3)') - for idx,obj in enumerate(objs): - numprops = len(obj['properties']) - if numprops == 0: - continue - tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) - tmp += 'struct duk_romprops_%d { ' % idx - for idx,val in enumerate(obj['properties']): - # XXX: fastint support - tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) - # No array values - for idx,val in enumerate(obj['properties']): - tmp += 'const duk_hstring *key%d; ' % idx - # No hash index - for idx,val in enumerate(obj['properties']): - tmp += 'duk_uint8_t flags%d; ' % idx - tmp += '};' - genc.emitLine(tmp) - genc.emitLine('#else') - genc.emitLine('#error invalid object layout') - genc.emitLine('#endif') - genc.emitLine('') - - # Forward declare all property tables so that objects can reference them. - # Also pointer compress them. - - for idx,obj in enumerate(objs): - numprops = len(obj['properties']) - if numprops == 0: - continue - - # We would like to use DUK_INTERNAL_DECL here, but that maps - # to "static const" in a single file build which has C++ - # portability issues: you can't forward declare a static const. - # We can't reorder the property tables to avoid this because - # there are cyclic references. So, as the current workaround, - # declare as external. - genc.emitLine('DUK_EXTERNAL_DECL const duk_romprops_%d duk_prop_%d;' % (idx, idx)) - - # Add property tables to ROM compressed pointers too. - compress_rom_ptr('&duk_prop_%d' % idx) - genc.emitLine('') - - # Forward declare all objects so that objects can reference them, - # e.g. internal prototype reference. - - for idx,obj in enumerate(objs): - # Careful with C++: must avoid redefining a non-extern const. - # See commentary above for duk_prop_%d forward declarations. 
- if obj.get('callable', False): - genc.emitLine('DUK_EXTERNAL_DECL const duk_romfun duk_obj_%d;' % idx) - elif obj.get('class') == 'Array': - genc.emitLine('DUK_EXTERNAL_DECL const duk_romarr duk_obj_%d;' % idx) - else: - genc.emitLine('DUK_EXTERNAL_DECL const duk_romobj duk_obj_%d;' % idx) - genc.emitLine('') - - # Define objects, reference property tables. Objects will be - # logically non-extensible so also leave their extensible flag - # cleared despite what metadata requests; the runtime code expects - # ROM objects to be non-extensible. - for idx,obj in enumerate(objs): - numprops = len(obj['properties']) - - isfunc = obj.get('callable', False) - - if isfunc: - tmp = 'DUK_EXTERNAL const duk_romfun duk_obj_%d = ' % idx - elif obj.get('class') == 'Array': - tmp = 'DUK_EXTERNAL const duk_romarr duk_obj_%d = ' % idx - else: - tmp = 'DUK_EXTERNAL const duk_romobj duk_obj_%d = ' % idx - - flags = [ 'DUK_HTYPE_OBJECT', 'DUK_HEAPHDR_FLAG_READONLY' ] - if isfunc: - flags.append('DUK_HOBJECT_FLAG_NATFUNC') - flags.append('DUK_HOBJECT_FLAG_STRICT') - flags.append('DUK_HOBJECT_FLAG_NEWENV') - if obj.get('constructable', False): - flags.append('DUK_HOBJECT_FLAG_CONSTRUCTABLE') - if obj.get('class') == 'Array': - flags.append('DUK_HOBJECT_FLAG_EXOTIC_ARRAY') - flags.append('DUK_HOBJECT_CLASS_AS_FLAGS(%d)' % class_to_number(obj['class'])) # XXX: use constant, not number - - refcount = 1 # refcount is faked to be always 1 - if numprops == 0: - props = 'NULL' - else: - props = '&duk_prop_%d' % idx - props_enc16 = compress_rom_ptr(props) - - if obj.has_key('internal_prototype'): - iproto = '&%s' % bi_obj_map[obj['internal_prototype']] - else: - iproto = 'NULL' - iproto_enc16 = compress_rom_ptr(iproto) - - e_size = numprops - e_next = e_size - a_size = 0 # never an array part for now - h_size = 0 # never a hash for now; not appropriate for perf relevant builds - - if isfunc: - nativefunc = obj['native'] - if obj.get('varargs', False): - nargs = 'DUK_VARARGS' - elif obj.has_key('nargs'): - nargs = '%d' % obj['nargs'] - else: - assert(False) # 'nargs' should be defaulted from 'length' at metadata load - magic = '%d' % resolve_magic(obj.get('magic', None), id_to_bidx) - else: - nativefunc = 'dummy' - nargs = '0' - magic = '0' - - assert(a_size == 0) - assert(h_size == 0) - if isfunc: - tmp += 'DUK__ROMFUN_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d,%s,%s,%s);' % \ - ('|'.join(flags), refcount, props, props_enc16, \ - iproto, iproto_enc16, e_size, e_next, a_size, h_size, \ - nativefunc, nargs, magic) - elif obj.get('class') == 'Array': - arrlen = 0 - tmp += 'DUK__ROMARR_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d,%d);' % \ - ('|'.join(flags), refcount, props, props_enc16, \ - iproto, iproto_enc16, e_size, e_next, a_size, h_size, arrlen) - else: - tmp += 'DUK__ROMOBJ_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d);' % \ - ('|'.join(flags), refcount, props, props_enc16, \ - iproto, iproto_enc16, e_size, e_next, a_size, h_size) - - genc.emitLine(tmp) - - # Property tables. Can reference arbitrary strings and objects as - # they're defined before them. - - # Properties will be non-configurable, but must be writable so that - # standard property semantics allow shadowing properties to be - # established in inherited objects (e.g. "var obj={}; obj.toString - # = myToString"). Enumerable can also be kept. 
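Concretely, the attribute strings from the metadata are resolved through the attr_lookup table defined earlier in this function, for example:

    # using the attr_lookup dict defined above
    assert attr_lookup['w'] == 'DUK_PROPDESC_FLAGS_W'                              # writable data property
    assert attr_lookup['ea'] == 'DUK_PROPDESC_FLAGS_E|DUK_PROPDESC_FLAG_ACCESSOR'  # enumerable accessor
    # _prepAttrs() below additionally asserts that 'c' (configurable) never
    # appears in ROM property metadata.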
- - def _prepAttrs(val): - attrs = val['attributes'] - assert('c' not in attrs) - return attr_lookup[attrs] - - def _emitPropTableInitializer(idx, obj, layout): - init_vals = [] - init_keys = [] - init_flags = [] - - numprops = len(obj['properties']) - for val in obj['properties']: - init_keys.append('(const duk_hstring *)&%s' % bi_str_map[val['key']]) - for val in obj['properties']: - # XXX: fastint support - init_vals.append('%s' % rom_get_value_initializer_literal(meta, val, bi_str_map, bi_obj_map)) - for val in obj['properties']: - init_flags.append('%s' % _prepAttrs(val)) - - if layout == 1: - initlist = init_keys + init_vals + init_flags - elif layout == 2: - initlist = init_vals + init_keys + init_flags - elif layout == 3: - # Same as layout 2 now, no hash/array - initlist = init_vals + init_keys + init_flags - - if len(initlist) > 0: - genc.emitLine('DUK_EXTERNAL const duk_romprops_%d duk_prop_%d = {%s};' % (idx, idx, ','.join(initlist))) - - genc.emitLine('#if defined(DUK_USE_HOBJECT_LAYOUT_1)') - for idx,obj in enumerate(objs): - _emitPropTableInitializer(idx, obj, 1) - genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_2)') - for idx,obj in enumerate(objs): - _emitPropTableInitializer(idx, obj, 2) - genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_3)') - for idx,obj in enumerate(objs): - _emitPropTableInitializer(idx, obj, 3) - genc.emitLine('#else') - genc.emitLine('#error invalid object layout') - genc.emitLine('#endif') - genc.emitLine('') - - # Emit a list of ROM builtins (those objects needing a bidx). - # - # cdecl > explain const int * const foo; - # declare foo as const pointer to const int - - count_bidx = 0 - for bi in objs: - if bi.get('bidx_used', False): - count_bidx += 1 - genc.emitLine('DUK_INTERNAL const duk_hobject * const duk_rom_builtins_bidx[%d] = {' % count_bidx) - for bi in objs: - if not bi.get('bidx_used', False): - continue # for this we want the toplevel objects only - genc.emitLine('\t(const duk_hobject *) &%s,' % bi_obj_map[bi['id']]) - genc.emitLine('};') - - # Emit a table of compressed ROM pointers. We must be able to - # compress ROM pointers at compile time so we assign running - # indices to them. User pointer compression macros must use this - # array to encode/decode ROM pointers. - - genc.emitLine('') - genc.emitLine('#if defined(DUK_USE_ROM_OBJECTS) && defined(DUK_USE_HEAPPTR16)') - genc.emitLine('DUK_EXTERNAL const void * const duk_rom_compressed_pointers[%d] = {' % (len(romptr_compress_list) + 1)) - for idx,ptr in enumerate(romptr_compress_list): - genc.emitLine('\t(const void *) %s, /* 0x%04x */' % (ptr, ROMPTR_FIRST + idx)) - romptr_highest = ROMPTR_FIRST + len(romptr_compress_list) - 1 - genc.emitLine('\tNULL') # for convenience - genc.emitLine('};') - genc.emitLine('#endif') - - print('%d compressed rom pointers (used range is [0x%04x,0x%04x], %d space left)' % \ - (len(romptr_compress_list), ROMPTR_FIRST, romptr_highest, 0xffff - romptr_highest)) - - # Undefine helpers. - genc.emitLine('') - for i in [ - 'DUK__STRHASH16', - 'DUK__STRHASH32', - 'DUK__DBLBYTES', - 'DUK__TVAL_NUMBER', - 'DUK__TVAL_UNDEFINED', - 'DUK__TVAL_NULL', - 'DUK__TVAL_BOOLEAN', - 'DUK__TVAL_OBJECT', - 'DUK__TVAL_STRING', - 'DUK__STRINIT', - 'DUK__ROMOBJ_INIT', - 'DUK__ROMFUN_INIT' - ]: - genc.emitLine('#undef ' + i) - - return romptr_compress_list - -# Emit ROM objects header. 
-def rom_emit_objects_header(genc, meta): - bidx = 0 - for bi in meta['objects']: - if not bi.get('bidx_used', False): - continue # for this we want the toplevel objects only - genc.emitDefine('DUK_BIDX_' + '_'.join(bi['id'].upper().split('_')[1:]), bidx) # bi_foo_bar -> FOO_BAR - bidx += 1 - count_bidx = bidx - genc.emitDefine('DUK_NUM_BUILTINS', count_bidx) - genc.emitDefine('DUK_NUM_BIDX_BUILTINS', count_bidx) - genc.emitDefine('DUK_NUM_ALL_BUILTINS', len(meta['objects'])) - genc.emitLine('') - genc.emitLine('#if !defined(DUK_SINGLE_FILE)') # C++ static const workaround - genc.emitLine('DUK_INTERNAL_DECL const duk_hobject * const duk_rom_builtins_bidx[%d];' % count_bidx) - genc.emitLine('#endif') - - # XXX: missing declarations here, not an issue for single source build. - # Add missing declarations. - # XXX: For example, 'DUK_EXTERNAL_DECL ... duk_rom_compressed_pointers[]' is missing. - -# -# Shared for both RAM and ROM -# - -def emit_header_native_function_declarations(genc, meta): - emitted = {} # To suppress duplicates - funclist = [] - def _emit(fname): - if not emitted.has_key(fname): - emitted[fname] = True - funclist.append(fname) - - for o in meta['objects']: - if o.has_key('native'): - _emit(o['native']) - - for p in o['properties']: - v = p['value'] - if isinstance(v, dict) and v['type'] == 'lightfunc': - assert(v.has_key('native')) - _emit(v['native']) - #print('Lightfunc function declaration: %r' % v['native']) - - for fname in funclist: - # Visibility depends on whether the function is Duktape internal or user. - # Use a simple prefix for now. - if fname[:4] == 'duk_': - genc.emitLine('DUK_INTERNAL_DECL duk_ret_t %s(duk_context *ctx);' % fname) - else: - genc.emitLine('extern duk_ret_t %s(duk_context *ctx);' % fname) - -# -# Main -# - -def main(): - parser = optparse.OptionParser() - parser.add_option('--buildinfo', dest='buildinfo', help='Build info, JSON format') - parser.add_option('--used-stridx-metadata', dest='used_stridx_metadata', help='DUK_STRIDX_xxx used by source/headers, JSON format') - parser.add_option('--strings-metadata', dest='strings_metadata', help='Built-in strings metadata file, YAML format') - parser.add_option('--objects-metadata', dest='objects_metadata', help='Built-in objects metadata file, YAML format') - parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', action='append', default=[], help='User strings and objects to add, YAML format (can be repeated for multiple overrides)') - parser.add_option('--ram-support', dest='ram_support', action='store_true', default=False, help='Support RAM strings/objects') - parser.add_option('--rom-support', dest='rom_support', action='store_true', default=False, help='Support ROM strings/objects (increases output size considerably)') - parser.add_option('--rom-auto-lightfunc', dest='rom_auto_lightfunc', action='store_true', default=False, help='Convert ROM built-in function properties into lightfuncs automatically whenever possible') - parser.add_option('--out-header', dest='out_header', help='Output header file') - parser.add_option('--out-source', dest='out_source', help='Output source file') - parser.add_option('--out-metadata-json', dest='out_metadata_json', help='Output metadata file') - parser.add_option('--dev-dump-final-ram-metadata', dest='dev_dump_final_ram_metadata', help='Development option') - parser.add_option('--dev-dump-final-rom-metadata', dest='dev_dump_final_rom_metadata', help='Development option') - (opts, args) = parser.parse_args() - - # Options processing. 
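For context, a typical invocation wires most of these options together (file paths below are invented for illustration; in practice the dist process drives this script rather than a manual command line):

    python genbuiltins.py \
        --buildinfo=tmp/buildinfo.json \
        --used-stridx-metadata=tmp/duk_used_stridx_bidx.json \
        --strings-metadata=src/strings.yaml \
        --objects-metadata=src/builtins.yaml \
        --ram-support --rom-support --rom-auto-lightfunc \
        --out-header=tmp/duk_builtins.h \
        --out-source=tmp/duk_builtins.c \
        --out-metadata-json=tmp/duk_build_meta.json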
- - if opts.buildinfo is None: - raise Exception('missing buildinfo') - - with open(opts.buildinfo, 'rb') as f: - build_info = dukutil.json_decode(f.read().strip()) - - # Read in metadata files, normalizing and merging as necessary. - - ram_meta = load_metadata(opts, rom=False, build_info=build_info) - rom_meta = load_metadata(opts, rom=True, build_info=build_info) - if opts.dev_dump_final_ram_metadata is not None: - dump_metadata(ram_meta, opts.dev_dump_final_ram_metadata) - if opts.dev_dump_final_rom_metadata is not None: - dump_metadata(rom_meta, opts.dev_dump_final_rom_metadata) - - # Create RAM init data bitstreams. - - ramstr_data, ramstr_maxlen = gen_ramstr_initdata_bitpacked(ram_meta) - ram_native_funcs, ram_natfunc_name_to_natidx = get_ramobj_native_func_maps(ram_meta) - - if opts.ram_support: - ramobj_data_le = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'little') - ramobj_data_be = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'big') - ramobj_data_me = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'mixed') - - # Write source and header files. - - gc_src = dukutil.GenerateC() - gc_src.emitHeader('genbuiltins.py') - gc_src.emitLine('#include "duk_internal.h"') - gc_src.emitLine('') - gc_src.emitLine('#if defined(DUK_USE_ROM_STRINGS)') - if opts.rom_support: - rom_bi_str_map = rom_emit_strings_source(gc_src, rom_meta) - rom_emit_object_initializer_types_and_macros(gc_src) - rom_emit_objects(gc_src, rom_meta, rom_bi_str_map) - else: - gc_src.emitLine('#error ROM support not enabled, rerun make_dist.py with --rom-support') - gc_src.emitLine('#else /* DUK_USE_ROM_STRINGS */') - emit_ramstr_source_strinit_data(gc_src, ramstr_data) - gc_src.emitLine('#endif /* DUK_USE_ROM_STRINGS */') - gc_src.emitLine('') - gc_src.emitLine('#if defined(DUK_USE_ROM_OBJECTS)') - if opts.rom_support: - gc_src.emitLine('#if !defined(DUK_USE_ROM_STRINGS)') - gc_src.emitLine('#error DUK_USE_ROM_OBJECTS requires DUK_USE_ROM_STRINGS') - gc_src.emitLine('#endif') - else: - gc_src.emitLine('#error ROM support not enabled, rerun make_dist.py with --rom-support') - gc_src.emitLine('#else /* DUK_USE_ROM_OBJECTS */') - if opts.ram_support: - emit_ramobj_source_nativefunc_array(gc_src, ram_native_funcs) # endian independent - gc_src.emitLine('#if defined(DUK_USE_DOUBLE_LE)') - emit_ramobj_source_objinit_data(gc_src, ramobj_data_le) - gc_src.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') - emit_ramobj_source_objinit_data(gc_src, ramobj_data_be) - gc_src.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') - emit_ramobj_source_objinit_data(gc_src, ramobj_data_me) - gc_src.emitLine('#else') - gc_src.emitLine('#error invalid endianness defines') - gc_src.emitLine('#endif') - else: - gc_src.emitLine('#error RAM support not enabled, rerun make_dist.py with --ram-support') - gc_src.emitLine('#endif /* DUK_USE_ROM_OBJECTS */') - - gc_hdr = dukutil.GenerateC() - gc_hdr.emitHeader('genbuiltins.py') - gc_hdr.emitLine('#ifndef DUK_BUILTINS_H_INCLUDED') - gc_hdr.emitLine('#define DUK_BUILTINS_H_INCLUDED') - gc_hdr.emitLine('') - gc_hdr.emitLine('#if defined(DUK_USE_ROM_STRINGS)') - if opts.rom_support: - emit_header_stridx_defines(gc_hdr, rom_meta) - rom_emit_strings_header(gc_hdr, rom_meta) - else: - gc_hdr.emitLine('#error ROM support not enabled, rerun make_dist.py with --rom-support') - gc_hdr.emitLine('#else /* DUK_USE_ROM_STRINGS */') - if opts.ram_support: - emit_header_stridx_defines(gc_hdr, ram_meta) - 
emit_ramstr_header_strinit_defines(gc_hdr, ram_meta, ramstr_data, ramstr_maxlen) - else: - gc_hdr.emitLine('#error RAM support not enabled, rerun make_dist.py with --ram-support') - gc_hdr.emitLine('#endif /* DUK_USE_ROM_STRINGS */') - gc_hdr.emitLine('') - gc_hdr.emitLine('#if defined(DUK_USE_ROM_OBJECTS)') - if opts.rom_support: - # Currently DUK_USE_ROM_PTRCOMP_FIRST must match our fixed - # define, and the two must be updated in sync. Catch any - # mismatch to avoid difficult to diagnose errors. - gc_hdr.emitLine('#if !defined(DUK_USE_ROM_PTRCOMP_FIRST)') - gc_hdr.emitLine('#error missing DUK_USE_ROM_PTRCOMP_FIRST define') - gc_hdr.emitLine('#endif') - gc_hdr.emitLine('#if (DUK_USE_ROM_PTRCOMP_FIRST != %dL)' % ROMPTR_FIRST) - gc_hdr.emitLine('#error DUK_USE_ROM_PTRCOMP_FIRST must match ROMPTR_FIRST in genbuiltins.py (%d), update manually and re-dist' % ROMPTR_FIRST) - gc_hdr.emitLine('#endif') - emit_header_native_function_declarations(gc_hdr, rom_meta) - rom_emit_objects_header(gc_hdr, rom_meta) - else: - gc_hdr.emitLine('#error RAM support not enabled, rerun make_dist.py with --ram-support') - gc_hdr.emitLine('#else /* DUK_USE_ROM_OBJECTS */') - if opts.ram_support: - emit_header_native_function_declarations(gc_hdr, ram_meta) - emit_ramobj_header_nativefunc_array(gc_hdr, ram_native_funcs) - emit_ramobj_header_objects(gc_hdr, ram_meta) - gc_hdr.emitLine('#if defined(DUK_USE_DOUBLE_LE)') - emit_ramobj_header_initdata(gc_hdr, ramobj_data_le) - gc_hdr.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') - emit_ramobj_header_initdata(gc_hdr, ramobj_data_be) - gc_hdr.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') - emit_ramobj_header_initdata(gc_hdr, ramobj_data_me) - gc_hdr.emitLine('#else') - gc_hdr.emitLine('#error invalid endianness defines') - gc_hdr.emitLine('#endif') - else: - gc_hdr.emitLine('#error RAM support not enabled, rerun make_dist.py with --ram-support') - gc_hdr.emitLine('#endif /* DUK_USE_ROM_OBJECTS */') - gc_hdr.emitLine('#endif /* DUK_BUILTINS_H_INCLUDED */') - - with open(opts.out_source, 'wb') as f: - f.write(gc_src.getString()) - - with open(opts.out_header, 'wb') as f: - f.write(gc_hdr.getString()) - - # Write a JSON file with build metadata, e.g. built-in strings. - - ver = long(build_info['version']) - plain_strs = [] - base64_strs = [] - str_objs = [] - for s in ram_meta['strings_stridx']: # XXX: provide all lists? - t1 = bytes_to_unicode(s['str']) - t2 = unicode_to_bytes(s['str']).encode('base64').strip() - plain_strs.append(t1) - base64_strs.append(t2) - str_objs.append({ - 'plain': t1, 'base64': t2, 'define': s['define'] - }) - meta = { - 'comment': 'Metadata for Duktape build', - 'duk_version': ver, - 'duk_version_string': '%d.%d.%d' % (ver / 10000, (ver / 100) % 100, ver % 100), - 'git_describe': build_info['git_describe'], - 'builtin_strings': plain_strs, - 'builtin_strings_base64': base64_strs, - 'builtin_strings_info': str_objs - } - - with open(opts.out_metadata_json, 'wb') as f: - f.write(json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True)) - -if __name__ == '__main__': - main() diff --git a/src/genequivyear.py b/src/genequivyear.py deleted file mode 100644 index e7ca72af..00000000 --- a/src/genequivyear.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python2 -# -# Generate equivalent year table needed by duk_bi_date.c. 
Based on: -# -# http://code.google.com/p/v8/source/browse/trunk/src/date.h#146 -# - -import datetime -import pytz - -def isleapyear(year): - if (year % 4) != 0: - return False - if (year % 100) != 0: - return True - if (year % 400) != 0: - return False - return True - -def eqyear(weekday, isleap): - # weekday: 0=Sunday, 1=Monday, ... - - if isleap: - recent_year = 1956 - else: - recent_year = 1967 - recent_year += (weekday * 12) % 28 - year = 2008 + (recent_year + 3 * 28 - 2008) % 28 - - # some assertions - # - # Note that Ecmascript internal weekday (0=Sunday) matches neither - # Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday). - # Python isoweekday() % 7 matches the Ecmascript weekday. - # https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday - - dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC) # Jan 1 00:00:00.000 UTC - #print(weekday, isleap, year, dt.isoweekday(), isleapyear(year)) - #print(repr(dt)) - #print(dt.isoformat()) - - if isleap != isleapyear(year): - raise Exception('internal error: equivalent year does not have same leap-year-ness') - pass - - if weekday != dt.isoweekday() % 7: - raise Exception('internal error: equivalent year does not begin with the same weekday') - pass - - return year - -def main(): - for i in xrange(14): - print(eqyear(i % 7, i >= 7)) - -if __name__ == '__main__': - main() diff --git a/src/genexesizereport.py b/src/genexesizereport.py deleted file mode 100644 index eede495f..00000000 --- a/src/genexesizereport.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python2 -# -# Generate a size report from a Duktape library / executable. -# Write out useful information about function sizes in a variety -# of forms. -# - -import os -import sys -import re -import subprocess - -#000000000040d200 : -# 40d200: 55 push %rbp -# 40d201: 89 f5 mov %esi,%ebp - -re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$') -re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$') - -def objdump(filename): - proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE) - curr_func = None - func_start = None - func_end = None - ret = {} - - def storeFunc(): - if curr_func is None or func_start is None or func_end is None: - return - ret[curr_func] = { - 'name': curr_func, - 'start': func_start, - 'end': func_end, # exclusive - 'length': func_end - func_start - } - - for line in proc.stdout: - line = line.strip() - - m = re_funcstart.match(line) - if m is not None: - if curr_func is not None: - storeFunc() - curr_func = m.group(1) - func_start = None - func_end = None - - m = re_codeline.match(line) - if m is not None: - func_addr = long(m.group(1), 16) - func_bytes = m.group(2) - func_nbytes = len(func_bytes.split(' ')) - func_instr = m.group(3) - if func_start is None: - func_start = func_addr - func_end = func_addr + func_nbytes - - storeFunc() - - return ret - -def filterFuncs(funcs): - todo = [] # avoid mutation while iterating - - def accept(fun): - n = fun['name'] - - if n in [ '.comment', - '.dynstr', - '.dynsym', - '.eh_frame_hdr', - '.interp', - '.rela.dyn', - '.rela.plt', - '_DYNAMIC', - '_GLOBAL_OFFSET_TABLE_', - '_IO_stdin_used', - '__CTOR_LIST__', - '__DTOR_LIST__', - '_fini', - '_init', - '_start', - '' ]: - return False - - for pfx in [ '.debug', '.gnu', '.note', - '__FRAME_', '__' ]: - if n.startswith(pfx): - return False - - return True - - for k in funcs.keys(): - if not accept(funcs[k]): - todo.append(k) - - for k in todo: - del funcs[k] - 
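For clarity, the two regular expressions above carve objdump -D output into function headers and code lines; a small self-contained check of what re_codeline extracts (the disassembly line itself is an invented example):

    import re
    re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$')
    m = re_codeline.match('40d201:\t89 f5                \tmov    %esi,%ebp')
    assert m.group(1) == '40d201'            # instruction address
    assert m.group(2) == '89 f5'             # raw bytes, two bytes for this instruction
    assert m.group(3) == 'mov    %esi,%ebp'  # disassembled instruction text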
-def main():
-    funcs = objdump(sys.argv[1])
-    filterFuncs(funcs)
-
-    funcs_keys = funcs.keys()
-    funcs_keys.sort()
-    combined_size_all = 0
-    combined_size_duk = 0
-    for k in funcs_keys:
-        fun = funcs[k]
-        combined_size_all += fun['length']
-        if fun['name'].startswith('duk_'):
-            combined_size_duk += fun['length']
-
-    f = sys.stdout
-    f.write('<html>')
-    f.write('<head>')
-    f.write('<title>Size dump for %s</title>' % sys.argv[1])
-    f.write("""\
-
-""")
-    f.write('</head>')
-    f.write('<body>')
-
-    f.write('<h1>Summary</h1>')
-    f.write('<table>')
-    f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys))
-    f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all)
-    f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk)
-    f.write('</table>')
-
-    f.write('<h1>Sorted by function name</h1>')
-    f.write('<table>')
-    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
-    funcs_keys = funcs.keys()
-    funcs_keys.sort()
-    for k in funcs_keys:
-        fun = funcs[k]
-        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
-    f.write('</table>')
-
-    f.write('<h1>Sorted by size</h1>')
-    f.write('<table>')
-    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
-    funcs_keys = funcs.keys()
-    def cmpSize(a,b):
-        return cmp(funcs[a]['length'], funcs[b]['length'])
-    funcs_keys.sort(cmp=cmpSize)
-    for k in funcs_keys:
-        fun = funcs[k]
-        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
-    f.write('</table>')
-
-    f.write('</body>')
-    f.write('</html>')
-
-if __name__ == '__main__':
-    main()
diff --git a/src/genhashsizes.py b/src/genhashsizes.py
deleted file mode 100644
index 867e2856..00000000
--- a/src/genhashsizes.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python2
-#
-# Find a sequence of duk_hobject hash sizes which have a desired 'ratio'
-# and are primes. Prime hash sizes ensure that all probe sequence values
-# (less than hash size) are relatively prime to hash size, i.e. cover the
-# entire hash. Prime data is packed into about 1 byte/prime using a
-# prediction-correction model.
-#
-# Also generates a set of probe steps which are relatively prime to every
-# hash size.
-
-import sys
-import math
-
-def is_prime(n):
-    if n == 0:
-        return False
-    if n == 1 or n == 2:
-        return True
-
-    n_limit = int(math.ceil(float(n) ** 0.5)) + 1
-    n_limit += 100  # paranoia
-    if n_limit >= n:
-        n_limit = n - 1
-    for i in xrange(2,n_limit + 1):
-        if (n % i) == 0:
-            return False
-    return True
-
-def next_prime(n):
-    while True:
-        n += 1
-        if is_prime(n):
-            return n
-
-def generate_sizes(min_size, max_size, step_ratio):
-    "Generate a set of hash sizes following a nice ratio."
-
-    sizes = []
-    ratios = []
-    curr = next_prime(min_size)
-    next = curr
-    sizes.append(curr)
-
-    step_ratio = float(step_ratio) / 1024
-
-    while True:
-        if next > max_size:
-            break
-        ratio = float(next) / float(curr)
-        if ratio < step_ratio:
-            next = next_prime(next)
-            continue
-        sys.stdout.write('.'); sys.stdout.flush()
-        sizes.append(next)
-        ratios.append(ratio)
-        curr = next
-        next = next_prime(int(next * step_ratio))
-
-    sys.stdout.write('\n'); sys.stdout.flush()
-    return sizes, ratios
-
-def generate_corrections(sizes, step_ratio):
-    "Generate a set of correction from a ratio-based predictor."
-
-    # Generate a correction list for size list, assuming steps follow a certain
-    # ratio; this allows us to pack size list into one byte per size
-
-    res = []
-
-    res.append(sizes[0])  # first entry is first size
-
-    for i in xrange(1, len(sizes)):
-        prev = sizes[i - 1]
-        pred = int(prev * step_ratio) >> 10
-        diff = int(sizes[i] - pred)
-        res.append(diff)
-
-        if diff < 0 or diff > 127:
-            raise Exception('correction does not fit into 8 bits')
-
-    res.append(-1)  # negative denotes last end of list
-    return res
-
-def generate_probes(count, sizes):
-    res = []
-
-    # Generate probe values which are guaranteed to be relatively prime to
-    # all generated hash size primes. These don't have to be primes, but
-    # we currently use smallest non-conflicting primes here.
- - i = 2 - while len(res) < count: - if is_prime(i) and (i not in sizes): - if i > 255: - raise Exception('probe step does not fit into 8 bits') - res.append(i) - i += 1 - continue - i += 1 - - return res - -# NB: these must match duk_hobject defines and code -step_ratio = 1177 # approximately (1.15 * (1 << 10)) -min_size = 16 -max_size = 2**32 - 1 - -sizes, ratios = generate_sizes(min_size, max_size, step_ratio) -corrections = generate_corrections(sizes, step_ratio) -probes = generate_probes(32, sizes) -print len(sizes) -print 'SIZES: ' + repr(sizes) -print 'RATIOS: ' + repr(ratios) -print 'CORRECTIONS: ' + repr(corrections) -print 'PROBES: ' + repr(probes) - -# highest 32-bit prime -i = 2**32 -while True: - i -= 1 - if is_prime(i): - print 'highest 32-bit prime is: %d (0x%08x)' % (i, i) - break diff --git a/src/genobjsizereport.py b/src/genobjsizereport.py deleted file mode 100644 index 0d0e5e62..00000000 --- a/src/genobjsizereport.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python2 -# -# Size report of (stripped) object and source files. -# - -import os -import sys - -def getsize(fname): - return os.stat(fname).st_size - -def getlines(fname): - f = None - try: - f = open(fname, 'rb') - lines = f.read().split('\n') - return len(lines) - finally: - if f is not None: - f.close() - f = None - -def process(srcfile, objfile): - srcsize = getsize(srcfile) - srclines = getlines(srcfile) - srcbpl = float(srcsize) / float(srclines) - objsize = getsize(objfile) - objbpl = float(objsize) / float(srclines) - - return objsize, objbpl, srcsize, srclines, srcbpl - -def main(): - tot_srcsize = 0 - tot_srclines = 0 - tot_objsize = 0 - - tmp = [] - for i in sys.argv[1:]: - objfile = i - if i.endswith('.strip'): - objname = i[:-6] - else: - objname = i - base, ext = os.path.splitext(objname) - srcfile = base + '.c' - - objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile) - srcbase = os.path.basename(srcfile) - objbase = os.path.basename(objname) # foo.o.strip -> present as foo.o - tot_srcsize += srcsize - tot_srclines += srclines - tot_objsize += objsize - tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl)) - - def mycmp(a,b): - return cmp(a[5], b[5]) - - tmp.sort(cmp=mycmp, reverse=True) # sort by object size - fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f' - for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp: - print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl)) - - print('========================================================================') - print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines), - '', tot_objsize, float(tot_objsize) / float(tot_srclines))) - -if __name__ == '__main__': - # Usage: - # - # $ strip *.o - # $ python genobjsizereport.py *.o - - main() diff --git a/src/prepare_unicode_data.py b/src/prepare_unicode_data.py deleted file mode 100644 index cd27d409..00000000 --- a/src/prepare_unicode_data.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python2 -# -# UnicodeData.txt may contain ranges in addition to individual characters. -# Unpack the ranges into individual characters for the other scripts to use. 
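To make the range format concrete, here is a minimal in-memory sketch of the same unpacking (field contents abbreviated and the range shortened for the example; real UnicodeData.txt entries carry more fields and much larger ranges):

    lines = [
        '0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;\n',
        '4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;\n',
        '4E02;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;\n',
    ]
    out = []
    i = 0
    while i < len(lines):
        parts = lines[i].split(';')
        if parts[1].endswith('First>'):
            parts_last = lines[i + 1].split(';')
            cp_first = int(parts[0], 16)
            cp_last = int(parts_last[0], 16)
            for cp in xrange(cp_first, cp_last + 1):   # inclusive range
                parts[0] = '%04X' % cp
                out.append(';'.join(parts))
            i += 2
        else:
            out.append(lines[i])
            i += 1
    # 'out' now contains the 0041 line plus one expanded line each for 4E00, 4E01 and 4E02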
-# - -import os -import sys - -def main(): - f_in = open(sys.argv[1], 'rb') - f_out = open(sys.argv[2], 'wb') - while True: - line = f_in.readline() - if line == '' or line == '\n': - break - parts = line.split(';') # keep newline - if parts[1].endswith('First>'): - line2 = f_in.readline() - parts2 = line2.split(';') - if not parts2[1].endswith('Last>'): - raise Exception('cannot parse range') - cp1 = long(parts[0], 16) - cp2 = long(parts2[0], 16) - - for i in xrange(cp1, cp2 + 1): # inclusive - parts[0] = '%04X' % i - f_out.write(';'.join(parts)) - else: - f_out.write(line) - - f_in.close() - f_out.flush() - f_out.close() - -if __name__ == '__main__': - main() diff --git a/src/scan_used_stridx_bidx.py b/src/scan_used_stridx_bidx.py deleted file mode 100644 index 412b23c1..00000000 --- a/src/scan_used_stridx_bidx.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python2 -# -# Scan Duktape code base for references to built-in strings and built-in -# objects, i.e. for: -# -# - Strings which will need DUK_STRIDX_xxx constants and a place in the -# thr->strs[] array. -# -# - Objects which will need DUK_BIDX_xxx constants and a place in the -# thr->builtins[] array. -# - -import os -import sys -import re -import json - -re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE) -re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE) -re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE) -re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE) - -def main(): - str_defs = {} - obj_defs = {} - - for fn in sys.argv[1:]: - with open(fn, 'rb') as f: - d = f.read() - for m in re.finditer(re_str_stridx, d): - str_defs[m.group(1)] = True - for m in re.finditer(re_str_heap, d): - str_defs[m.group(1)] = True - for m in re.finditer(re_str_hthread, d): - str_defs[m.group(1)] = True - for m in re.finditer(re_obj_bidx, d): - obj_defs[m.group(1)] = True - - str_used = [] - for k in sorted(str_defs.keys()): - str_used.append('DUK_STRIDX_' + k) - - obj_used = [] - for k in sorted(obj_defs.keys()): - obj_used.append('DUK_BIDX_' + k) - - doc = { - 'used_stridx_defines': str_used, - 'used_bidx_defines': obj_used, - 'count_used_stridx_defines': len(str_used), - 'count_used_bidx_defines': len(obj_used) - } - print(json.dumps(doc, indent=4)) - -if __name__ == '__main__': - main() diff --git a/tools/combine_src.py b/tools/combine_src.py new file mode 100644 index 00000000..ae7579e6 --- /dev/null +++ b/tools/combine_src.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python2 +# +# Combine a set of a source files into a single C file. +# +# Overview of the process: +# +# * Parse user supplied C files. Add automatic #undefs at the end +# of each C file to avoid defined bleeding from one file to another. +# +# * Combine the C files in specified order. If sources have ordering +# dependencies (depends on application), order may matter. +# +# * Process #include statements in the combined source, categorizing +# them either as "internal" (found in specified include path) or +# "external". Internal includes, unless explicitly excluded, are +# inlined into the result while extenal includes are left as is. +# Duplicate #include statements are replaced with a comment. +# +# At every step, source and header lines are represented with explicit +# line objects which keep track of original filename and line. The +# output contains #line directives, if necessary, to ensure error +# throwing and other diagnostic info will work in a useful manner when +# deployed. 
It's also possible to generate a combined source with no +# #line directives. +# +# Making the process deterministic is important, so that if users have +# diffs that they apply to the combined source, such diffs would apply +# for as long as possible. +# +# Limitations and notes: +# +# * While there are automatic #undef's for #define's introduced in each +# C file, it's not possible to "undefine" structs, unions, etc. If +# there are structs/unions/typedefs with conflicting names, these +# have to be resolved in the source files first. +# +# * Because duplicate #include statements are suppressed, currently +# assumes #include statements are not conditional. +# +# * A system header might be #include'd in multiple source files with +# different feature defines (like _BSD_SOURCE). Because the #include +# file will only appear once in the resulting source, the first +# occurrence wins. The result may not work correctly if the feature +# defines must actually be different between two or more source files. +# + +import os +import sys +import re +import json +import optparse + +# Include path for finding include files which are amalgamated. +include_paths = [] + +# Include files specifically excluded from being inlined. +include_excluded = [] + +class File: + filename_full = None + filename = None + lines = None + + def __init__(self, filename, lines): + self.filename = os.path.basename(filename) + self.filename_full = filename + self.lines = lines + +class Line: + filename_full = None + filename = None + lineno = None + data = None + + def __init__(self, filename, lineno, data): + self.filename = os.path.basename(filename) + self.filename_full = filename + self.lineno = lineno + self.data = data + +def readFile(filename): + lines = [] + + with open(filename, 'rb') as f: + lineno = 0 + for line in f: + lineno += 1 + if len(line) > 0 and line[-1] == '\n': + line = line[:-1] + lines.append(Line(filename, lineno, line)) + + return File(filename, lines) + +def lookupInclude(incfn): + re_sep = re.compile(r'/|\\') + + inccomp = re.split(re_sep, incfn) # split include path, support / and \ + + for path in include_paths: + fn = apply(os.path.join, [ path ] + inccomp) + if os.path.exists(fn): + return fn # Return full path to first match + + return None + +def addAutomaticUndefs(f): + defined = {} + + re_def = re.compile(r'#define\s+(\w+).*$') + re_undef = re.compile(r'#undef\s+(\w+).*$') + + for line in f.lines: + m = re_def.match(line.data) + if m is not None: + #print('DEFINED: %s' % repr(m.group(1))) + defined[m.group(1)] = True + m = re_undef.match(line.data) + if m is not None: + # Could just ignore #undef's here: we'd then emit + # reliable #undef's (though maybe duplicates) at + # the end. + #print('UNDEFINED: %s' % repr(m.group(1))) + if defined.has_key(m.group(1)): + del defined[m.group(1)] + + # Undefine anything that seems to be left defined. This not a 100% + # process because some #undef's might be conditional which we don't + # track at the moment. Note that it's safe to #undef something that's + # not defined. 
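A standalone sketch of the effect (macro names invented; the actual code below works on Line objects and appends the #undef lines to the file being combined):

    import re
    src_lines = [
        '#define DUK__LOCAL_MACRO(x)  ((x) * 2)',
        '#define DUK__TEMP  1',
        '#undef DUK__TEMP',
    ]
    defined = set()
    for data in src_lines:
        m = re.match(r'#define\s+(\w+).*$', data)
        if m is not None:
            defined.add(m.group(1))
        m = re.match(r'#undef\s+(\w+).*$', data)
        if m is not None:
            defined.discard(m.group(1))
    for name in sorted(defined):
        print('#undef %s' % name)    # prints only: #undef DUK__LOCAL_MACRO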
+ + keys = sorted(defined.keys()) # deterministic order + if len(keys) > 0: + #print('STILL DEFINED: %r' % repr(defined.keys())) + f.lines.append(Line(f.filename, len(f.lines) + 1, '')) + f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */')) + for k in keys: + f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k)) + +def createCombined(files, prologue_filename, line_directives): + res = [] + line_map = [] # indicate combined source lines where uncombined file/line would change + metadata = { + 'line_map': line_map + } + + emit_state = [ None, None ] # curr_filename, curr_lineno + + def emit(line): + if isinstance(line, (str, unicode)): + res.append(line) + emit_state[1] += 1 + else: + if line.filename != emit_state[0] or line.lineno != emit_state[1]: + if line_directives: + res.append('#line %d "%s"' % (line.lineno, line.filename)) + line_map.append({ 'original_file': line.filename, + 'original_line': line.lineno, + 'combined_line': len(res) + 1 }) + res.append(line.data) + emit_state[0] = line.filename + emit_state[1] = line.lineno + 1 + + included = {} # headers already included + + if prologue_filename is not None: + with open(prologue_filename, 'rb') as f: + for line in f.read().split('\n'): + res.append(line) + + re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$') + + # Process a file, appending it to the result; the input may be a + # source or an include file. #include directives are handled + # recursively. + def processFile(f): + #print('Process file: ' + f.filename) + + for line in f.lines: + if not line.data.startswith('#include'): + emit(line) + continue + + m = re_inc.match(line.data) + if m is None: + raise Exception('Couldn\'t match #include line: %s' % repr(line.data)) + incpath = m.group(2) + if incpath in include_excluded: + # Specific include files excluded from the + # inlining / duplicate suppression process. + emit(line) # keep as is + continue + + if included.has_key(incpath): + # We suppress duplicate includes, both internal and + # external, based on the assumption that includes are + # not behind #ifdef checks. This is the case for + # Duktape (except for the include files excluded). + emit('/* #include %s -> already included */' % incpath) + continue + included[incpath] = True + + # An include file is considered "internal" and is amalgamated + # if it is found in the include path provided by the user. 
+ + incfile = lookupInclude(incpath) + if incfile is not None: + #print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile))) + emit('/* #include %s */' % incpath) + processFile(readFile(incfile)) + else: + #print('Include considered external: %s' % repr(line.data)) + emit(line) # keep as is + + for f in files: + processFile(f) + + return '\n'.join(res) + '\n', metadata + +def main(): + global include_paths, include_excluded + + parser = optparse.OptionParser() + parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times') + parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)') + parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file') + parser.add_option('--output-source', dest='output_source', help='Output source filename') + parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename') + parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source') + (opts, args) = parser.parse_args() + + assert(opts.include_paths is not None) + include_paths = opts.include_paths # global for easy access + include_excluded = opts.include_excluded + assert(opts.output_source) + assert(opts.output_metadata) + + print('Read input files, add automatic #undefs') + sources = args + files = [] + for fn in sources: + res = readFile(fn) + #print('Add automatic undefs for: ' + fn) + addAutomaticUndefs(res) + files.append(res) + + print('Create combined source file from %d source files' % len(files)) + combined_source, metadata = \ + createCombined(files, opts.prologue, opts.line_directives) + with open(opts.output_source, 'wb') as f: + f.write(combined_source) + with open(opts.output_metadata, 'wb') as f: + f.write(json.dumps(metadata, indent=4)) + + print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source)) + +if __name__ == '__main__': + main() diff --git a/tools/create_spdx_license.py b/tools/create_spdx_license.py new file mode 100644 index 00000000..e2736f34 --- /dev/null +++ b/tools/create_spdx_license.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python2 +# +# Helper to create an SPDX license file (http://spdx.org) +# +# This must be executed when the dist/ directory is otherwise complete, +# except for the SPDX license, so that the file lists and such contained +# in the SPDX license will be correct. +# +# The utility outputs RDF/XML to specified file: +# +# $ python create_spdx_license.py /tmp/license.spdx +# +# Then, validate with SPDXViewer and SPDXTools: +# +# $ java -jar SPDXViewer.jar /tmp/license.spdx +# $ java -jar java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html +# +# Finally, copy to dist: +# +# $ cp /tmp/license.spdx dist/license.spdx +# +# SPDX FAQ indicates there is no standard extension for an SPDX license file +# but '.spdx' is a common practice. 
+# +# The algorithm to compute a "verification code", implemented in this file, +# can be verified as follows: +# +# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz +# $ cd /tmp +# $ tar xvfJ duktape-N.N.N.tar.xz +# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code +# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/ +# +# Compare the resulting verification code manually with the one in license.spdx. +# +# Resources: +# +# - http://spdx.org/about-spdx/faqs +# - http://wiki.spdx.org/view/Technical_Team/Best_Practices +# + +import os +import sys +import re +import datetime +import sha +import rdflib +from rdflib import URIRef, BNode, Literal, Namespace + +RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') +RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') +XSD = Namespace('http://www.w3.org/2001/XMLSchema#') +SPDX = Namespace('http://spdx.org/rdf/terms#') +DOAP = Namespace('http://usefulinc.com/ns/doap#') +DUKTAPE = Namespace('http://duktape.org/rdf/terms#') + +def checksumFile(g, filename): + f = open(filename, 'rb') + d = f.read() + f.close() + shasum = sha.sha(d).digest().encode('hex').lower() + + csum_node = BNode() + g.add((csum_node, RDF.type, SPDX.Checksum)) + g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1)) + g.add((csum_node, SPDX.checksumValue, Literal(shasum))) + + return csum_node + +def computePackageVerification(g, dirname, excluded): + # SPDX 1.2 Section 4.7 + # The SPDXTools command "GenerateVerificationCode" can be used to + # check the verification codes created. Note that you must manually + # remove "license.spdx" from the unpacked dist directory before + # computing the verification code. + + verify_node = BNode() + + hashes = [] + for dirpath, dirnames, filenames in os.walk(dirname): + for fn in filenames: + full_fn = os.path.join(dirpath, fn) + f = open(full_fn, 'rb') + d = f.read() + f.close() + + if full_fn in excluded: + #print('excluded in verification: ' + full_fn) + continue + #print('included in verification: ' + full_fn) + + file_sha1 = sha.sha(d).digest().encode('hex').lower() + hashes.append(file_sha1) + + #print(repr(hashes)) + hashes.sort() + #print(repr(hashes)) + verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower() + + for fn in excluded: + g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn))) + g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code))) + + return verify_node + +def fileType(filename): + ign, ext = os.path.splitext(filename) + if ext in [ '.c', '.h', '.js' ]: + return SPDX.fileType_source + else: + return SPDX.fileType_other + +def getDuktapeVersion(): + f = open('./src/duktape.h') + re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$') + for line in f: + line = line.strip() + m = re_ver.match(line) + if m is None: + continue + ver = int(m.group(1)) + return '%d.%d.%d' % ((ver / 10000) % 100, + (ver / 100) % 100, + ver % 100) + + raise Exception('could not figure out Duktape version') + +def main(): + outfile = sys.argv[1] + + if not os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript'): + sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') + sys.exit(1) + os.chdir('dist') + if not os.path.exists('Makefile.cmdline'): + sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') + sys.exit(1) + + duktape_version = getDuktapeVersion() + duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz' + now = 
datetime.datetime.utcnow() + now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second) + creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime) + duktape_org = Literal('Organization: duktape.org') + mit_license = URIRef('http://spdx.org/licenses/MIT') + duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)') + + g = rdflib.Graph() + + crea_node = BNode() + g.add((crea_node, RDF.type, SPDX.CreationInfo)) + g.add((crea_node, RDFS.comment, Literal(''))) + g.add((crea_node, SPDX.creator, duktape_org)) + g.add((crea_node, SPDX.created, creation_date)) + g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/ + + # 'name' should not include a version number (see best practices) + pkg_node = BNode() + g.add((pkg_node, RDF.type, SPDX.Package)) + g.add((pkg_node, SPDX.name, Literal('Duktape'))) + g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version))) + g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname))) + g.add((pkg_node, SPDX.supplier, duktape_org)) + g.add((pkg_node, SPDX.originator, duktape_org)) + g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI))) + g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI))) + verify_node = computePackageVerification(g, '.', [ './license.spdx' ]) + g.add((pkg_node, SPDX.packageVerificationCode, verify_node)) + # SPDX.checksum: omitted because license is inside the package + g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.'))) + + # NOTE: MIT license alone is sufficient for now, because Duktape, Lua, + # Murmurhash2, and CommonJS (though probably not even relevant for + # licensing) are all MIT. + g.add((pkg_node, SPDX.licenseConcluded, mit_license)) + g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license)) + g.add((pkg_node, SPDX.licenseDeclared, mit_license)) + g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. 
Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.'))) + g.add((pkg_node, SPDX.copyrightText, duktape_copyright)) + g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter'))) + g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint'))) + # hasFile properties added separately below + + #reviewed_node = BNode() + #g.add((reviewed_node, RDF.type, SPDX.Review)) + #g.add((reviewed_node, SPDX.reviewer, XXX)) + #g.add((reviewed_node, SPDX.reviewDate, XXX)) + #g.add((reviewed_node, RDFS.comment, '')) + + spdx_doc = BNode() + g.add((spdx_doc, RDF.type, SPDX.SpdxDocument)) + g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2'))) + g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0'))) + g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version))) + g.add((spdx_doc, SPDX.creationInfo, crea_node)) + g.add((spdx_doc, SPDX.describesPackage, pkg_node)) + # SPDX.hasExtractedLicensingInfo + # SPDX.reviewed + # SPDX.referencesFile: added below + + for dirpath, dirnames, filenames in os.walk('.'): + for fn in filenames: + full_fn = os.path.join(dirpath, fn) + #print('# file: ' + full_fn) + + file_node = BNode() + g.add((file_node, RDF.type, SPDX.File)) + g.add((file_node, SPDX.fileName, Literal(full_fn))) + g.add((file_node, SPDX.fileType, fileType(full_fn))) + g.add((file_node, SPDX.checksum, checksumFile(g, full_fn))) + + # Here we assume that LICENSE.txt provides the actual "in file" + # licensing information, and everything else is implicitly under + # MIT license. + g.add((file_node, SPDX.licenseConcluded, mit_license)) + if full_fn == './LICENSE.txt': + g.add((file_node, SPDX.licenseInfoInFile, mit_license)) + else: + g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none))) + + # SPDX.licenseComments + g.add((file_node, SPDX.copyrightText, duktape_copyright)) + # SPDX.noticeText + # SPDX.artifactOf + # SPDX.fileDependency + # SPDX.fileContributor + + # XXX: should referencesFile include all files? + g.add((spdx_doc, SPDX.referencesFile, file_node)) + + g.add((pkg_node, SPDX.hasFile, file_node)) + + # Serialize into RDF/XML directly. We could also serialize into + # N-Triples and use external tools (like 'rapper') to get cleaner, + # abbreviated output. + + #print('# Duktape SPDX license file (autogenerated)') + #print(g.serialize(format='turtle')) + #print(g.serialize(format='nt')) + f = open(outfile, 'wb') + #f.write(g.serialize(format='rdf/xml')) + f.write(g.serialize(format='xml')) + f.close() + +if __name__ == '__main__': + main() diff --git a/tools/duk_meta_to_strarray.py b/tools/duk_meta_to_strarray.py new file mode 100644 index 00000000..e0b79097 --- /dev/null +++ b/tools/duk_meta_to_strarray.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python2 +# +# Create an array of C strings with Duktape built-in strings. +# Useful when using external strings. 
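The interesting detail is in the to_c_string() helper defined below: after emitting a hex escape it closes and reopens the C string literal so the escape cannot swallow the following character. Expected results for a few illustrative inputs:

    # return values of to_c_string() as defined below
    assert to_c_string('foo') == '"foo"'
    assert to_c_string('a\x07b') == '"a\\x07" "b"'             # the break keeps '\x07b' from parsing as one escape
    assert to_c_string('say "hi"') == '"say \\x22" "hi\\x22"'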
+# + +import os +import sys +import json + +def to_c_string(x): + res = '"' + term = False + for i, c in enumerate(x): + if term: + term = False + res += '" "' + + o = ord(c) + if o < 0x20 or o > 0x7e or c in '\'"\\': + # Terminate C string so that escape doesn't become + # ambiguous + res += '\\x%02x' % o + term = True + else: + res += c + res += '"' + return res + +def main(): + f = open(sys.argv[1], 'rb') + d = f.read() + f.close() + meta = json.loads(d) + + print('const char *duk_builtin_strings[] = {') + + strlist = meta['builtin_strings_base64'] + for i in xrange(len(strlist)): + s = strlist[i] + if i == len(strlist) - 1: + print(' %s' % to_c_string(s.decode('base64'))) + else: + print(' %s,' % to_c_string(s.decode('base64'))) + + print('};') + +if __name__ == '__main__': + main() diff --git a/tools/dukutil.py b/tools/dukutil.py new file mode 100644 index 00000000..e61b78fa --- /dev/null +++ b/tools/dukutil.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python2 +# +# Python utilities shared by the build scripts. +# + +import datetime +import json + +class BitEncoder: + "Bitstream encoder." + + _bits = None + + def __init__(self): + self._bits = [] + + def bits(self, x, nbits): + if (x >> nbits) != 0: + raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits)) + for shift in xrange(nbits - 1, -1, -1): # nbits - 1, nbits - 2, ..., 0 + self._bits.append((x >> shift) & 0x01) + + def string(self, x): + for i in xrange(len(x)): + ch = ord(x[i]) + for shift in xrange(7, -1, -1): # 7, 6, ..., 0 + self._bits.append((ch >> shift) & 0x01) + + def getNumBits(self): + "Get current number of encoded bits." + return len(self._bits) + + def getNumBytes(self): + "Get current number of encoded bytes, rounded up." + nbits = len(self._bits) + while (nbits % 8) != 0: + nbits += 1 + return nbits / 8 + + def getBytes(self): + "Get current bitstream as a byte sequence, padded with zero bits." + bytes = [] + + for i in xrange(self.getNumBytes()): + t = 0 + for j in xrange(8): + off = i*8 + j + if off >= len(self._bits): + t = (t << 1) + else: + t = (t << 1) + self._bits[off] + bytes.append(t) + + return bytes + + def getByteString(self): + "Get current bitstream as a string." + return ''.join([chr(i) for i in self.getBytes()]) + +class GenerateC: + "Helper for generating C source and header files." + + _data = None + wrap_col = 76 + + def __init__(self): + self._data = [] + + def emitRaw(self, text): + "Emit raw text (without automatic newline)." + self._data.append(text) + + def emitLine(self, text): + "Emit a raw line (with automatic newline)." + self._data.append(text + '\n') + + def emitHeader(self, autogen_by): + "Emit file header comments." + + # Note: a timestamp would be nice but it breaks incremental building + self.emitLine('/*') + self.emitLine(' * Automatically generated by %s, do not edit!' % autogen_by) + self.emitLine(' */') + self.emitLine('') + + def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True): + "Emit an array as a C array." 
+ + # lenient input + if isinstance(data, unicode): + data = data.encode('utf-8') + if isinstance(data, str): + tmp = [] + for i in xrange(len(data)): + tmp.append(ord(data[i])) + data = tmp + + size_spec = '' + if size is not None: + size_spec = '%d' % size + visib_qual = '' + if visibility is not None: + visib_qual = visibility + ' ' + const_qual = '' + if const: + const_qual = 'const ' + self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec)) + + line = '' + for i in xrange(len(data)): + if intvalues: + suffix = '' + if data[i] < -32768 or data[i] > 32767: + suffix = 'L' + t = "%d%s," % (data[i], suffix) + else: + t = "(%s)'\\x%02x', " % (typename, data[i]) + if len(line) + len(t) >= self.wrap_col: + self.emitLine(line) + line = t + else: + line += t + if line != '': + self.emitLine(line) + self.emitLine('};') + + def emitDefine(self, name, value, comment=None): + "Emit a C define with an optional comment." + + # XXX: there is no escaping right now (for comment or value) + if comment is not None: + self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment)) + else: + self.emitLine('#define %-60s %s' % (name, value)) + + def getString(self): + "Get the entire file as a string." + return ''.join(self._data) + +def json_encode(x): + "JSON encode a value." + try: + return json.dumps(x) + except AttributeError: + pass + + # for older library versions + return json.write(x) + +def json_decode(x): + "JSON decode a value." + try: + return json.loads(x) + except AttributeError: + pass + + # for older library versions + return json.read(x) + +# Compute a byte hash identical to duk_util_hashbytes(). +DUK__MAGIC_M = 0x5bd1e995 +DUK__MAGIC_R = 24 +def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian): + h = (str_seed ^ nbytes) & 0xffffffff + + while nbytes >= 4: + # 4-byte fetch byte order: + # - native (endian dependent) if unaligned accesses allowed + # - little endian if unaligned accesses not allowed + + if big_endian: + k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \ + (ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24) + else: + k = ord(x[off]) + (ord(x[off + 1]) << 8) + \ + (ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24) + + k = (k * DUK__MAGIC_M) & 0xffffffff + k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff + k = (k * DUK__MAGIC_M) & 0xffffffff + h = (h * DUK__MAGIC_M) & 0xffffffff + h = (h ^ k) & 0xffffffff + + off += 4 + nbytes -= 4 + + if nbytes >= 3: + h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff + if nbytes >= 2: + h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff + if nbytes >= 1: + h = (h ^ ord(x[off])) & 0xffffffff + h = (h * DUK__MAGIC_M) & 0xffffffff + + h = (h ^ (h >> 13)) & 0xffffffff + h = (h * DUK__MAGIC_M) & 0xffffffff + h = (h ^ (h >> 15)) & 0xffffffff + + return h + +# Compute a string hash identical to duk_heap_hashstring() when dense +# hashing is enabled. 
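Before the string hash helpers below, a short usage sketch of the GenerateC class defined above (names are invented and the emitted text in the trailing comment is approximate; the #define name column is padded in the real output):

    gc = GenerateC()
    gc.emitHeader('example_script.py')
    gc.emitDefine('DUK__EXAMPLE_COUNT', 3)
    gc.emitArray([ 1, 2, 300000 ], 'duk__example_table', visibility='DUK_INTERNAL',
                 typename='duk_int_t', intvalues=True, const=True)
    print(gc.getString())
    # /*
    #  * Automatically generated by example_script.py, do not edit!
    #  */
    #
    # #define DUK__EXAMPLE_COUNT 3
    # DUK_INTERNAL const duk_int_t duk__example_table[] = {
    # 1,2,300000L,
    # };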
+DUK__STRHASH_SHORTSTRING = 4096 +DUK__STRHASH_MEDIUMSTRING = 256 * 1024 +DUK__STRHASH_BLOCKSIZE = 256 +def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False): + str_seed = (hash_seed ^ len(x)) & 0xffffffff + + if len(x) <= DUK__STRHASH_SHORTSTRING: + res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian) + else: + if len(x) <= DUK__STRHASH_MEDIUMSTRING: + skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE + else: + skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE + + res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian) + off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256 + + while off < len(x): + left = len(x) - off + now = left + if now > DUK__STRHASH_BLOCKSIZE: + now = DUK__STRHASH_BLOCKSIZE + res = (res ^ duk_util_hashbytes(str, off, now, str_seed, big_endian)) & 0xffffffff + off += skip + + if strhash16: + res &= 0xffff + + return res + +# Compute a string hash identical to duk_heap_hashstring() when sparse +# hashing is enabled. +DUK__STRHASH_SKIP_SHIFT = 5 # XXX: assumes default value +def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False): + res = (hash_seed ^ len(x)) & 0xffffffff + + step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1 + off = len(x) + while off >= step: + assert(off >= 1) + res = ((res * 33) + ord(x[off - 1])) & 0xffffffff + off -= step + + if strhash16: + res &= 0xffff + + return res + +# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length(). +def duk_unicode_unvalidated_utf8_length(x): + assert(isinstance(x, str)) + clen = 0 + for c in x: + t = ord(c) + if t < 0x80 or t >= 0xc0: # 0x80...0xbf are continuation chars, not counted + clen += 1 + return clen diff --git a/tools/dump_bytecode.py b/tools/dump_bytecode.py new file mode 100644 index 00000000..f291cde0 --- /dev/null +++ b/tools/dump_bytecode.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python2 +# +# Utility to dump bytecode into a human readable form. +# + +import os +import sys +import struct +import optparse + +def decode_string(buf, off): + strlen, = struct.unpack('>L', buf[off:off+4]) + off += 4 + strdata = buf[off:off+strlen] + off += strlen + + return off, strdata + +def sanitize_string(val): + # Don't try to UTF-8 decode, just escape non-printable ASCII. 
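# The serialized strings that decode_string() above parses are length
# prefixed: a 4-byte big-endian length followed by that many raw bytes.
# A standalone sketch of the same layout (illustrative only):
#
#     import struct
#     blob = struct.pack('>L', 3) + 'foo'        # '\x00\x00\x00\x03foo'
#     strlen, = struct.unpack('>L', blob[0:4])   # 3
#     strdata = blob[4:4 + strlen]               # 'foo'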
+ def f(c): + if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"': + return '\\x%02x' % ord(c) + else: + return c + return "'" + ''.join(map(f, val)) + "'" + +def decode_sanitize_string(buf, off): + off, val = decode_string(buf, off) + return off, sanitize_string(val) + +def dump_function(buf, off, ind): + count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12]) + off += 12 + print '%sInstructions: %d' % (ind, count_inst) + print '%sConstants: %d' % (ind, count_const) + print '%sInner functions: %d' % (ind, count_funcs) + + nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12]) + off += 12 + print '%sNregs: %d' % (ind, nregs) + print '%sNargs: %d' % (ind, nargs) + print '%sStart line number: %d' % (ind, start_line) + print '%sEnd line number: %d' % (ind, end_line) + + compfunc_flags, = struct.unpack('>L', buf[off:off+4]) + off += 4 + print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags) + + for i in xrange(count_inst): + ins, = struct.unpack('>L', buf[off:off+4]) + off += 4 + print '%s %06d: %08lx' % (ind, i, ins) + + print '%sConstants:' % ind + for i in xrange(count_const): + const_type, = struct.unpack('B', buf[off:off+1]) + off += 1 + + if const_type == 0x00: + off, strdata = decode_sanitize_string(buf, off) + print '%s %06d: %s' % (ind, i, strdata) + elif const_type == 0x01: + num, = struct.unpack('>d', buf[off:off+8]) + off += 8 + print '%s %06d: %f' % (ind, i, num) + else: + raise Exception('invalid constant type: %d' % const_type) + + for i in xrange(count_funcs): + print '%sInner function %d:' % (ind, i) + off = dump_function(buf, off, ind + ' ') + + val, = struct.unpack('>L', buf[off:off+4]) + off += 4 + print '%s.length: %d' % (ind, val) + off, val = decode_sanitize_string(buf, off) + print '%s.name: %s' % (ind, val) + off, val = decode_sanitize_string(buf, off) + print '%s.fileName: %s' % (ind, val) + off, val = decode_string(buf, off) # actually a buffer + print '%s._Pc2line: %s' % (ind, val.encode('hex')) + + while True: + off, name = decode_string(buf, off) + if name == '': + break + name = sanitize_string(name) + val, = struct.unpack('>L', buf[off:off+4]) + off += 4 + print '%s_Varmap[%s] = %d' % (ind, name, val) + + idx = 0 + while True: + off, name = decode_string(buf, off) + if name == '': + break + name = sanitize_string(name) + print '%s_Formals[%d] = %s' % (ind, idx, name) + idx += 1 + + return off + +def dump_bytecode(buf, off, ind): + sig, ver = struct.unpack('BB', buf[off:off+2]) + off += 2 + if sig != 0xff: + raise Exception('invalid signature byte: %d' % sig) + if ver != 0x00: + raise Exception('unsupported bytecode version: %d' % ver) + print '%sBytecode version: 0x%02x' % (ind, ver) + + off = dump_function(buf, off, ind + ' ') + + return off + +def main(): + parser = optparse.OptionParser() + parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump') + (opts, args) = parser.parse_args() + + with open(args[0], 'rb') as f: + d = f.read() + if opts.hex_decode: + d = d.strip() + d = d.decode('hex') + dump_bytecode(d, 0, '') + +if __name__ == '__main__': + main() diff --git a/tools/extract_caseconv.py b/tools/extract_caseconv.py new file mode 100644 index 00000000..614044b9 --- /dev/null +++ b/tools/extract_caseconv.py @@ -0,0 +1,444 @@ +#!/usr/bin/env python2 +# +# Extract rules for Unicode case conversion, specifically the behavior +# required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19. 
The +# bitstream encoded rules are used for the slow path at run time, so +# compactness is favored over speed. +# +# There is no support for context or locale sensitive rules, as they +# are handled directly in C code before consulting tables generated +# here. Ecmascript requires case conversion both with and without +# locale/language specific rules (e.g. String.prototype.toLowerCase() +# and String.prototype.toLocaleLowerCase()), so they are best handled +# in C anyway. +# +# Case conversion rules for ASCII are also excluded as they are +# handled by C fast path. Rules for non-BMP characters (codepoints +# above U+FFFF) are omitted as they're not required for standard +# Ecmascript. +# + +import os, sys, math +import optparse +import dukutil + +class UnicodeData: + "Read UnicodeData.txt into an internal representation." + + def __init__(self, filename): + self.data = self.read_unicode_data(filename) + print 'read %d unicode data entries' % len(self.data) + + def read_unicode_data(self, filename): + res = [] + f = open(filename, 'rb') + for line in f: + if line.startswith('#'): + continue + line = line.strip() + if line == '': + continue + parts = line.split(';') + if len(parts) != 15: + raise Exception('invalid unicode data line') + res.append(parts) + f.close() + + # Sort based on Unicode codepoint + def mycmp(a,b): + return cmp(long(a[0], 16), long(b[0], 16)) + + res.sort(cmp=mycmp) + return res + +class SpecialCasing: + "Read SpecialCasing.txt into an internal representation." + + def __init__(self, filename): + self.data = self.read_special_casing_data(filename) + print 'read %d special casing entries' % len(self.data) + + def read_special_casing_data(self, filename): + res = [] + f = open(filename, 'rb') + for line in f: + try: + idx = line.index('#') + line = line[:idx] + except ValueError: + pass + line = line.strip() + if line == '': + continue + parts = line.split(';') + parts = [i.strip() for i in parts] + while len(parts) < 6: + parts.append('') + res.append(parts) + f.close() + return res + +def parse_unicode_sequence(x): + res = '' + for i in x.split(' '): + i = i.strip() + if i == '': + continue + res += unichr(long(i, 16)) + return res + +def get_base_conversion_maps(unicode_data): + "Create case conversion tables without handling special casing yet." + + uc = {} # codepoint (number) -> string + lc = {} + tc = {} # titlecase + + for x in unicode_data.data: + c1 = long(x[0], 16) + + # just 16-bit support needed + if c1 >= 0x10000: + continue + + if x[12] != '': + # field 12: simple uppercase mapping + c2 = parse_unicode_sequence(x[12]) + uc[c1] = c2 + tc[c1] = c2 # titlecase default == uppercase, overridden below if necessary + if x[13] != '': + # field 13: simple lowercase mapping + c2 = parse_unicode_sequence(x[13]) + lc[c1] = c2 + if x[14] != '': + # field 14: simple titlecase mapping + c2 = parse_unicode_sequence(x[14]) + tc[c1] = c2 + + return uc, lc, tc + +def update_special_casings(uc, lc, tc, special_casing): + "Update case conversion tables with special case conversion rules." 
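# A quick illustration (not part of the extraction logic): the mapping fields
# in UnicodeData.txt and SpecialCasing.txt are space separated hex codepoints,
# which parse_unicode_sequence() above turns into a Python 2 unicode string:
seq = '0053 0053'
decoded = u''.join([ unichr(long(cp, 16)) for cp in seq.split(' ') if cp != '' ])
print(repr(decoded))   # u'SS' (for example, a full uppercase mapping expanding to two codepoints)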
+ + for x in special_casing.data: + c1 = long(x[0], 16) + + if x[4] != '': + # conditions + continue + + lower = parse_unicode_sequence(x[1]) + title = parse_unicode_sequence(x[2]) + upper = parse_unicode_sequence(x[3]) + + if len(lower) > 1: + lc[c1] = lower + if len(upper) > 1: + uc[c1] = upper + if len(title) > 1: + tc[c1] = title + + print 'special case: %d %d %d' % (len(lower), len(upper), len(title)) + +def remove_ascii_part(convmap): + "Remove ASCII case conversion parts (handled by C fast path)." + + for i in xrange(128): + if convmap.has_key(i): + del convmap[i] + +def scan_range_with_skip(convmap, start_idx, skip): + "Scan for a range of continuous case conversion with a certain 'skip'." + + conv_i = start_idx + if not convmap.has_key(conv_i): + return None, None, None + elif len(convmap[conv_i]) > 1: + return None, None, None + else: + conv_o = ord(convmap[conv_i]) + + start_i = conv_i + start_o = conv_o + + while True: + new_i = conv_i + skip + new_o = conv_o + skip + + if not convmap.has_key(new_i): + break + if len(convmap[new_i]) > 1: + break + if ord(convmap[new_i]) != new_o: + break + + conv_i = new_i + conv_o = new_o + + # [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char + count = (conv_i - start_i) / skip + 1 + if count <= 1: + return None, None, None + + # we have an acceptable range, remove them from the convmap here + for i in xrange(start_i, conv_i + skip, skip): + del convmap[i] + + return start_i, start_o, count + +def find_first_range_with_skip(convmap, skip): + "Find first range with a certain 'skip' value." + + for i in xrange(65536): + start_i, start_o, count = scan_range_with_skip(convmap, i, skip) + if start_i is None: + continue + return start_i, start_o, count + + return None, None, None + +def generate_tables(convmap): + "Generate bit-packed case conversion table for a given conversion map." + + # The bitstream encoding is based on manual inspection for whatever + # regularity the Unicode case conversion rules have. + # + # Start with a full description of case conversions which does not + # cover all codepoints; unmapped codepoints convert to themselves. + # Scan for range-to-range mappings with a range of skips starting from 1. + # Whenever a valid range is found, remove it from the map. Finally, + # output the remaining case conversions (1:1 and 1:n) on a per codepoint + # basis. + # + # This is very slow because we always scan from scratch, but its the + # most reliable and simple way to scan + + ranges = [] # range mappings (2 or more consecutive mappings with a certain skip) + singles = [] # 1:1 character mappings + complex = [] # 1:n character mappings + + # Ranges with skips + + for skip in xrange(1,6+1): # skips 1...6 are useful + while True: + start_i, start_o, count = find_first_range_with_skip(convmap, skip) + if start_i is None: + break + print 'skip %d: %d %d %d' % (skip, start_i, start_o, count) + ranges.append([start_i, start_o, count, skip]) + + # 1:1 conversions + + k = convmap.keys() + k.sort() + for i in k: + if len(convmap[i]) > 1: + continue + singles.append([i, ord(convmap[i])]) # codepoint, codepoint + del convmap[i] + + # There are many mappings to 2-char sequences with latter char being U+0399. + # These could be handled as a special case, but we don't do that right now. + # + # [8064L, u'\u1f08\u0399'] + # [8065L, u'\u1f09\u0399'] + # [8066L, u'\u1f0a\u0399'] + # [8067L, u'\u1f0b\u0399'] + # [8068L, u'\u1f0c\u0399'] + # [8069L, u'\u1f0d\u0399'] + # [8070L, u'\u1f0e\u0399'] + # [8071L, u'\u1f0f\u0399'] + # ... 
+ # + # tmp = {} + # k = convmap.keys() + # k.sort() + # for i in k: + # if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399': + # tmp[i] = convmap[i][0] + # del convmap[i] + # print repr(tmp) + # + # skip = 1 + # while True: + # start_i, start_o, count = find_first_range_with_skip(tmp, skip) + # if start_i is None: + # break + # print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count) + # print len(tmp.keys()) + # print repr(tmp) + # XXX: need to put 12 remaining mappings back to convmap... + + # 1:n conversions + + k = convmap.keys() + k.sort() + for i in k: + complex.append([i, convmap[i]]) # codepoint, string + del convmap[i] + + for t in singles: + print repr(t) + + for t in complex: + print repr(t) + + print 'range mappings: %d' % len(ranges) + print 'single character mappings: %d' % len(singles) + print 'complex mappings (1:n): %d' % len(complex) + print 'remaining (should be zero): %d' % len(convmap.keys()) + + # XXX: opportunities for diff encoding skip=3 ranges? + prev = None + for t in ranges: + # range: [start_i, start_o, count, skip] + if t[3] != 3: + continue + if prev is not None: + print '%d %d' % (t[0] - prev[0], t[1] - prev[1]) + else: + print 'start: %d %d' % (t[0], t[1]) + prev = t + + # bit packed encoding + + be = dukutil.BitEncoder() + + for curr_skip in xrange(1, 7): # 1...6 + count = 0 + for r in ranges: + start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] + if skip != curr_skip: + continue + count += 1 + be.bits(count, 6) + print 'encode: skip=%d, count=%d' % (curr_skip, count) + + for r in ranges: + start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] + if skip != curr_skip: + continue + be.bits(start_i, 16) + be.bits(start_o, 16) + be.bits(r_count, 7) + be.bits(0x3f, 6) # maximum count value = end of skips + + count = len(singles) + be.bits(count, 6) + for t in singles: + cp_i, cp_o = t[0], t[1] + be.bits(cp_i, 16) + be.bits(cp_o, 16) + + count = len(complex) + be.bits(count, 7) + for t in complex: + cp_i, str_o = t[0], t[1] + be.bits(cp_i, 16) + be.bits(len(str_o), 2) + for i in xrange(len(str_o)): + be.bits(ord(str_o[i]), 16) + + return be.getBytes(), be.getNumBits() + +def generate_regexp_canonicalize_lookup(convmap): + res = [] + + highest_nonid = -1 + + for cp in xrange(65536): + res_cp = cp # default to as is + if convmap.has_key(cp): + tmp = convmap[cp] + if len(tmp) == 1: + # Multiple codepoints from input, ignore + res_cp = ord(tmp[0]) + if cp >= 0x80 and res_cp < 0x80: + res_cp = cp # non-ASCII mapped to ASCII, ignore + + if cp != res_cp: + highest_nonid = cp + + res.append(res_cp) + + # At the moment this is 65370, which means there's very little + # gain in assuming 1:1 mapping above a certain BMP codepoint. + print('HIGHEST NON-ID MAPPING: %d' % highest_nonid) + return res + +def clonedict(x): + "Shallow clone of input dict." 
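# The table emitted above is decoded bit by bit at run time by C code.  For
# reference, a minimal Python sketch of the matching MSB-first reader
# (illustrative only; the real consumer lives in the Duktape C sources):
class BitDecoder:
    "Inverse of dukutil.BitEncoder for byte lists produced by getBytes()."
    def __init__(self, data):
        self._data = data
        self._pos = 0
    def bits(self, nbits):
        res = 0
        for _ in xrange(nbits):
            byte = self._data[self._pos >> 3]
            res = (res << 1) + ((byte >> (7 - (self._pos & 0x07))) & 0x01)
            self._pos += 1
        return res

# For example, the first field written by generate_tables() is the 6-bit
# range count for skip=1:
#     bd = BitDecoder(uc_bytes)
#     count_skip1 = bd.bits(6)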
+ res = {} + for k in x.keys(): + res[k] = x[k] + return res + +def main(): + parser = optparse.OptionParser() + parser.add_option('--command', dest='command', default='caseconv_bitpacked') + parser.add_option('--unicode-data', dest='unicode_data') + parser.add_option('--special-casing', dest='special_casing') + parser.add_option('--out-source', dest='out_source') + parser.add_option('--out-header', dest='out_header') + parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc') + parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc') + parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup') + (opts, args) = parser.parse_args() + + unicode_data = UnicodeData(opts.unicode_data) + special_casing = SpecialCasing(opts.special_casing) + + uc, lc, tc = get_base_conversion_maps(unicode_data) + update_special_casings(uc, lc, tc, special_casing) + + if opts.command == 'caseconv_bitpacked': + # XXX: ASCII and non-BMP filtering could be an option but is now hardcoded + + # ascii is handled with 'fast path' so not needed here + t = clonedict(uc) + remove_ascii_part(t) + uc_bytes, uc_nbits = generate_tables(t) + + t = clonedict(lc) + remove_ascii_part(t) + lc_bytes, lc_nbits = generate_tables(t) + + # Generate C source and header files + genc = dukutil.GenerateC() + genc.emitHeader('extract_caseconv.py') + genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True) + genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True) + f = open(opts.out_source, 'wb') + f.write(genc.getString()) + f.close() + + genc = dukutil.GenerateC() + genc.emitHeader('extract_caseconv.py') + genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes))) + genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes))) + f = open(opts.out_header, 'wb') + f.write(genc.getString()) + f.close() + elif opts.command == 're_canon_lookup': + # direct canonicalization lookup for case insensitive regexps, includes ascii part + t = clonedict(uc) + re_canon_lookup = generate_regexp_canonicalize_lookup(t) + + genc = dukutil.GenerateC() + genc.emitHeader('extract_caseconv.py') + genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True) + f = open(opts.out_source, 'wb') + f.write(genc.getString()) + f.close() + + genc = dukutil.GenerateC() + genc.emitHeader('extract_caseconv.py') + genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup))) + f = open(opts.out_header, 'wb') + f.write(genc.getString()) + f.close() + else: + raise Exception('invalid command: %r' % opts.command) + +if __name__ == '__main__': + main() diff --git a/tools/extract_chars.py b/tools/extract_chars.py new file mode 100644 index 00000000..77e061c7 --- /dev/null +++ b/tools/extract_chars.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python2 +# +# Select a set of Unicode characters (based on included/excluded categories +# etc) and write out a compact bitstream for matching a character against +# the set at runtime. This is for the slow path, where we're especially +# concerned with compactness. A C source file with the table is written, +# together with a matching C header. +# +# Unicode categories (such as 'Z') can be used. 
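# For example, an invocation along these lines (file names, category selection
# and the table name are illustrative only):
#
#   $ python extract_chars.py --unicode-data=UnicodeData.txt \
#         --include-categories=Lu,Ll,Lt,Lm,Lo \
#         --exclude-categories=ASCII,NONBMP \
#         --out-source=example_match_table.c \
#         --out-header=example_match_table.h \
#         --table-name=example_match_table
#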
Two pseudo-categories +# are also available for exclusion only: ASCII and NONBMP. "ASCII" +# category excludes ASCII codepoints which is useful because C code +# typically contains an ASCII fast path so ASCII characters don't need +# to be considered in the Unicode tables. "NONBMP" excludes codepoints +# above U+FFFF which is useful because such codepoints don't need to be +# supported in standard Ecmascript. +# + +import os, sys, math +import optparse +import dukutil + +def read_unicode_data(unidata, catsinc, catsexc, filterfunc): + "Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc." + res = [] + f = open(unidata, 'rb') + + def filter_none(cp): + return True + if filterfunc is None: + filterfunc = filter_none + + # The Unicode parsing is slow enough to warrant some speedups. + exclude_cat_exact = {} + for cat in catsexc: + exclude_cat_exact[cat] = True + include_cat_exact = {} + for cat in catsinc: + include_cat_exact[cat] = True + + for line in f: + #line = line.strip() + parts = line.split(';') + + codepoint = parts[0] + if not filterfunc(long(codepoint, 16)): + continue + + category = parts[2] + if exclude_cat_exact.has_key(category): + continue # quick reject + + rejected = False + for cat in catsexc: + if category.startswith(cat) or codepoint == cat: + rejected = True + break + if rejected: + continue + + if include_cat_exact.has_key(category): + res.append(line) + continue + + accepted = False + for cat in catsinc: + if category.startswith(cat) or codepoint == cat: + accepted = True + break + if accepted: + res.append(line) + + f.close() + + # Sort based on Unicode codepoint + def mycmp(a,b): + t1 = a.split(';') + t2 = b.split(';') + n1 = long(t1[0], 16) + n2 = long(t2[0], 16) + return cmp(n1, n2) + + res.sort(cmp=mycmp) + + return res + +def scan_ranges(lines): + "Scan continuous ranges from (filtered) UnicodeData.txt lines." + ranges = [] + range_start = None + prev = None + + for line in lines: + t = line.split(';') + n = long(t[0], 16) + if range_start is None: + range_start = n + else: + if n == prev + 1: + # continue range + pass + else: + ranges.append((range_start, prev)) + range_start = n + prev = n + + if range_start is not None: + ranges.append((range_start, prev)) + + return ranges + +def generate_png(lines, fname): + "Generate an illustrative PNG of the character set." + from PIL import Image + + m = {} + for line in lines: + t = line.split(';') + n = long(t[0], 16) + m[n] = 1 + + codepoints = 0x10ffff + 1 + width = int(256) + height = int(math.ceil(float(codepoints) / float(width))) + im = Image.new('RGB', (width, height)) + black = (0,0,0) + white = (255,255,255) + for cp in xrange(codepoints): + y = cp / width + x = cp % width + + if m.has_key(long(cp)): + im.putpixel((x,y), black) + else: + im.putpixel((x,y), white) + + im.save(fname) + +def generate_match_table1(ranges): + "Unused match table format." + + # This is an earlier match table format which is no longer used. + # IdentifierStart-UnicodeLetter has 445 ranges and generates a + # match table of 2289 bytes. 
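# A tiny illustration of the range scanning above (assumes the scan_ranges()
# helper defined earlier in this file; the input lines only mimic the
# semicolon separated UnicodeData.txt format):
example_lines = [ '0041;A;Lu', '0042;B;Lu', '0044;D;Lu' ]
print(scan_ranges(example_lines))   # [(65L, 66L), (68L, 68L)], i.e. U+0041..U+0042 and U+0044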
+ + data = [] + prev_re = None + + def genrange(rs, re): + if (rs > re): + raise Exception('assumption failed: rs=%d re=%d' % (rs, re)) + + while True: + now = re - rs + 1 + if now > 255: + now = 255 + data.append(now) # range now + data.append(0) # skip 0 + rs = rs + now + else: + data.append(now) # range now + break + + def genskip(ss, se): + if (ss > se): + raise Exception('assumption failed: ss=%d se=%s' % (ss, se)) + + while True: + now = se - ss + 1 + if now > 255: + now = 255 + data.append(now) # skip now + data.append(0) # range 0 + ss = ss + now + else: + data.append(now) # skip now + break + + for rs, re in ranges: + if prev_re is not None: + genskip(prev_re + 1, rs - 1) + genrange(rs, re) + prev_re = re + + num_entries = len(data) + + # header: start of first range + # num entries + hdr = [] + hdr.append(ranges[0][0] >> 8) # XXX: check that not 0x10000 or over + hdr.append(ranges[0][1] & 0xff) + hdr.append(num_entries >> 8) + hdr.append(num_entries & 0xff) + + return hdr + data + +def generate_match_table2(ranges): + "Unused match table format." + + # Another attempt at a match table which is also unused. + # Total tables for all current classes is now 1472 bytes. + + data = [] + + def enc(x): + while True: + if x < 0x80: + data.append(x) + break + data.append(0x80 + (x & 0x7f)) + x = x >> 7 + + prev_re = 0 + + for rs, re in ranges: + r1 = rs - prev_re # 1 or above (no unjoined ranges) + r2 = re - rs # 0 or above + enc(r1) + enc(r2) + prev_re = re + + enc(0) # end marker + + return data + +def generate_match_table3(ranges): + "Current match table format." + + # Yet another attempt, similar to generate_match_table2 except + # in packing format. + # + # Total match size now (at time of writing): 1194 bytes. + # + # This is the current encoding format used in duk_lexer.c. 
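# Both this format and generate_match_table2() store the ranges as deltas:
# for each range, first the gap from the end of the previous range, then the
# range length minus one, with a zero end marker.  A standalone sketch of the
# delta view for ASCII 'A'-'Z' and 'a'-'z' (illustrative only):
example_ranges = [ (0x41, 0x5a), (0x61, 0x7a) ]
prev_end = 0
deltas = []
for range_start, range_end in example_ranges:
    deltas.append(range_start - prev_end)   # gap from previous range end, always >= 1
    deltas.append(range_end - range_start)  # inclusive range length minus one
    prev_end = range_end
deltas.append(0)                            # end marker
print(deltas)                               # [65, 25, 7, 25, 0]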
+ + be = dukutil.BitEncoder() + + freq = [0] * (0x10ffff + 1) # informative + + def enc(x): + freq[x] += 1 + + if x <= 0x0e: + # 4-bit encoding + be.bits(x, 4) + return + x -= 0x0e + 1 + if x <= 0xfd: + # 12-bit encoding + be.bits(0x0f, 4) + be.bits(x, 8) + return + x -= 0xfd + 1 + if x <= 0xfff: + # 24-bit encoding + be.bits(0x0f, 4) + be.bits(0xfe, 8) + be.bits(x, 12) + return + x -= 0xfff + 1 + if True: + # 36-bit encoding + be.bits(0x0f, 4) + be.bits(0xff, 8) + be.bits(x, 24) + return + + raise Exception('cannot encode') + + prev_re = 0 + + for rs, re in ranges: + r1 = rs - prev_re # 1 or above (no unjoined ranges) + r2 = re - rs # 0 or above + enc(r1) + enc(r2) + prev_re = re + + enc(0) # end marker + + data, nbits = be.getBytes(), be.getNumBits() + return data, freq + +def main(): + parser = optparse.OptionParser() + parser.add_option('--unicode-data', dest='unicode_data') # UnicodeData.txt + parser.add_option('--special-casing', dest='special_casing') # SpecialCasing.txt + parser.add_option('--include-categories', dest='include_categories') + parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE') + parser.add_option('--out-source', dest='out_source') + parser.add_option('--out-header', dest='out_header') + parser.add_option('--out-png', dest='out_png') + parser.add_option('--table-name', dest='table_name', default='match_table') + (opts, args) = parser.parse_args() + + unidata = opts.unicode_data + catsinc = [] + if opts.include_categories != '': + catsinc = opts.include_categories.split(',') + catsexc = [] + if opts.exclude_categories != 'NONE': + catsexc = opts.exclude_categories.split(',') + + print 'CATSEXC: %s' % repr(catsexc) + print 'CATSINC: %s' % repr(catsinc) + + # pseudocategories + filter_ascii = ('ASCII' in catsexc) + filter_nonbmp = ('NONBMP' in catsexc) + + # Read raw result + def filter1(x): + if filter_ascii and x <= 0x7f: + # exclude ascii + return False + if filter_nonbmp and x >= 0x10000: + # exclude non-bmp + return False + return True + + print('read unicode data') + res = read_unicode_data(unidata, catsinc, catsexc, filter1) + print('done reading unicode data') + + # Raw output + #print('RAW OUTPUT:') + #print('===========') + #print('\n'.join(res)) + + # Scan ranges + #print('') + #print('RANGES:') + #print('=======') + ranges = scan_ranges(res) + #for i in ranges: + # if i[0] == i[1]: + # print('0x%04x' % i[0]) + # else: + # print('0x%04x ... 
0x%04x' % (i[0], i[1])) + #print('') + print('%d ranges total' % len(ranges)) + + # Generate match table + #print('') + #print('MATCH TABLE:') + #print('============') + #matchtable1 = generate_match_table1(ranges) + #matchtable2 = generate_match_table2(ranges) + matchtable3, freq = generate_match_table3(ranges) + #print 'match table: %s' % repr(matchtable3) + print 'match table length: %d bytes' % len(matchtable3) + print 'encoding freq:' + for i in xrange(len(freq)): + if freq[i] == 0: + continue + print ' %6d: %d' % (i, freq[i]) + + print('') + print('MATCH C TABLE -> file %s' % repr(opts.out_header)) + + # Create C source and header files + genc = dukutil.GenerateC() + genc.emitHeader('extract_chars.py') + genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True) + if opts.out_source is not None: + f = open(opts.out_source, 'wb') + f.write(genc.getString()) + f.close() + + genc = dukutil.GenerateC() + genc.emitHeader('extract_chars.py') + genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3))) + if opts.out_header is not None: + f = open(opts.out_header, 'wb') + f.write(genc.getString()) + f.close() + + # Image (for illustrative purposes only) + if opts.out_png is not None: + generate_png(res, opts.out_png) + +if __name__ == '__main__': + main() diff --git a/tools/extract_unique_options.py b/tools/extract_unique_options.py new file mode 100644 index 00000000..27ba6680 --- /dev/null +++ b/tools/extract_unique_options.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python2 +# +# Extract unique DUK_USE_xxx flags from current code base: +# +# $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in +# + +import os, sys, re + +# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched +# (only uppercase allowed) +re_use = re.compile(r'DUK_USE_[A-Z0-9_]+') +re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+') + +def main(): + uses = {} + opts = {} + + for fn in sys.argv[1:]: + f = open(fn, 'rb') + for line in f: + for t in re.findall(re_use, line): + if t[-1] != '_': # skip e.g. 'DUK_USE_' + uses[t] = True + for t in re.findall(re_opt, line): + if t[-1] != '_': + opts[t] = True + f.close() + + k = opts.keys() + k.sort() + for i in k: + print(i) + + k = uses.keys() + k.sort() + for i in k: + print(i) + +if __name__ == '__main__': + main() diff --git a/tools/genbuildparams.py b/tools/genbuildparams.py new file mode 100644 index 00000000..ebf5f502 --- /dev/null +++ b/tools/genbuildparams.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python2 +# +# Generate build parameter files based on build information. +# A C header is generated for C code, and a JSON file for +# build scripts etc which need to know the build config. 
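# The JSON output is a small flat object consumed by the build scripts; for
# example (all values are illustrative only):
#
#   { "version": "20000", "git_commit": "0123456789abcdef0123456789abcdef01234567",
#     "git_describe": "v2.0.0", "git_branch": "master" }
#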
+# + +import os +import sys +import json +import optparse + +import dukutil + +if __name__ == '__main__': + parser = optparse.OptionParser() + parser.add_option('--version', dest='version') + parser.add_option('--git-commit', dest='git_commit') + parser.add_option('--git-describe', dest='git_describe') + parser.add_option('--git-branch', dest='git_branch') + parser.add_option('--out-json', dest='out_json') + parser.add_option('--out-header', dest='out_header') + (opts, args) = parser.parse_args() + + t = { + 'version': opts.version, + 'git_commit': opts.git_commit, + 'git_describe': opts.git_describe, + 'git_branch': opts.git_branch, + } + + f = open(opts.out_json, 'wb') + f.write(dukutil.json_encode(t).encode('ascii')) + f.close() + + f = open(opts.out_header, 'wb') + f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n') + f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n') + f.write('/* automatically generated by genbuildparams.py, do not edit */\n') + f.write('\n') + f.write('/* DUK_VERSION is defined in duktape.h */') + f.write('\n') + f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n') + f.close() diff --git a/tools/genbuiltins.py b/tools/genbuiltins.py new file mode 100644 index 00000000..1477cf36 --- /dev/null +++ b/tools/genbuiltins.py @@ -0,0 +1,2985 @@ +#!/usr/bin/env python2 +# +# Generate initialization data for built-in strings and objects. +# +# Supports two different initialization approaches: +# +# 1. Bit-packed format for unpacking strings and objects during +# heap or thread init into RAM-based structures. This is the +# default behavior. +# +# 2. Embedding strings and/or objects into a read-only data section +# at compile time. This is useful for low memory targets to reduce +# memory usage. Objects in data section will be immutable. +# +# Both of these have practical complications like endianness differences, +# pointer compression variants, object property table layout variants, +# and so on. Multiple #ifdef'd initializer sections are emitted to cover +# all supported alternatives. +# + +import os +import sys +import re +import traceback +import json +import yaml +import math +import struct +import optparse +import copy + +import dukutil + +# Fixed seed for ROM strings, must match src/duk_heap_alloc.c. +DUK__FIXED_HASH_SEED = 0xabcd1234 + +# Base value for compressed ROM pointers, used range is [ROMPTR_FIRST,0xffff]. +# Must match DUK_USE_ROM_PTRCOMP_FIRST (generated header checks). +ROMPTR_FIRST = 0xf800 # 2048 should be enough; now around ~1000 used + +# +# Miscellaneous helpers +# + +# Convert Unicode to bytes, identifying Unicode U+0000 to U+00FF as bytes. +# This representation is used in YAML metadata and allows invalid UTF-8 to +# be represented exactly (which is necessary). +def unicode_to_bytes(x): + if isinstance(x, str): + return x + tmp = '' + for c in x: + if ord(c) > 0xff: + raise Exception('invalid codepoint: %r' % x) + tmp += chr(ord(c)) + assert(isinstance(tmp, str)) + return tmp + +# Convert bytes to Unicode, identifying bytes as U+0000 to U+00FF. +def bytes_to_unicode(x): + if isinstance(x, unicode): + return x + tmp = u'' + for c in x: + tmp += unichr(ord(c)) + assert(isinstance(tmp, unicode)) + return tmp + +# Convert all strings in an object to bytes recursively. Useful for +# normalizing all strings in a YAML document. 
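# A small illustration of the byte/Unicode convention above (illustrative
# only): codepoints U+0000..U+00FF are identified with bytes and anything
# higher is rejected, so arbitrary byte strings (even invalid UTF-8) can be
# represented exactly.
assert unicode_to_bytes(u'\u00e9') == '\xe9'
assert bytes_to_unicode('\xe9') == u'\xe9'
# unicode_to_bytes(u'\u0100') would raise: the codepoint does not fit in a byte.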
+def recursive_strings_to_bytes(doc): + def f(x): + if isinstance(x, unicode): + return unicode_to_bytes(x) + if isinstance(x, dict): + res = {} + for k in x.keys(): + res[f(k)] = f(x[k]) + return res + if isinstance(x, list): + res = [] + for e in x: + res.append(f(e)) + return res + return x + + return f(doc) + +# Convert all strings in an object to from bytes to Unicode recursively. +# Useful for writing back JSON/YAML dumps. +def recursive_bytes_to_strings(doc): + def f(x): + if isinstance(x, str): + return bytes_to_unicode(x) + if isinstance(x, dict): + res = {} + for k in x.keys(): + res[f(k)] = f(x[k]) + return res + if isinstance(x, list): + res = [] + for e in x: + res.append(f(e)) + return res + return x + + return f(doc) + +# Check if string is an "array index" in Ecmascript terms. +def string_is_arridx(v): + is_arridx = False + try: + ival = int(v) + if ival >= 0 and ival <= 0xfffffffe and ('%d' % ival == v): + is_arridx = True + except ValueError: + pass + + return is_arridx + +# +# Metadata loading, merging, and other preprocessing +# +# Final metadata object contains merged and normalized objects and strings. +# Keys added include (see more below): +# +# strings_stridx: string objects which have a stridx, matches stridx index order +# objects_bidx: objects which have a bidx, matches bidx index order +# objects_ram_toplevel: objects which are top level for RAM init +# +# Various helper keys are also added, containing auxiliary object/string +# lists, lookup maps, etc. See code below for details of these. +# + +def metadata_lookup_object(meta, obj_id): + return meta['_objid_to_object'][obj_id] + +def metadata_lookup_object_and_index(meta, obj_id): + for i,t in enumerate(meta['objects']): + if t['id'] == obj_id: + return t, i + return None, None + +def metadata_lookup_property(obj, key): + for p in obj['properties']: + if p['key'] == key: + return p + return None + +def metadata_lookup_property_and_index(obj, key): + for i,t in enumerate(obj['properties']): + if t['key'] == key: + return t, i + return None, None + +# Remove disabled objects and properties. +def metadata_remove_disabled(meta): + objlist = [] + for o in meta['objects']: + if o.get('disable', False): + print('Remove disabled object: %s' % o['id']) + else: + objlist.append(o) + + props = [] + for p in o['properties']: + if p.get('disable', False): + print('Remove disabled property: %s, object: %s' % (p['key'], o['id'])) + else: + props.append(p) + + o['properties'] = props + + meta['objects'] = objlist + +# Delete dangling references to removed/missing objects. +def metadata_delete_dangling_references_to_object(meta, obj_id): + for o in meta['objects']: + new_p = [] + for p in o['properties']: + v = p['value'] + ptype = None + if isinstance(v, dict): + ptype = p['value']['type'] + delprop = False + if ptype == 'object' and v['id'] == obj_id: + delprop = True + if ptype == 'accessor' and v.get('getter_id') == obj_id: + p['getter_id'] = None + if ptype == 'accessor' and v.get('setter_id') == obj_id: + p['setter_id'] = None + # XXX: Should empty accessor (= no getter, no setter) be deleted? + # If so, beware of shorthand. + if delprop: + print('Deleted property %s of object %s, points to deleted object %s' % \ + (p['key'], o['id'], obj_id)) + else: + new_p.append(p) + o['properties'] = new_p + +# Merge a user YAML file into current metadata. 
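# For reference, a minimal user metadata document in its parsed Python form
# (users write the YAML equivalent; all identifiers here are illustrative
# only) that the merge below understands:
example_user_meta = {
    'objects': [
        # Adding is the default action for a new object id.
        { 'id': 'bi_example_object', 'class': 'Object',
          'internal_prototype': 'bi_object_prototype',
          'properties': [
              { 'key': 'exampleConstant', 'value': 123, 'attributes': 'wc' }
          ] },
        # Modify an existing object: listed properties are replaced or added.
        { 'id': 'bi_example_target', 'modify': True,
          'properties': [
              { 'key': 'exampleHelper',
                'value': { 'type': 'function', 'native': 'example_native_helper', 'length': 1 } }
          ] }
    ]
}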
+def metadata_merge_user_objects(meta, user_meta): + if user_meta.has_key('add_objects'): + raise Exception('"add_objects" removed, use "objects" with "add: True"') + if user_meta.has_key('replace_objects'): + raise Exception('"replace_objects" removed, use "objects" with "replace: True"') + if user_meta.has_key('modify_objects'): + raise Exception('"modify_objects" removed, use "objects" with "modify: True"') + + for o in user_meta.get('objects', []): + if o.get('disable', False): + print('Skip disabled object: %s' % o['id']) + continue + targ, targ_idx = metadata_lookup_object_and_index(meta, o['id']) + + if o.get('delete', False): + print('Delete object: %s' % targ['id']) + if targ is None: + raise Exception('Cannot delete object %s which doesn\'t exist' % o['id']) + meta['objects'].pop(targ_idx) + metadata_delete_dangling_references_to_object(meta, targ['id']) + continue + + if o.get('replace', False): + print('Replace object %s' % o['id']) + if targ is None: + print('WARNING: object to be replaced doesn\'t exist, append new object') + meta['objects'].append(o) + else: + meta['objects'][targ_idx] = o + continue + + if o.get('add', False) or not o.get('modify', False): # 'add' is the default + print('Add object %s' % o['id']) + if targ is not None: + raise Exception('Cannot add object %s which already exists' % o['id']) + meta['objects'].append(o) + continue + + assert(o.get('modify', False)) # modify handling + if targ is None: + raise Exception('Cannot modify object %s which doesn\'t exist' % o['id']) + + for k in sorted(o.keys()): + # Merge top level keys by copying over, except 'properties' + if k == 'properties': + continue + targ[k] = o[k] + for p in o.get('properties', []): + if p.get('disable', False): + print('Skip disabled property: %s' % p['key']) + continue + prop = None + prop_idx = None + prop, prop_idx = metadata_lookup_property_and_index(targ, p['key']) + if prop is not None: + if p.get('delete', False): + print('Delete property %s of %s' % (p['key'], o['id'])) + targ['properties'].pop(prop_idx) + else: + print('Replace property %s of %s' % (p['key'], o['id'])) + targ['properties'][prop_idx] = p + else: + if p.get('delete', False): + print('Deleting property %s of %s: doesn\'t exist, nop' % (p['key'], o['id'])) + else: + print('Add property %s of %s' % (p['key'], o['id'])) + targ['properties'].append(p) + +# Normalize nargs for top level functions by defaulting 'nargs' from 'length'. +def metadata_normalize_nargs_length(meta): + # Default 'nargs' from 'length' for top level function objects. + for o in meta['objects']: + if o.has_key('nargs'): + continue + if not o.get('callable', False): + continue + for p in o['properties']: + if p['key'] != 'length': + continue + #print('Default nargs for top level: %r' % p) + assert(isinstance(p['value'], int)) + o['nargs'] = p['value'] + break + assert(o.has_key('nargs')) + + # Default 'nargs' from 'length' for function property shorthand. + for o in meta['objects']: + for p in o['properties']: + if not (isinstance(p['value'], dict) and p['value']['type'] == 'function'): + continue + pval = p['value'] + if not pval.has_key('length'): + print('Default length for function shorthand: %r' % p) + pval['length'] = 0 + if not pval.has_key('nargs'): + #print('Default nargs for function shorthand: %r' % p) + pval['nargs'] = pval['length'] + +# Prepare a list of built-in objects which need a runtime 'bidx'. 
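# A small illustration of the 'nargs' defaulting above (illustrative only):
# a callable top level object with no explicit 'nargs' picks it up from its
# 'length' property.
example_func = {
    'id': 'bi_example_func', 'callable': True,
    'properties': [ { 'key': 'length', 'value': 2, 'attributes': '' } ]
}
# After metadata_normalize_nargs_length({ 'objects': [ example_func ] }),
# example_func['nargs'] == 2.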
+def metadata_prepare_objects_bidx(meta): + objlist = meta['objects'] + meta['objects'] = [] + meta['objects_bidx'] = [] + objid_map = {} # temp map + + # Build helper index. + for o in objlist: + objid_map[o['id']] = o + + # Use 'builtins' as the bidx list with no filtering for now. + # Ideally we'd scan the actually needed indices from the source. + for o in meta['builtins']: + # No filtering now, just use list as is + obj = objid_map[o['id']] + obj['bidx_used'] = True + meta['objects'].append(obj) + meta['objects_bidx'].append(obj) + + # Append remaining objects. + for o in objlist: + if o.get('bidx_used', False): + # Already in meta['objects']. + pass + else: + meta['objects'].append(o) + +# Normalize metadata property shorthand. For example, if a proprety value +# is a shorthand function, create a function object and change the property +# to point to that function object. +def metadata_normalize_shorthand(meta): + # Gather objects through the top level built-ins list. + objs = [] + subobjs = [] + + def getSubObject(): + obj = {} + obj['id'] = 'subobj_%d' % len(subobjs) # synthetic ID + obj['properties'] = [] + obj['auto_generated'] = True # mark as autogenerated (just FYI) + subobjs.append(obj) + return obj + + def decodeFunctionShorthand(funprop): + # Convert the built-in function property "shorthand" into an actual + # object for ROM built-ins. + assert(funprop['value']['type'] == 'function') + val = funprop['value'] + obj = getSubObject() + props = obj['properties'] + obj['native'] = val['native'] + obj['nargs'] = val.get('nargs', val['length']) + obj['varargs'] = val.get('varargs', False) + obj['magic'] = val.get('magic', 0) + obj['internal_prototype'] = 'bi_function_prototype' + obj['class'] = 'Function' + obj['callable'] = True + obj['constructable'] = val.get('constructable', False) + props.append({ 'key': 'length', 'value': val['length'], 'attributes': '' }) + props.append({ 'key': 'name', 'value': funprop['key'], 'attributes': '' }) + return obj + + def addAccessor(funprop, magic, nargs, length, name, native_func): + assert(funprop['value']['type'] == 'accessor') + obj = getSubObject() + props = obj['properties'] + obj['native'] = native_func + obj['nargs'] = nargs + obj['varargs'] = False + obj['magic'] = magic + obj['internal_prototype'] = 'bi_function_prototype' + obj['class'] = 'Function' + obj['callable'] = True + obj['constructable'] = False + # Shorthand accessors are minimal and have no .length or .name + # right now. Use longhand if these matter. 
+ #props.append({ 'key': 'length', 'value': length, 'attributes': '' }) + #props.append({ 'key': 'name', 'value': name, 'attributes': '' }) + return obj + + def decodeGetterShorthand(key, funprop): + assert(funprop['value']['type'] == 'accessor') + val = funprop['value'] + return addAccessor(funprop, + val['getter_magic'], + val['getter_nargs'], + val.get('getter_length', 0), + key, + val['getter']) + + def decodeSetterShorthand(key, funprop): + assert(funprop['value']['type'] == 'accessor') + val = funprop['value'] + return addAccessor(funprop, + val['setter_magic'], + val['setter_nargs'], + val.get('setter_length', 0), + key, + val['setter']) + + def decodeStructuredValue(val): + #print('Decode structured value: %r' % val) + if isinstance(val, (int, long, float, str)): + return val # as is + elif isinstance(val, (dict)): + # Object: decode recursively + obj = decodeStructuredObject(val) + return { 'type': 'object', 'id': obj['id'] } + elif isinstance(val, (list)): + raise Exception('structured shorthand does not yet support array literals') + else: + raise Exception('unsupported value in structured shorthand: %r' % v) + + def decodeStructuredObject(val): + # XXX: We'd like to preserve dict order from YAML source but + # Python doesn't do that. Use sorted order to make the result + # deterministic. User can always use longhand for exact + # property control. + + #print('Decode structured object: %r' % val) + obj = getSubObject() + obj['class'] = 'Object' + obj['internal_prototype'] = 'bi_object_prototype' + + props = obj['properties'] + keys = sorted(val.keys()) + for k in keys: + #print('Decode property %s' % k) + prop = { 'key': k, 'value': decodeStructuredValue(val[k]), 'attributes': 'wec' } + props.append(prop) + + return obj + + def decodeStructuredShorthand(structprop): + assert(structprop['value']['type'] == 'structured') + val = structprop['value']['value'] + return decodeStructuredValue(val) + + def clonePropShared(prop): + res = {} + for k in [ 'key', 'attributes', 'autoLightfunc' ]: + if prop.has_key(k): + res[k] = prop[k] + return res + + for idx,obj in enumerate(meta['objects']): + props = [] + repl_props = [] + + for val in obj['properties']: + # Date.prototype.toGMTString must point to the same Function object + # as Date.prototype.toUTCString, so special case hack it here. + if obj['id'] == 'bi_date_prototype' and val['key'] == 'toGMTString': + #print('Skip Date.prototype.toGMTString') + continue + + if isinstance(val['value'], dict) and val['value']['type'] == 'function': + # Function shorthand. + subfun = decodeFunctionShorthand(val) + prop = clonePropShared(val) + prop['value'] = { 'type': 'object', 'id': subfun['id'] } + repl_props.append(prop) + elif isinstance(val['value'], dict) and val['value']['type'] == 'accessor' and \ + (val['value'].has_key('getter') or val['value'].has_key('setter')): + # Accessor normal and shorthand forms both use the type 'accessor', + # but are differentiated by properties. + sub_getter = decodeGetterShorthand(val['key'], val) + sub_setter = decodeSetterShorthand(val['key'], val) + prop = clonePropShared(val) + prop['value'] = { 'type': 'accessor', 'getter_id': sub_getter['id'], 'setter_id': sub_setter['id'] } + assert('a' in prop['attributes']) # If missing, weird things happen runtime + #print('Expand accessor shorthand: %r -> %r' % (val, prop)) + repl_props.append(prop) + elif isinstance(val['value'], dict) and val['value']['type'] == 'structured': + # Structured shorthand. 
+ subval = decodeStructuredShorthand(val) + prop = clonePropShared(val) + prop['value'] = subval + repl_props.append(prop) + print('Decoded structured shorthand for object %s, property %s' % (obj['id'], val['key'])) + elif isinstance(val['value'], dict) and val['value']['type'] == 'buffer': + # Duktape buffer type not yet supported. + raise Exception('Buffer type not yet supported for builtins: %r' % val) + elif isinstance(val['value'], dict) and val['value']['type'] == 'pointer': + # Duktape pointer type not yet supported. + raise Exception('Pointer type not yet supported for builtins: %r' % val) + else: + # Property already in normalized form. + repl_props.append(val) + + if obj['id'] == 'bi_date_prototype' and val['key'] == 'toUTCString': + #print('Clone Date.prototype.toUTCString to Date.prototype.toGMTString') + prop2 = copy.deepcopy(repl_props[-1]) + prop2['key'] = 'toGMTString' + repl_props.append(prop2) + + # Replace properties with a variant where function properties + # point to built-ins rather than using an inline syntax. + obj['properties'] = repl_props + + len_before = len(meta['objects']) + meta['objects'] += subobjs + len_after = len(meta['objects']) + + print('Normalized metadata shorthand, %d objects -> %d final objects' % (len_before, len_after)) + +# Normalize property attribute order, default attributes, etc. +def metadata_normalize_property_attributes(meta): + for o in meta['objects']: + for p in o['properties']: + orig_attrs = p.get('attributes', None) + is_accessor = (isinstance(p['value'], dict) and p['value']['type'] == 'accessor') + + # If missing, set default attributes. + attrs = orig_attrs + if attrs is None: + if is_accessor: + attrs = 'ca' # accessor default is configurable + else: + attrs = 'wc' # default is writable, configurable + #print('Defaulted attributes of %s/%s to %s' % (o['id'], p['key'], attrs)) + + # Decode flags to normalize their order in the end. + writable = 'w' in attrs + enumerable = 'e' in attrs + configurable = 'c' in attrs + accessor = 'a' in attrs + + # Force 'accessor' attribute for accessors. + if is_accessor and not accessor: + #print('Property %s is accessor but has no "a" attribute, add attribute' % p['key']) + accessor = True + + # Normalize order and write back. + attrs = '' + if writable: + attrs += 'w' + if enumerable: + attrs += 'e' + if configurable: + attrs += 'c' + if accessor: + attrs += 'a' + p['attributes'] = attrs + + if orig_attrs != attrs: + #print('Updated attributes of %s/%s from %r to %r' % (o['id'], p['key'], orig_attrs, attrs)) + pass + +# Normalize ROM property attributes. +def metadata_normalize_rom_property_attributes(meta): + for o in meta['objects']: + for p in o['properties']: + # ROM properties must not be configurable (runtime code + # depends on this). Writability is kept so that instance + # objects can override parent properties. + p['attributes'] = p['attributes'].replace('c', '') + +# Add a 'name' property for all top level functions; expected by RAM +# initialization code. +def metadata_normalize_ram_function_names(meta): + for o in meta['objects']: + if not o.get('callable', False): + continue + name_prop = None + for p in o['properties']: + if p['key'] == 'name': + name_prop = p + break + if name_prop is None: + print('Adding missing "name" property for top level function %s' % o['id']) + o['properties'].append({ 'key': 'name', 'value': '', 'attributes': '' }) + +# Add a built-in objects list for RAM initialization. 
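# A standalone sketch of the attribute normalization above (illustrative
# only; mirrors metadata_normalize_property_attributes for a single
# attribute string): flags are normalized to the order w, e, c, a, defaults
# are filled in, and accessors always carry the 'a' flag.
def normalize_attr_string(attrs, is_accessor):
    if attrs is None:
        attrs = 'ca' if is_accessor else 'wc'   # same defaults as above
    return ''.join([ c for c in 'weca' if c in attrs or (c == 'a' and is_accessor) ])

print(normalize_attr_string('cw', False))   # 'wc'
print(normalize_attr_string(None, True))    # 'ca'
print(normalize_attr_string('c', True))     # 'ca'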
+def metadata_add_ram_filtered_object_list(meta): + # For RAM init data to support user objects, we need to prepare a + # filtered top level object list, containing only those objects which + # need a value stack index during duk_hthread_builtins.c init process. + # + # Objects in meta['objects'] which are covered by inline property + # notation in the init data (this includes e.g. member functions like + # Math.cos) must not be present. + + objlist = [] + for o in meta['objects']: + keep = o.get('bidx_used', False) + if o.has_key('native') and not o.has_key('bidx'): + # Handled inline by run-time init code + pass + else: + # Top level object + keep = True + if keep: + objlist.append(o) + + print('Filtered RAM object list: %d objects with bidx, %d total top level objects' % \ + (len(meta['objects_bidx']), len(objlist))) + + meta['objects_ram_toplevel'] = objlist + +# Add missing strings into strings metadata. For example, if an object +# property key is not part of the strings list, append it there. This +# is critical for ROM builtins because all property keys etc must also +# be in ROM. +def metadata_normalize_missing_strings(meta, user_meta): + # We just need plain strings here. + strs_have = {} + for s in meta['strings']: + strs_have[s['str']] = True + + # For ROM builtins all the strings must be in the strings list, + # so scan objects for any strings not explicitly listed in metadata. + for idx, obj in enumerate(meta['objects']): + for prop in obj['properties']: + key = prop['key'] + if not strs_have.get(key): + #print('Add missing string: %r' % key) + meta['strings'].append({ 'str': key, '_auto_add_ref': True }) + strs_have[key] = True + if prop.has_key('value') and isinstance(prop['value'], (str, unicode)): + val = unicode_to_bytes(prop['value']) # XXX: should already be + if not strs_have.get(val): + #print('Add missing string: %r' % val) + meta['strings'].append({ 'str': val, '_auto_add_ref': True }) + strs_have[val] = True + + # Force user strings to be in ROM data. + for s in user_meta.get('add_forced_strings', []): + if not strs_have.get(s['str']): + #print('Add user string: %r' % s['str']) + s['_auto_add_user'] = True + meta['strings'].append(s) + +# Convert built-in function properties into lightfuncs where applicable. +def metadata_convert_lightfuncs(meta): + num_converted = 0 + num_skipped = 0 + + for o in meta['objects']: + for p in o['properties']: + v = p['value'] + ptype = None + if isinstance(v, dict): + ptype = p['value']['type'] + if ptype != 'object': + continue + targ, targ_idx = metadata_lookup_object_and_index(meta, p['value']['id']) + + reasons = [] + if not targ.get('callable', False): + reasons.append('not-callable') + #if targ.get('constructable', False): + # reasons.append('constructable') + + lf_len = 0 + for p2 in targ['properties']: + # Don't convert if function has more properties than + # we're willing to sacrifice. + #print(' - Check %r . 
%s' % (o.get('id', None), p2['key'])) + if p2['key'] == 'length' and isinstance(p2['value'], (int, long)): + lf_len = p2['value'] + if p2['key'] not in [ 'length', 'name' ]: + reasons.append('nonallowed-property') + + if not p.get('autoLightfunc', True): + print('Automatic lightfunc conversion rejected for key %s, explicitly requested in metadata' % p['key']) + reasons.append('no-auto-lightfunc') + + # lf_len comes from actual property table (after normalization) + if targ.has_key('magic'): + try: + # Magic values which resolve to 'bidx' indices cannot + # be resolved here yet, because the bidx map is not + # yet ready. If so, reject the lightfunc conversion + # for now. In practice this doesn't matter. + lf_magic = resolve_magic(targ.get('magic'), {}) # empty map is a "fake" bidx map + #print('resolved magic ok -> %r' % lf_magic) + except Exception, e: + #print('Failed to resolve magic for %r: %r' % (p['key'], e)) + reasons.append('magic-resolve-failed') + lf_magic = 0xffffffff # dummy, will be out of bounds + else: + lf_magic = 0 + if targ.get('varargs', True): + lf_nargs = None + lf_varargs = True + else: + lf_nargs = targ['nargs'] + lf_varargs = False + + if lf_len < 0 or lf_len > 15: + #print('lf_len out of bounds: %r' % lf_len) + reasons.append('len-bounds') + if lf_magic < -0x80 or lf_magic > 0x7f: + #print('lf_magic out of bounds: %r' % lf_magic) + reasons.append('magic-bounds') + if not lf_varargs and (lf_nargs < 0 or lf_nargs > 14): + #print('lf_nargs out of bounds: %r' % lf_nargs) + reasons.append('nargs-bounds') + + if len(reasons) > 0: + #print('Don\'t convert to lightfunc: %r %r (%r): %r' % (o.get('id', None), p.get('key', None), p['value']['id'], reasons)) + num_skipped += 1 + continue + + p_id = p['value']['id'] + p['value'] = { + 'type': 'lightfunc', + 'native': targ['native'], + 'length': lf_len, + 'magic': lf_magic, + 'nargs': lf_nargs, + 'varargs': lf_varargs + } + #print(' - Convert to lightfunc: %r %r (%r) -> %r' % (o.get('id', None), p.get('key', None), p_id, p['value'])) + + num_converted += 1 + + print('Converted %d built-in function properties to lightfuncs, %d skipped as non-eligible' % (num_converted, num_skipped)) + +# Detect objects not reachable from any object with a 'bidx'. This is usually +# a user error because such objects can't be reached at runtime so they're +# useless in RAM or ROM init data. +def metadata_remove_orphan_objects(meta): + reachable = {} + + for o in meta['objects']: + if o.get('bidx_used', False): + reachable[o['id']] = True + + while True: + reachable_count = len(reachable.keys()) + + def _markId(obj_id): + if obj_id is None: + return + reachable[obj_id] = True + + for o in meta['objects']: + if not reachable.has_key(o['id']): + continue + for p in o['properties']: + # Shorthand has been normalized so no need + # to support it here. 
+ v = p['value'] + ptype = None + if isinstance(v, dict): + ptype = p['value']['type'] + if ptype == 'object': + _markId(v['id']) + if ptype == 'accessor': + _markId(v.get('getter_id')) + _markId(v.get('setter_id')) + + print('Mark reachable: reachable count initially %d, now %d' % \ + (reachable_count, len(reachable.keys()))) + if reachable_count == len(reachable.keys()): + break + + num_deleted = 0 + deleted = True + while deleted: + deleted = False + for i,o in enumerate(meta['objects']): + if not reachable.has_key(o['id']): + #print('WARNING: object %s not reachable, dropping' % o['id']) + meta['objects'].pop(i) + deleted = True + num_deleted += 1 + break + + print('Deleted %d unreachable objects' % num_deleted) + +# Add C define names for builtin strings. These defines are added to all +# strings, even when they won't get a stridx because the define names are +# used to autodetect referenced strings. +def metadata_add_string_define_names(strlist, special_defs): + for s in strlist: + v = s['str'] + + if special_defs.has_key(v): + s['define'] = 'DUK_STRIDX_' + special_defs[v] + continue + + if len(v) >= 1 and v[0] == '\xff': + pfx = 'DUK_STRIDX_INT_' + v = v[1:] + else: + pfx = 'DUK_STRIDX_' + + t = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', v) # add underscores: aB -> a_B + s['define'] = pfx + t.upper() + +# Add a 'stridx_used' flag for strings which need a stridx. +def metadata_add_string_used_stridx(strlist, used_stridx_meta): + defs_needed = {} + defs_found = {} + for s in used_stridx_meta['used_stridx_defines']: + defs_needed[s] = True + + # strings whose define is referenced + for s in strlist: + if s.has_key('define') and defs_needed.has_key(s['define']): + s['stridx_used'] = True + defs_found[s['define']] = True + + # duk_lexer.h needs all reserved words + for s in strlist: + if s.get('reserved_word', False): + s['stridx_used'] = True + + # ensure all needed defines are provided + defs_found['DUK_STRIDX_START_RESERVED'] = True # special defines provided automatically + defs_found['DUK_STRIDX_START_STRICT_RESERVED'] = True + defs_found['DUK_STRIDX_END_RESERVED'] = True + defs_found['DUK_STRIDX_TO_TOK'] = True + for k in sorted(defs_needed.keys()): + if not defs_found.has_key(k): + raise Exception('source code needs define %s not provided by strings' % repr(k)) + +# Merge duplicate strings in string metadata. +def metadata_merge_string_entries(strlist): + # The raw string list may contain duplicates so merge entries. + # The list is processed in reverse because the last entry should + # "win" and keep its place (this matters for reserved words). + + strs = [] + str_map = {} # plain string -> object in strs[] + tmp = copy.deepcopy(strlist) + tmp.reverse() + for s in tmp: + prev = str_map.get(s['str']) + if prev is not None: + for k in s.keys(): + if prev.has_key(k) and prev[k] != s[k]: + raise Exception('fail to merge string entry, conflicting keys: %r <-> %r' % (prev, s)) + prev[k] = s[k] + else: + strs.append(s) + str_map[s['str']] = s + strs.reverse() + return strs + +# Order builtin strings (strings with a stridx) into an order satisfying +# multiple constraints. +def metadata_order_builtin_strings(input_strlist, keyword_list, strip_unused_stridx=False): + # Strings are ordered in the result as follows: + # 1. Non-reserved words requiring 8-bit indices + # 2. Non-reserved words not requiring 8-bit indices + # 3. Reserved words in non-strict mode only + # 4. 
Reserved words in strict mode + # + # Reserved words must follow an exact order because they are + # translated to/from token numbers by addition/subtraction. + # Some strings require an 8-bit index and must be in the + # beginning. + + tmp_strs = [] + for s in copy.deepcopy(input_strlist): + if not s.get('stridx_used', False): + # Drop strings which are not actually needed by src/*.(c|h). + # Such strings won't be in heap->strs[] or ROM legacy list. + pass + else: + tmp_strs.append(s) + + # The reserved word list must match token order in duk_lexer.h + # exactly, so pluck them out first. + + str_index = {} + kw_index = {} + keywords = [] + strs = [] + for idx,s in enumerate(tmp_strs): + str_index[s['str']] = s + for idx,s in enumerate(keyword_list): + keywords.append(str_index[s]) + kw_index[s] = True + for idx,s in enumerate(tmp_strs): + if not kw_index.has_key(s['str']): + strs.append(s) + + # Sort the strings by category number; within category keep + # previous order. + + for idx,s in enumerate(strs): + s['_idx'] = idx # for ensuring stable sort + + def req8Bit(s): + return s.get('class_name', False) # currently just class names + + def getCat(s): + req8 = req8Bit(s) + if s.get('reserved_word', False): + # XXX: unused path now, because keywords are "plucked out" + # explicitly. + assert(not req8) + if s.get('future_reserved_word_strict', False): + return 4 + else: + return 3 + elif req8: + return 1 + else: + return 2 + + def sortCmp(a,b): + return cmp( (getCat(a),a['_idx']), (getCat(b),b['_idx']) ) + + strs.sort(cmp=sortCmp) + + for idx,s in enumerate(strs): + # Remove temporary _idx properties + del s['_idx'] + + for idx,s in enumerate(strs): + if req8Bit(s) and idx >= 256: + raise Exception('8-bit string index not satisfied: ' + repr(s)) + + return strs + keywords + +# Dump metadata into a JSON file. +def dump_metadata(meta, fn): + tmp = json.dumps(recursive_bytes_to_strings(meta), indent=4) + with open(fn, 'wb') as f: + f.write(tmp) + print('Wrote metadata dump to %s' % fn) + +# Main metadata loading function: load metadata from multiple sources, +# merge and normalize, prepare various indexes etc. +def load_metadata(opts, rom=False, build_info=None): + # Load built-in strings and objects. + with open(opts.strings_metadata, 'rb') as f: + strings_metadata = recursive_strings_to_bytes(yaml.load(f)) + with open(opts.objects_metadata, 'rb') as f: + objects_metadata = recursive_strings_to_bytes(yaml.load(f)) + + # Merge strings and objects metadata as simple top level key merge. + meta = {} + for k in objects_metadata.keys(): + meta[k] = objects_metadata[k] + for k in strings_metadata.keys(): + meta[k] = strings_metadata[k] + + # Add user objects. + user_meta = {} + for fn in opts.user_builtin_metadata: + print('Merging user builtin metadata file %s' % fn) + with open(fn, 'rb') as f: + user_meta = recursive_strings_to_bytes(yaml.load(f)) + metadata_merge_user_objects(meta, user_meta) + + # Remove disabled objects and properties. + metadata_remove_disabled(meta) + + # Normalize 'nargs' and 'length' defaults. + metadata_normalize_nargs_length(meta) + + # Normalize property attributes. + metadata_normalize_property_attributes(meta) + + # Normalize property shorthand into full objects. + metadata_normalize_shorthand(meta) + + # RAM top-level functions must have a 'name'. + if not rom: + metadata_normalize_ram_function_names(meta) + + # Add Duktape.version and (Duktape.env for ROM case).
+ for o in meta['objects']: + if o['id'] == 'bi_duktape': + o['properties'].insert(0, { 'key': 'version', 'value': int(build_info['version']), 'attributes': '' }) + if rom: + # Use a fixed (quite dummy for now) Duktape.env + # when ROM builtins are in use. In the RAM case + # this is added during global object initialization + # based on config options in use. + o['properties'].insert(0, { 'key': 'env', 'value': 'ROM', 'attributes': '' }) + + # Normalize property attributes (just in case shorthand handling + # didn't add attributes to all properties). + metadata_normalize_property_attributes(meta) + + # For ROM objects, mark all properties non-configurable. + if rom: + metadata_normalize_rom_property_attributes(meta) + + # Convert built-in function properties automatically into + # lightfuncs if requested and function is eligible. + if rom and opts.rom_auto_lightfunc: + metadata_convert_lightfuncs(meta) + + # Create a list of objects needing a 'bidx'. This is now just + # based on the 'builtins' metadata list but could be dynamically + # scanned somehow. Ensure 'objects' and 'objects_bidx' match + # in order for shared length. + metadata_prepare_objects_bidx(meta) + + # Merge duplicate strings. + meta['strings'] = metadata_merge_string_entries(meta['strings']) + + # Prepare an ordered list of strings with 'stridx': + # - Add a 'stridx_used' flag for strings which need an index in current code base + # - Add a C define (DUK_STRIDX_xxx) for such strings + # - Compute a stridx string order satisfying current runtime constraints + # + # The meta['strings_stridx'] result will be in proper order and stripped of + # any strings which don't need a stridx. + metadata_add_string_define_names(meta['strings'], meta['special_define_names']) + with open(opts.used_stridx_metadata, 'rb') as f: + metadata_add_string_used_stridx(meta['strings'], json.loads(f.read())) + meta['strings_stridx'] = metadata_order_builtin_strings(meta['strings'], meta['reserved_word_token_order']) + + # For the ROM build: add any strings referenced by built-in objects + # into the string list (not the 'stridx' list though): all strings + # referenced by ROM objects must also be in ROM. + if rom: + for fn in opts.user_builtin_metadata: + # XXX: awkward second pass + with open(fn, 'rb') as f: + user_meta = recursive_strings_to_bytes(yaml.load(f)) + metadata_normalize_missing_strings(meta, user_meta) + metadata_normalize_missing_strings(meta, {}) # in case no files + + # Check for orphan objects and remove them. + metadata_remove_orphan_objects(meta) + + # Add final stridx and bidx indices to metadata objects and strings. + idx = 0 + for o in meta['objects']: + if o.get('bidx_used', False): + o['bidx'] = idx + idx += 1 + idx = 0 + for s in meta['strings']: + if s.get('stridx_used', False): + s['stridx'] = idx + idx += 1 + + # Prepare a filtered RAM top level object list, needed for technical + # reasons during RAM init handling. + if not rom: + metadata_add_ram_filtered_object_list(meta) + + # Sanity check: object index must match 'bidx' for all objects + # which have a runtime 'bidx'. This is assumed by e.g. RAM + # thread init. + for i,o in enumerate(meta['objects']): + if i < len(meta['objects_bidx']): + assert(meta['objects_bidx'][i] == meta['objects'][i]) + if o.has_key('bidx'): + assert(o['bidx'] == i) + + # Create a set of helper lists and maps now that the metadata is + # in its final form. 
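+ # Illustrative entries after this step (actual values depend on the
+ # metadata in use and are hypothetical here):
+ #   meta['_plain_to_stridx']['length'] -> 24
+ #   meta['_objid_to_bidx']['bi_object_prototype'] -> 2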
+ meta['_strings_plain'] = [] + meta['_strings_stridx_plain'] = [] + meta['_stridx_to_string'] = {} + meta['_idx_to_string'] = {} + meta['_stridx_to_plain'] = {} + meta['_idx_to_plain'] = {} + meta['_string_to_stridx'] = {} + meta['_plain_to_stridx'] = {} + meta['_string_to_idx'] = {} + meta['_plain_to_idx'] = {} + meta['_define_to_stridx'] = {} + meta['_stridx_to_define'] = {} + meta['_is_plain_reserved_word'] = {} + meta['_is_plain_strict_reserved_word'] = {} + meta['_objid_to_object'] = {} + meta['_objid_to_bidx'] = {} + meta['_objid_to_idx'] = {} + meta['_objid_to_ramidx'] = {} + meta['_bidx_to_objid'] = {} + meta['_idx_to_objid'] = {} + meta['_bidx_to_object'] = {} + meta['_idx_to_object'] = {} + + for i,s in enumerate(meta['strings']): + assert(s['str'] not in meta['_strings_plain']) + meta['_strings_plain'].append(s['str']) + if s.get('reserved_word', False): + meta['_is_plain_reserved_word'][s['str']] = True # includes also strict reserved words + if s.get('future_reserved_word_strict', False): + meta['_is_plain_strict_reserved_word'][s['str']] = True + meta['_idx_to_string'][i] = s + meta['_idx_to_plain'][i] = s['str'] + meta['_plain_to_idx'][s['str']] = i + #meta['_string_to_idx'][s] = i + for i,s in enumerate(meta['strings_stridx']): + assert(s.get('stridx_used', False) == True) + meta['_strings_stridx_plain'].append(s['str']) + meta['_stridx_to_string'][i] = s + meta['_stridx_to_plain'][i] = s['str'] + #meta['_string_to_stridx'][s] = i + meta['_plain_to_stridx'][s['str']] = i + meta['_define_to_stridx'][s['define']] = i + meta['_stridx_to_define'][i] = s['define'] + for i,o in enumerate(meta['objects']): + meta['_objid_to_object'][o['id']] = o + meta['_objid_to_idx'][o['id']] = i + meta['_idx_to_objid'][i] = o['id'] + meta['_idx_to_object'][i] = o + for i,o in enumerate(meta['objects_bidx']): + assert(o.get('bidx_used', False) == True) + meta['_objid_to_bidx'][o['id']] = i + meta['_bidx_to_objid'][i] = o['id'] + meta['_bidx_to_object'][i] = o + if meta.has_key('objects_ram_toplevel'): + for i,o in enumerate(meta['objects_ram_toplevel']): + meta['_objid_to_ramidx'][o['id']] = i + + # Dump stats. + + if rom: + meta_name = 'ROM' + else: + meta_name = 'RAM' + + count_add_ref = 0 + count_add_user = 0 + for s in meta['strings']: + if s.get('_auto_add_ref', False): + count_add_ref += 1 + if s.get('_auto_add_user', False): + count_add_user += 1 + count_add = count_add_ref + count_add_user + + print(('Prepared %s metadata: %d objects, %d objects with bidx, ' + \ + '%d strings, %d strings with stridx, %d strings added ' + \ + '(%d property key references, %d user strings)') % \ + (meta_name, len(meta['objects']), len(meta['objects_bidx']), \ + len(meta['strings']), len(meta['strings_stridx']), \ + count_add, count_add_ref, count_add_user)) + + return meta + +# +# Metadata helpers +# + +# Magic values for Math built-in. +math_onearg_magic = { + 'fabs': 0, # BI_MATH_FABS_IDX + 'acos': 1, # BI_MATH_ACOS_IDX + 'asin': 2, # BI_MATH_ASIN_IDX + 'atan': 3, # BI_MATH_ATAN_IDX + 'ceil': 4, # BI_MATH_CEIL_IDX + 'cos': 5, # BI_MATH_COS_IDX + 'exp': 6, # BI_MATH_EXP_IDX + 'floor': 7, # BI_MATH_FLOOR_IDX + 'log': 8, # BI_MATH_LOG_IDX + 'round': 9, # BI_MATH_ROUND_IDX + 'sin': 10, # BI_MATH_SIN_IDX + 'sqrt': 11, # BI_MATH_SQRT_IDX + 'tan': 12 # BI_MATH_TAN_IDX +} +math_twoarg_magic = { + 'atan2': 0, # BI_MATH_ATAN2_IDX + 'pow': 1 # BI_MATH_POW_IDX +} + +# Magic values for Array built-in. 
+array_iter_magic = { + 'every': 0, # BI_ARRAY_ITER_EVERY + 'some': 1, # BI_ARRAY_ITER_SOME + 'forEach': 2, # BI_ARRAY_ITER_FOREACH + 'map': 3, # BI_ARRAY_ITER_MAP + 'filter': 4 # BI_ARRAY_ITER_FILTER +} + +# Magic value for typedarray/node.js buffer read field operations. +def magic_readfield(elem, signed=None, bigendian=None, typedarray=None): + # Must match duk__FLD_xxx in duk_bi_buffer.c + elemnum = { + '8bit': 0, + '16bit': 1, + '32bit': 2, + 'float': 3, + 'double': 4, + 'varint': 5 + }[elem] + if signed == True: + signednum = 1 + elif signed == False: + signednum = 0 + else: + raise Exception('missing "signed"') + if bigendian == True: + bigendiannum = 1 + elif bigendian == False: + bigendiannum = 0 + else: + raise Exception('missing "bigendian"') + if typedarray == True: + typedarraynum = 1 + elif typedarray == False: + typedarraynum = 0 + else: + raise Exception('missing "typedarray"') + return elemnum + (signednum << 4) + (bigendiannum << 3) + (typedarraynum << 5) + +# Magic value for typedarray/node.js buffer write field operations. +def magic_writefield(elem, signed=None, bigendian=None, typedarray=None): + return magic_readfield(elem, signed=signed, bigendian=bigendian, typedarray=typedarray) + +# Magic value for typedarray constructors. +def magic_typedarray_constructor(elem, shift): + # Must match duk_hbufobj.h header + elemnum = { + 'uint8': 0, + 'uint8clamped': 1, + 'int8': 2, + 'uint16': 3, + 'int16': 4, + 'uint32': 5, + 'int32': 6, + 'float32': 7, + 'float64': 8 + }[elem] + return (elemnum << 2) + shift + +# Resolve a magic value from a YAML metadata element into an integer. +def resolve_magic(elem, objid_to_bidx): + if elem is None: + return 0 + if isinstance(elem, (int, long)): + v = int(elem) + if not (v >= -0x8000 and v <= 0x7fff): + raise Exception('invalid plain value for magic: %s' % repr(v)) + return v + if not isinstance(elem, dict): + raise Exception('invalid magic: %r' % elem) + + assert(elem.has_key('type')) + if elem['type'] == 'bidx': + # Maps to thr->builtins[]. + v = elem['id'] + return objid_to_bidx[v] + elif elem['type'] == 'plain': + v = elem['value'] + if not (v >= -0x8000 and v <= 0x7fff): + raise Exception('invalid plain value for magic: %s' % repr(v)) + return v + elif elem['type'] == 'math_onearg': + return math_onearg_magic[elem['funcname']] + elif elem['type'] == 'math_twoarg': + return math_twoarg_magic[elem['funcname']] + elif elem['type'] == 'array_iter': + return array_iter_magic[elem['funcname']] + elif elem['type'] == 'typedarray_constructor': + return magic_typedarray_constructor(elem['elem'], elem['shift']) + elif elem['type'] == 'buffer_readfield': + return magic_readfield(elem['elem'], elem['signed'], elem['bigendian'], elem['typedarray']) + elif elem['type'] == 'buffer_writefield': + return magic_writefield(elem['elem'], elem['signed'], elem['bigendian'], elem['typedarray']) + else: + raise Exception('invalid magic type: %r' % elem) + +# Helper to find a property from a property list, remove it from the +# property list, and return the removed property. +def steal_prop(props, key): + for idx,prop in enumerate(props): + if prop['key'] == key: + return props.pop(idx) + return None + +# +# RAM initialization data +# +# Init data for built-in strings and objects. The init data for both +# strings and objects is a bit-packed stream tailored to match the decoders +# in duk_heap_alloc.c (strings) and duk_hthread_builtins.c (objects). 
+# Various bitfield sizes are used to minimize the bitstream size without +# resorting to actual, expensive compression. The goal is to minimize the +# overall size of the init code and the init data. +# +# The built-in data created here is used to set up initial RAM versions +# of the strings and objects. References to these objects are tracked in +# heap->strs[] and thr->builtins[] which allows Duktape internals to refer +# to built-ins e.g. as thr->builtins[DUK_BIDX_STRING_PROTOTYPE]. +# +# Not all strings and objects need to be reachable through heap->strs[] +# or thr->builtins[]: the strings/objects that need to be in these arrays +# is determined based on metadata and source code scanning. +# + +# XXX: Reserved word stridxs could be made to match token numbers +# directly so that a duk_stridx2token[] would not be needed. + +# Default property attributes, see E5 Section 15 beginning. +LENGTH_PROPERTY_ATTRIBUTES = '' +ACCESSOR_PROPERTY_ATTRIBUTES = 'c' +DEFAULT_DATA_PROPERTY_ATTRIBUTES = 'wc' + +# Encoding constants (must match duk_hthread_builtins.c). +CLASS_BITS = 5 +BIDX_BITS = 7 +STRIDX_BITS = 9 # would be nice to optimize to 8 +NATIDX_BITS = 8 +NUM_NORMAL_PROPS_BITS = 6 +NUM_FUNC_PROPS_BITS = 6 +PROP_FLAGS_BITS = 3 +STRING_LENGTH_BITS = 8 +STRING_CHAR_BITS = 7 +LENGTH_PROP_BITS = 3 +NARGS_BITS = 3 +PROP_TYPE_BITS = 3 +MAGIC_BITS = 16 + +NARGS_VARARGS_MARKER = 0x07 +NO_CLASS_MARKER = 0x00 # 0 = DUK_HOBJECT_CLASS_NONE +NO_BIDX_MARKER = 0x7f +NO_STRIDX_MARKER = 0xff + +PROP_TYPE_DOUBLE = 0 +PROP_TYPE_STRING = 1 +PROP_TYPE_STRIDX = 2 +PROP_TYPE_BUILTIN = 3 +PROP_TYPE_UNDEFINED = 4 +PROP_TYPE_BOOLEAN_TRUE = 5 +PROP_TYPE_BOOLEAN_FALSE = 6 +PROP_TYPE_ACCESSOR = 7 + +# must match duk_hobject.h +PROPDESC_FLAG_WRITABLE = (1 << 0) +PROPDESC_FLAG_ENUMERABLE = (1 << 1) +PROPDESC_FLAG_CONFIGURABLE = (1 << 2) +PROPDESC_FLAG_ACCESSOR = (1 << 3) # unused now + +# Class names, numeric indices must match duk_hobject.h class numbers. +class_names = [ + 'Unused', + 'Arguments', + 'Array', + 'Boolean', + 'Date', + 'Error', + 'Function', + 'JSON', + 'Math', + 'Number', + 'Object', + 'RegExp', + 'String', + 'global', + 'ObjEnv', + 'DecEnv', + 'Buffer', + 'Pointer', + 'Thread', +] +class2num = {} +for i,v in enumerate(class_names): + class2num[v] = i + +# Map class name to a class number. +def class_to_number(x): + return class2num[x] + +# Generate bit-packed RAM string init data. +def gen_ramstr_initdata_bitpacked(meta): + be = dukutil.BitEncoder() + + # Strings are encoded as follows: a string begins in lowercase + # mode and recognizes the following 5-bit symbols: + # + # 0-25 'a' ... 'z' + # 26 '_' + # 27 0x00 (actually decoded to 0xff, internal marker) + # 28 reserved + # 29 switch to uppercase for one character + # (next 5-bit symbol must be in range 0-25) + # 30 switch to uppercase + # 31 read a 7-bit character verbatim + # + # Uppercase mode is the same except codes 29 and 30 switch to + # lowercase. + + UNDERSCORE = 26 + ZERO = 27 + SWITCH1 = 29 + SWITCH = 30 + SEVENBIT = 31 + + maxlen = 0 + n_optimal = 0 + n_switch1 = 0 + n_switch = 0 + n_sevenbit = 0 + + for s_obj in meta['strings_stridx']: + s = s_obj['str'] + + be.bits(len(s), 5) + + if len(s) > maxlen: + maxlen = len(s) + + # 5-bit character, mode specific + mode = 'lowercase' + + for idx, c in enumerate(s): + # This encoder is not that optimal, but good enough for now. 
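+ # Illustrative walk-through: "Math" is emitted as its length 4 (5 bits)
+ # followed by SWITCH1 (29), 12 ('M'), 0 ('a'), 19 ('t') and 7 ('h'),
+ # i.e. 30 bits in total; SWITCH1 keeps the mode at 'lowercase'.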
+ + islower = (ord(c) >= ord('a') and ord(c) <= ord('z')) + isupper = (ord(c) >= ord('A') and ord(c) <= ord('Z')) + islast = (idx == len(s) - 1) + isnextlower = False + isnextupper = False + if not islast: + c2 = s[idx+1] + isnextlower = (ord(c2) >= ord('a') and ord(c2) <= ord('z')) + isnextupper = (ord(c2) >= ord('A') and ord(c2) <= ord('Z')) + + if c == '_': + be.bits(UNDERSCORE, 5) + n_optimal += 1 + elif c == '\xff': + # A 0xff prefix (never part of valid UTF-8) is used for internal properties. + # It is encoded as 0x00 in generated init data for technical reasons: it + # keeps lookup table elements 7 bits instead of 8 bits. + be.bits(ZERO, 5) + n_optimal += 1 + elif islower and mode == 'lowercase': + be.bits(ord(c) - ord('a'), 5) + n_optimal += 1 + elif isupper and mode == 'uppercase': + be.bits(ord(c) - ord('A'), 5) + n_optimal += 1 + elif islower and mode == 'uppercase': + if isnextlower: + be.bits(SWITCH, 5) + be.bits(ord(c) - ord('a'), 5) + mode = 'lowercase' + n_switch += 1 + else: + be.bits(SWITCH1, 5) + be.bits(ord(c) - ord('a'), 5) + n_switch1 += 1 + elif isupper and mode == 'lowercase': + if isnextupper: + be.bits(SWITCH, 5) + be.bits(ord(c) - ord('A'), 5) + mode = 'uppercase' + n_switch += 1 + else: + be.bits(SWITCH1, 5) + be.bits(ord(c) - ord('A'), 5) + n_switch1 += 1 + else: + assert(ord(c) >= 0 and ord(c) <= 127) + be.bits(SEVENBIT, 5) + be.bits(ord(c), 7) + n_sevenbit += 1 + #print('sevenbit for: %r' % c) + + # end marker not necessary, C code knows length from define + + res = be.getByteString() + + print('%d ram strings, %d bytes of string init data, %d maximum string length, ' + \ + 'encoding: optimal=%d,switch1=%d,switch=%d,sevenbit=%d') % \ + (len(meta['strings_stridx']), len(res), maxlen, \ + n_optimal, n_switch1, n_switch, n_sevenbit) + + return res, maxlen + +# Functions to emit string-related source/header parts. + +def emit_ramstr_source_strinit_data(genc, strdata): + genc.emitArray(strdata, 'duk_strings_data', visibility='DUK_INTERNAL', typename='duk_uint8_t', intvalues=True, const=True, size=len(strdata)) + +def emit_ramstr_header_strinit_defines(genc, meta, strdata, strmaxlen): + genc.emitLine('#if !defined(DUK_SINGLE_FILE)') + genc.emitLine('DUK_INTERNAL_DECL const duk_uint8_t duk_strings_data[%d];' % len(strdata)) + genc.emitLine('#endif /* !DUK_SINGLE_FILE */') + genc.emitDefine('DUK_STRDATA_MAX_STRLEN', strmaxlen) + genc.emitDefine('DUK_STRDATA_DATA_LENGTH', len(strdata)) + +# This is used for both RAM and ROM strings. 
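+# Illustrative output for one string (index value hypothetical; the exact
+# comment layout comes from dukutil's emitDefine()):
+#   #define DUK_STRIDX_LENGTH 24 /* 'length' */
+#   #define DUK_HEAP_STRING_LENGTH(heap) DUK_HEAP_GET_STRING((heap),DUK_STRIDX_LENGTH)
+#   #define DUK_HTHREAD_STRING_LENGTH(thr) DUK_HTHREAD_GET_STRING((thr),DUK_STRIDX_LENGTH)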
+def emit_header_stridx_defines(genc, meta): + strlist = meta['strings_stridx'] + + for idx,s in enumerate(strlist): + genc.emitDefine(s['define'], idx, repr(s['str'])) + defname = s['define'].replace('_STRIDX','_HEAP_STRING') + genc.emitDefine(defname + '(heap)', 'DUK_HEAP_GET_STRING((heap),%s)' % s['define']) + defname = s['define'].replace('_STRIDX', '_HTHREAD_STRING') + genc.emitDefine(defname + '(thr)', 'DUK_HTHREAD_GET_STRING((thr),%s)' % s['define']) + + idx_start_reserved = None + idx_start_strict_reserved = None + for idx,s in enumerate(strlist): + if idx_start_reserved is None and s.get('reserved_word', False): + idx_start_reserved = idx + if idx_start_strict_reserved is None and s.get('future_reserved_word_strict', False): + idx_start_strict_reserved = idx + assert(idx_start_reserved is not None) + assert(idx_start_strict_reserved is not None) + + genc.emitLine('') + genc.emitDefine('DUK_HEAP_NUM_STRINGS', len(strlist)) + genc.emitDefine('DUK_STRIDX_START_RESERVED', idx_start_reserved) + genc.emitDefine('DUK_STRIDX_START_STRICT_RESERVED', idx_start_strict_reserved) + genc.emitDefine('DUK_STRIDX_END_RESERVED', len(strlist), comment='exclusive endpoint') + genc.emitLine('') + genc.emitLine('/* To convert a heap stridx to a token number, subtract') + genc.emitLine(' * DUK_STRIDX_START_RESERVED and add DUK_TOK_START_RESERVED.') + genc.emitLine(' */') + +# Encode property flags for RAM initializers. +def encode_property_flags(flags): + # Note: must match duk_hobject.h + + res = 0 + nflags = 0 + if 'w' in flags: + nflags += 1 + res = res | PROPDESC_FLAG_WRITABLE + if 'e' in flags: + nflags += 1 + res = res | PROPDESC_FLAG_ENUMERABLE + if 'c' in flags: + nflags += 1 + res = res | PROPDESC_FLAG_CONFIGURABLE + if 'a' in flags: + nflags += 1 + res = res | PROPDESC_FLAG_ACCESSOR + + if nflags != len(flags): + raise Exception('unsupported flags: %s' % repr(flags)) + + return res + +# Generate RAM object initdata for an object (but not its properties). +def gen_ramobj_initdata_for_object(meta, be, bi, string_to_stridx, natfunc_name_to_natidx, objid_to_bidx): + def _stridx(strval): + stridx = string_to_stridx[strval] + be.bits(stridx, STRIDX_BITS) + def _stridx_or_string(strval): + # XXX: could share the built-in strings decoder, would save ~200 bytes. + stridx = string_to_stridx.get(strval) + if stridx is not None: + be.bits(0, 1) # marker: stridx + be.bits(stridx, STRIDX_BITS) + else: + be.bits(1, 1) # marker: raw bytes + be.bits(len(strval), STRING_LENGTH_BITS) + for i in xrange(len(strval)): + be.bits(ord(strval[i]), STRING_CHAR_BITS) + def _natidx(native_name): + natidx = natfunc_name_to_natidx[native_name] + be.bits(natidx, NATIDX_BITS) + + class_num = class_to_number(bi['class']) + be.bits(class_num, CLASS_BITS) + + props = [x for x in bi['properties']] # clone + + prop_proto = steal_prop(props, 'prototype') + prop_constr = steal_prop(props, 'constructor') + prop_name = steal_prop(props, 'name') + prop_length = steal_prop(props, 'length') + + length = -1 # default value -1 signifies varargs + if prop_length is not None: + assert(isinstance(prop_length['value'], int)) + length = prop_length['value'] + be.bits(1, 1) # flag: have length + be.bits(length, LENGTH_PROP_BITS) + else: + be.bits(0, 1) # flag: no length + + # The attributes for 'length' are standard ("none") except for + # Array.prototype.length which must be writable (this is handled + # separately in duk_hthread_builtins.c). 
+ + len_attrs = LENGTH_PROPERTY_ATTRIBUTES + if prop_length is not None: + len_attrs = prop_length['attributes'] + + if len_attrs != LENGTH_PROPERTY_ATTRIBUTES: + # Attributes are assumed to be the same, except for Array.prototype. + if bi['class'] != 'Array': # Array.prototype is the only one with this class + raise Exception('non-default length attribute for unexpected object') + + # For 'Function' classed objects, emit the native function stuff. + # Unfortunately this is more or less a copy of what we do for + # function properties now. This should be addressed if a rework + # on the init format is done. + + if bi['class'] == 'Function': + _natidx(bi['native']) + + if bi.get('varargs', False): + be.bits(1, 1) # flag: non-default nargs + be.bits(NARGS_VARARGS_MARKER, NARGS_BITS) + elif bi.has_key('nargs') and bi['nargs'] != length: + be.bits(1, 1) # flag: non-default nargs + be.bits(bi['nargs'], NARGS_BITS) + else: + assert(length is not None) + be.bits(0, 1) # flag: default nargs OK + + # All Function-classed global level objects are callable + # (have [[Call]]) but not all are constructable (have + # [[Construct]]). Flag that. + + assert(bi.has_key('callable')) + assert(bi['callable'] == True) + + assert(prop_name is not None) + assert(isinstance(prop_name['value'], str)) + _stridx_or_string(prop_name['value']) + + if bi.get('constructable', False): + be.bits(1, 1) # flag: constructable + else: + be.bits(0, 1) # flag: not constructable + + # Convert signed magic to 16-bit unsigned for encoding + magic = resolve_magic(bi.get('magic'), objid_to_bidx) & 0xffff + if magic != 0: + assert(magic >= 0) + assert(magic < (1 << MAGIC_BITS)) + be.bits(1, 1) + be.bits(magic, MAGIC_BITS) + else: + be.bits(0, 1) + +# Generate RAM object initdata for an object's properties. +def gen_ramobj_initdata_for_props(meta, be, bi, string_to_stridx, natfunc_name_to_natidx, objid_to_bidx, double_byte_order): + count_normal_props = 0 + count_function_props = 0 + + def _bidx(bi_id): + if bi_id is None: + be.bits(NO_BIDX_MARKER, BIDX_BITS) + else: + be.bits(objid_to_bidx[bi_id], BIDX_BITS) + def _stridx(strval): + stridx = string_to_stridx[strval] + be.bits(stridx, STRIDX_BITS) + def _stridx_or_string(strval): + # XXX: could share the built-in strings decoder, would save ~200 bytes. 
+ stridx = string_to_stridx.get(strval) + if stridx is not None: + be.bits(0, 1) # marker: stridx + be.bits(stridx, STRIDX_BITS) + else: + be.bits(1, 1) # marker: raw bytes + be.bits(len(strval), STRING_LENGTH_BITS) + for i in xrange(len(strval)): + be.bits(ord(strval[i]), STRING_CHAR_BITS) + def _natidx(native_name): + natidx = natfunc_name_to_natidx[native_name] + be.bits(natidx, NATIDX_BITS) + + props = [x for x in bi['properties']] # clone + + # internal prototype: not an actual property so not in property list + if bi.has_key('internal_prototype'): + _bidx(bi['internal_prototype']) + else: + _bidx(None) + + # external prototype: encoded specially, steal from property list + prop_proto = steal_prop(props, 'prototype') + if prop_proto is not None: + assert(prop_proto['value']['type'] == 'object') + assert(prop_proto['attributes'] == '') + _bidx(prop_proto['value']['id']) + else: + _bidx(None) + + # external constructor: encoded specially, steal from property list + prop_constr = steal_prop(props, 'constructor') + if prop_constr is not None: + assert(prop_constr['value']['type'] == 'object') + assert(prop_constr['attributes'] == 'wc') + _bidx(prop_constr['value']['id']) + else: + _bidx(None) + + # name: encoded specially for function objects, so steal and ignore here + if bi['class'] == 'Function': + prop_name = steal_prop(props, 'name') + assert(prop_name is not None) + assert(isinstance(prop_name['value'], str)) + # Function.prototype.name has special handling in duk_hthread_builtins.c + assert((bi['id'] != 'bi_function_prototype' and prop_name['attributes'] == '') or \ + (bi['id'] == 'bi_function_prototype' and prop_name['attributes'] == 'w')) + + # length: encoded specially, so steal and ignore + prop_proto = steal_prop(props, 'length') + + # Date.prototype.toGMTString needs special handling and is handled + # directly in duk_hthread_builtins.c; so steal and ignore here. + if bi['id'] == 'bi_date_prototype': + prop_togmtstring = steal_prop(props, 'toGMTString') + assert(prop_togmtstring is not None) + #print('Stole Date.prototype.toGMTString') + + # Split properties into non-builtin functions and other properties. + # This split is a bit arbitrary, but is used to reduce flag bits in + # the bit stream. + values = [] + functions = [] + for prop in props: + if isinstance(prop['value'], dict) and \ + prop['value']['type'] == 'object' and \ + metadata_lookup_object(meta, prop['value']['id']).has_key('native') and \ + not metadata_lookup_object(meta, prop['value']['id']).has_key('bidx'): + functions.append(prop) + else: + values.append(prop) + + be.bits(len(values), NUM_NORMAL_PROPS_BITS) + + for valspec in values: + count_normal_props += 1 + + val = valspec['value'] + + _stridx_or_string(valspec['key']) + + # Attribute check doesn't check for accessor flag; that is now + # automatically set by C code when value is an accessor type. + # Accessors must not have 'writable', so they'll always have + # non-default attributes (less footprint than adding a different + # default). 
+ default_attrs = DEFAULT_DATA_PROPERTY_ATTRIBUTES + + attrs = valspec.get('attributes', default_attrs) + attrs = attrs.replace('a', '') # ram bitstream doesn't encode 'accessor' attribute + if attrs != default_attrs: + #print('non-default attributes: %s -> %r (default %r)' % (valspec['key'], attrs, default_attrs)) + be.bits(1, 1) # flag: have custom attributes + be.bits(encode_property_flags(attrs), PROP_FLAGS_BITS) + else: + be.bits(0, 1) # flag: no custom attributes + + if val is None: + print('WARNING: RAM init data format doesn\'t support "null" now, value replaced with "undefined": %r' % valspec) + #raise Exception('RAM init format doesn\'t support a "null" value now') + be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) + elif isinstance(val, bool): + if val == True: + be.bits(PROP_TYPE_BOOLEAN_TRUE, PROP_TYPE_BITS) + else: + be.bits(PROP_TYPE_BOOLEAN_FALSE, PROP_TYPE_BITS) + elif isinstance(val, (float, int)) or isinstance(val, dict) and val['type'] == 'double': + # Avoid converting a manually specified NaN temporarily into + # a float to avoid risk of e.g. NaN being replaced by another. + if isinstance(val, dict): + val = val['bytes'].decode('hex') + assert(len(val) == 8) + else: + val = struct.pack('>d', float(val)) + + be.bits(PROP_TYPE_DOUBLE, PROP_TYPE_BITS) + + # encoding of double must match target architecture byte order + indexlist = { + 'big': [ 0, 1, 2, 3, 4, 5, 6, 7 ], + 'little': [ 7, 6, 5, 4, 3, 2, 1, 0 ], + 'mixed': [ 3, 2, 1, 0, 7, 6, 5, 4 ] # some arm platforms + }[double_byte_order] + + data = ''.join([ val[indexlist[idx]] for idx in xrange(8) ]) + + #print('DOUBLE: %s -> %s' % (val.encode('hex'), data.encode('hex'))) + + if len(data) != 8: + raise Exception('internal error') + be.string(data) + elif isinstance(val, str) or isinstance(val, unicode): + if isinstance(val, unicode): + # Note: non-ASCII characters will not currently work, + # because bits/char is too low. + val = val.encode('utf-8') + + if string_to_stridx.has_key(val): + # String value is in built-in string table -> encode + # using a string index. This saves some space, + # especially for the 'name' property of errors + # ('EvalError' etc). 
+ + be.bits(PROP_TYPE_STRIDX, PROP_TYPE_BITS) + _stridx(val) + else: + # Not in string table -> encode as raw 7-bit value + + be.bits(PROP_TYPE_STRING, PROP_TYPE_BITS) + be.bits(len(val), STRING_LENGTH_BITS) + for i in xrange(len(val)): + be.bits(ord(val[i]), STRING_CHAR_BITS) + elif isinstance(val, dict): + if val['type'] == 'object': + be.bits(PROP_TYPE_BUILTIN, PROP_TYPE_BITS) + _bidx(val['id']) + elif val['type'] == 'undefined': + be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) + elif val['type'] == 'accessor': + be.bits(PROP_TYPE_ACCESSOR, PROP_TYPE_BITS) + getter_fn = metadata_lookup_object(meta, val['getter_id']) + setter_fn = metadata_lookup_object(meta, val['setter_id']) + _natidx(getter_fn['native']) + _natidx(setter_fn['native']) + assert(getter_fn['nargs'] == 0) + assert(setter_fn['nargs'] == 1) + assert(getter_fn['magic'] == 0) + assert(setter_fn['magic'] == 0) + elif val['type'] == 'lightfunc': + print('WARNING: RAM init data format doesn\'t support "lightfunc" now, value replaced with "undefined": %r' % valspec) + be.bits(PROP_TYPE_UNDEFINED, PROP_TYPE_BITS) + else: + raise Exception('unsupported value: %s' % repr(val)) + else: + raise Exception('unsupported value: %s' % repr(val)) + + be.bits(len(functions), NUM_FUNC_PROPS_BITS) + + for funprop in functions: + count_function_props += 1 + + funobj = metadata_lookup_object(meta, funprop['value']['id']) + prop_len = metadata_lookup_property(funobj, 'length') + assert(prop_len is not None) + assert(isinstance(prop_len['value'], (int))) + length = prop_len['value'] + + _stridx_or_string(funprop['key']) + _natidx(funobj['native']) + be.bits(length, LENGTH_PROP_BITS) + + if funobj.get('varargs', False): + be.bits(1, 1) # flag: non-default nargs + be.bits(NARGS_VARARGS_MARKER, NARGS_BITS) + elif funobj.has_key('nargs') and funobj['nargs'] != length: + be.bits(1, 1) # flag: non-default nargs + be.bits(funobj['nargs'], NARGS_BITS) + else: + be.bits(0, 1) # flag: default nargs OK + + # XXX: make this check conditional to minimize bit count + # (there are quite a lot of function properties) + # Convert signed magic to 16-bit unsigned for encoding + magic = resolve_magic(funobj.get('magic'), objid_to_bidx) & 0xffff + if magic != 0: + assert(magic >= 0) + assert(magic < (1 << MAGIC_BITS)) + be.bits(1, 1) + be.bits(magic, MAGIC_BITS) + else: + be.bits(0, 1) + + return count_normal_props, count_function_props + +# Get helper maps for RAM objects. +def get_ramobj_native_func_maps(meta): + # Native function list and index + native_funcs_found = {} + native_funcs = [] + natfunc_name_to_natidx = {} + + for o in meta['objects']: + if o.has_key('native'): + native_funcs_found[o['native']] = True + for v in o['properties']: + val = v['value'] + if isinstance(val, dict): + if val['type'] == 'accessor': + getter = metadata_lookup_object(meta, val['getter_id']) + native_funcs_found[getter['native']] = True + setter = metadata_lookup_object(meta, val['setter_id']) + native_funcs_found[setter['native']] = True + if val['type'] == 'object': + target = metadata_lookup_object(meta, val['id']) + if target.has_key('native'): + native_funcs_found[target['native']] = True + if val['type'] == 'lightfunc': + # No lightfunc support for RAM initializer now. + pass + + for idx,k in enumerate(sorted(native_funcs_found.keys())): + native_funcs.append(k) # native func names + natfunc_name_to_natidx[k] = idx + + return native_funcs, natfunc_name_to_natidx + +# Generate bit-packed RAM object init data. 
+def gen_ramobj_initdata_bitpacked(meta, native_funcs, natfunc_name_to_natidx, double_byte_order): + # RAM initialization is based on a specially filtered list of top + # level objects which includes objects with 'bidx' and objects + # which aren't handled as inline values in the init bitstream. + objlist = meta['objects_ram_toplevel'] + objid_to_idx = meta['_objid_to_ramidx'] + objid_to_object = meta['_objid_to_object'] # This index is valid even for filtered object list + string_index = meta['_plain_to_stridx'] + + # Generate bitstream + be = dukutil.BitEncoder() + count_builtins = 0 + count_normal_props = 0 + count_function_props = 0 + for o in objlist: + count_builtins += 1 + gen_ramobj_initdata_for_object(meta, be, o, string_index, natfunc_name_to_natidx, objid_to_idx) + for o in objlist: + count_obj_normal, count_obj_func = gen_ramobj_initdata_for_props(meta, be, o, string_index, natfunc_name_to_natidx, objid_to_idx, double_byte_order) + count_normal_props += count_obj_normal + count_function_props += count_obj_func + + romobj_init_data = be.getByteString() + #print(repr(romobj_init_data)) + #print(len(romobj_init_data)) + + print('%d ram builtins, %d normal properties, %d function properties, %d bytes of object init data' % \ + (count_builtins, count_normal_props, count_function_props, len(romobj_init_data))) + + return romobj_init_data + +# Functions to emit object-related source/header parts. + +def emit_ramobj_source_nativefunc_array(genc, native_func_list): + genc.emitLine('/* native functions: %d */' % len(native_func_list)) + genc.emitLine('DUK_INTERNAL const duk_c_function duk_bi_native_functions[%d] = {' % len(native_func_list)) + for i in native_func_list: + # The function pointer cast here makes BCC complain about + # "initializer too complicated", so omit the cast. + #genc.emitLine('\t(duk_c_function) %s,' % i) + genc.emitLine('\t%s,' % i) + genc.emitLine('};') + +def emit_ramobj_source_objinit_data(genc, init_data): + genc.emitArray(init_data, 'duk_builtins_data', visibility='DUK_INTERNAL', typename='duk_uint8_t', intvalues=True, const=True, size=len(init_data)) + +def emit_ramobj_header_nativefunc_array(genc, native_func_list): + genc.emitLine('#if !defined(DUK_SINGLE_FILE)') + genc.emitLine('DUK_INTERNAL_DECL const duk_c_function duk_bi_native_functions[%d];' % len(native_func_list)) + genc.emitLine('#endif /* !DUK_SINGLE_FILE */') + +def emit_ramobj_header_objects(genc, meta): + objlist = meta['objects_bidx'] + for idx,o in enumerate(objlist): + defname = 'DUK_BIDX_' + '_'.join(o['id'].upper().split('_')[1:]) # bi_foo_bar -> FOO_BAR + genc.emitDefine(defname, idx) + genc.emitDefine('DUK_NUM_BUILTINS', len(objlist)) + genc.emitDefine('DUK_NUM_BIDX_BUILTINS', len(objlist)) # Objects with 'bidx' + genc.emitDefine('DUK_NUM_ALL_BUILTINS', len(meta['objects_ram_toplevel'])) # Objects with 'bidx' + temps needed in init + +def emit_ramobj_header_initdata(genc, init_data): + genc.emitLine('#if !defined(DUK_SINGLE_FILE)') + genc.emitLine('DUK_INTERNAL_DECL const duk_uint8_t duk_builtins_data[%d];' % len(init_data)) + genc.emitLine('#endif /* !DUK_SINGLE_FILE */') + genc.emitDefine('DUK_BUILTINS_DATA_LENGTH', len(init_data)) + +# +# ROM init data +# +# Compile-time initializers for ROM strings and ROM objects. This involves +# a lot of small details: +# +# - Several variants are needed for different options: unpacked vs. +# packed duk_tval, endianness, string hash in use, etc). +# +# - Static initializers must represent objects of different size. 
For +# example, separate structs are needed for property tables of different +# size or value typing. +# +# - Union initializers cannot be used portable because they're only +# available in C99 and above. +# +# - Initializers must use 'const' correctly to ensure that the entire +# initialization data will go into ROM (read-only data section). +# Const pointers etc will need to be cast into non-const pointers at +# some point to properly mix with non-const RAM pointers, so a portable +# const losing cast is needed. +# +# - C++ doesn't allow forward declaration of "static const" structures +# which is problematic because there are cyclical const structures. +# + +# Get string hash initializers; need to compute possible string hash variants +# which will match runtime values. +def rom_get_strhash16_macro(val): + hash16le = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=False, strhash16=True) + hash16be = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=True, strhash16=True) + hash16sparse = dukutil.duk_heap_hashstring_sparse(val, DUK__FIXED_HASH_SEED, strhash16=True) + return 'DUK__STRHASH16(%dU,%dU,%dU)' % (hash16le, hash16be, hash16sparse) +def rom_get_strhash32_macro(val): + hash32le = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=False, strhash16=False) + hash32be = dukutil.duk_heap_hashstring_dense(val, DUK__FIXED_HASH_SEED, big_endian=True, strhash16=False) + hash32sparse = dukutil.duk_heap_hashstring_sparse(val, DUK__FIXED_HASH_SEED, strhash16=False) + return 'DUK__STRHASH32(%dUL,%dUL,%dUL)' % (hash32le, hash32be, hash32sparse) + +# Get string character .length; must match runtime .length computation. +def rom_charlen(x): + return dukutil.duk_unicode_unvalidated_utf8_length(x) + +# Get an initializer type and initializer literal for a specified value +# (expressed in YAML metadata format). The types and initializers depend +# on declarations emitted before the initializers, and in several cases +# use a macro to hide the selection between several initializer variants. +def rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map): + def double_bytes_initializer(val): + # Portable and exact float initializer. 
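+ # For example, 1.0 packs to hex '3ff0000000000000' and becomes
+ # DUK__DBLBYTES(63,240U,0,0,0,0,0,0); the DUK__DBLBYTES macro then
+ # shuffles the bytes into target byte order at compile time.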
+ assert(isinstance(val, str) and len(val) == 16) # hex encoded bytes + val = val.decode('hex') + tmp = [] + for i in xrange(8): + t = ord(val[i]) + if t >= 128: + tmp.append('%dU' % t) + else: + tmp.append('%d' % t) + return 'DUK__DBLBYTES(' + ','.join(tmp) + ')' + + def tval_number_initializer(val): + return 'DUK__TVAL_NUMBER(%s)' % double_bytes_initializer(val) + + v = val['value'] + if v is None: + init_type = 'duk_rom_tval_null' + init_lit = 'DUK__TVAL_NULL()' + elif isinstance(v, (bool)): + init_type = 'duk_rom_tval_boolean' + bval = 0 + if v: + bval = 1 + init_lit = 'DUK__TVAL_BOOLEAN(%d)' % bval + elif isinstance(v, (int, float)): + fval = struct.pack('>d', float(v)).encode('hex') + init_type = 'duk_rom_tval_number' + init_lit = tval_number_initializer(fval) + elif isinstance(v, (str, unicode)): + init_type = 'duk_rom_tval_string' + init_lit = 'DUK__TVAL_STRING(&%s)' % bi_str_map[v] + elif isinstance(v, (dict)): + if v['type'] == 'double': + init_type = 'duk_rom_tval_number' + init_lit = tval_number_initializer(v['bytes']) + elif v['type'] == 'undefined': + init_type = 'duk_rom_tval_undefined' + init_lit = 'DUK__TVAL_UNDEFINED()' + elif v['type'] == 'null': + init_type = 'duk_rom_tval_null' + init_lit = 'DUK__TVAL_UNDEFINED()' + elif v['type'] == 'object': + init_type = 'duk_rom_tval_object' + init_lit = 'DUK__TVAL_OBJECT(&%s)' % bi_obj_map[v['id']] + elif v['type'] == 'accessor': + getter_object = metadata_lookup_object(meta, v['getter_id']) + setter_object = metadata_lookup_object(meta, v['setter_id']) + init_type = 'duk_rom_tval_accessor' + init_lit = 'DUK__TVAL_ACCESSOR(&%s, &%s)' % (bi_obj_map[getter_object['id']], bi_obj_map[setter_object['id']]) + + elif v['type'] == 'lightfunc': + # Match DUK_LFUNC_FLAGS_PACK() in duk_tval.h. + if v.has_key('length'): + assert(v['length'] >= 0 and v['length'] <= 15) + lf_length = v['length'] + else: + lf_length = 0 + if v.get('varargs', True): + lf_nargs = 15 # varargs marker + else: + assert(v['nargs'] >= 0 and v['nargs'] <= 14) + lf_nargs = v['nargs'] + if v.has_key('magic'): + assert(v['magic'] >= -0x80 and v['magic'] <= 0x7f) + lf_magic = v['magic'] & 0xff + else: + lf_magic = 0 + lf_flags = (lf_magic << 8) + (lf_length << 4) + lf_nargs + init_type = 'duk_rom_tval_lightfunc' + init_lit = 'DUK__TVAL_LIGHTFUNC(%s, %dL)' % (v['native'], lf_flags) + else: + raise Exception('unhandled value: %r' % val) + else: + raise Exception('internal error: %r' % val) + return init_type, init_lit + +# Helpers to get either initializer type or value only (not both). +def rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map): + init_type, init_lit = rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map) + return init_type +def rom_get_value_initializer_literal(meta, val, bi_str_map, bi_obj_map): + init_type, init_lit = rom_get_value_initializer(meta, val, bi_str_map, bi_obj_map) + return init_lit + +# Emit ROM strings source: structs/typedefs and their initializers. +# Separate initialization structs are needed for strings of different +# length. +def rom_emit_strings_source(genc, meta): + # Write built-in strings as code section initializers. + + strs = meta['_strings_plain'] # all strings, plain versions + reserved_words = meta['_is_plain_reserved_word'] + strict_reserved_words = meta['_is_plain_strict_reserved_word'] + strs_needing_stridx = meta['strings_stridx'] + + # Sort used lengths and declare per-length initializers. 
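+ # For a 4-byte string, for example, the loop below emits:
+ #   typedef struct duk_romstr_4 duk_romstr_4; struct duk_romstr_4 { duk_hstring hdr; duk_uint8_t data[5]; };
+ # (one extra data byte for the NUL terminator).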
+ lens = [] + for v in strs: + strlen = len(v) + if strlen not in lens: + lens.append(strlen) + lens.sort() + for strlen in lens: + genc.emitLine('typedef struct duk_romstr_%d duk_romstr_%d; ' % (strlen, strlen) + + 'struct duk_romstr_%d { duk_hstring hdr; duk_uint8_t data[%d]; };' % (strlen, strlen + 1)) + genc.emitLine('') + + # String hash values depend on endianness and other factors, + # use an initializer macro to select the appropriate hash. + genc.emitLine('/* When unaligned access possible, 32-bit values are fetched using host order.') + genc.emitLine(' * When unaligned access not possible, always simulate little endian order.') + genc.emitLine(' * See: src/duk_util_hashbytes.c:duk_util_hashbytes().') + genc.emitLine(' */') + genc.emitLine('#if defined(DUK_USE_STRHASH_DENSE)') + genc.emitLine('#if defined(DUK_USE_HASHBYTES_UNALIGNED_U32_ACCESS)') # XXX: config option to be reworked + genc.emitLine('#if defined(DUK_USE_INTEGER_BE)') + genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16be)') + genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32be)') + genc.emitLine('#else') + genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16le)') + genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32le)') + genc.emitLine('#endif') + genc.emitLine('#else') + genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16le)') + genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32le)') + genc.emitLine('#endif') + genc.emitLine('#else /* DUK_USE_STRHASH_DENSE */') + genc.emitLine('#define DUK__STRHASH16(hash16le,hash16be,hash16sparse) (hash16sparse)') + genc.emitLine('#define DUK__STRHASH32(hash32le,hash32be,hash32sparse) (hash32sparse)') + genc.emitLine('#endif /* DUK_USE_STRHASH_DENSE */') + + # String header initializer macro, takes into account lowmem etc. + genc.emitLine('#if defined(DUK_USE_HEAPPTR16)') + genc.emitLine('#if !defined(DUK_USE_REFCOUNT16)') + genc.emitLine('#error currently assumes DUK_USE_HEAPPTR16 and DUK_USE_REFCOUNT16 are both defined') + genc.emitLine('#endif') + genc.emitLine('#if defined(DUK_USE_HSTRING_CLEN)') + genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') + genc.emitLine('\t{ { (heaphdr_flags) | ((hash16) << 16), (refcount), (blen) }, (clen) }') + genc.emitLine('#else /* DUK_USE_HSTRING_CLEN */') + genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') + genc.emitLine('\t{ { (heaphdr_flags) | ((hash16) << 16), (refcount), (blen) } }') + genc.emitLine('#endif /* DUK_USE_HSTRING_CLEN */') + genc.emitLine('#else /* DUK_USE_HEAPPTR16 */') + genc.emitLine('#define DUK__STRINIT(heaphdr_flags,refcount,hash32,hash16,blen,clen) \\') + genc.emitLine('\t{ { (heaphdr_flags), (refcount) }, (hash32), (blen), (clen) }') + genc.emitLine('#endif /* DUK_USE_HEAPPTR16 */') + + # Emit string initializers. 
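+ # Illustrative result for one string (hash macro arguments elided; string
+ # and index hypothetical, bytes shown are 'Math' plus NUL):
+ #   DUK_INTERNAL const duk_romstr_4 duk_str_42 = {DUK__STRINIT(DUK_HTYPE_STRING|DUK_HEAPHDR_FLAG_READONLY|DUK_HSTRING_FLAG_ASCII,1,DUK__STRHASH32(...),DUK__STRHASH16(...),4,4),{77,97,116,104,0}};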
+ genc.emitLine('') + bi_str_map = {} # string -> initializer variable name + for str_index,v in enumerate(strs): + bi_str_map[v] = 'duk_str_%d' % str_index + + tmp = 'DUK_INTERNAL const duk_romstr_%d duk_str_%d = {' % (len(v), str_index) + flags = [ 'DUK_HTYPE_STRING', 'DUK_HEAPHDR_FLAG_READONLY' ] + is_arridx = string_is_arridx(v) + + blen = len(v) + clen = rom_charlen(v) + + if blen == clen: + flags.append('DUK_HSTRING_FLAG_ASCII') + if is_arridx: + #print('%r is arridx' % v) + flags.append('DUK_HSTRING_FLAG_ARRIDX') + if len(v) >= 1 and v[0] == '\xff': + flags.append('DUK_HSTRING_FLAG_INTERNAL') + if v in [ 'eval', 'arguments' ]: + flags.append('DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS') + if reserved_words.has_key(v): + flags.append('DUK_HSTRING_FLAG_RESERVED_WORD') + if strict_reserved_words.has_key(v): + flags.append('DUK_HSTRING_FLAG_STRICT_RESERVED_WORD') + + tmp += 'DUK__STRINIT(%s,%d,%s,%s,%d,%d),' % \ + ('|'.join(flags), 1, rom_get_strhash32_macro(v), \ + rom_get_strhash16_macro(v), blen, clen) + + tmpbytes = [] + for c in v: + if ord(c) < 128: + tmpbytes.append('%d' % ord(c)) + else: + tmpbytes.append('%dU' % ord(c)) + tmpbytes.append('%d' % 0) # NUL term + tmp += '{' + ','.join(tmpbytes) + '}' + tmp += '};' + genc.emitLine(tmp) + + # Emit an array of ROM strings, used for string interning. + # + # XXX: String interning now simply walks through the list checking if + # an incoming string is present in ROM. It would be better to use + # binary search (or perhaps even a perfect hash) for this lookup. + # To support binary search we could emit the list in string hash + # order, but because there are multiple different hash variants + # there would need to be multiple lists. We could also order the + # strings based on the string data which is independent of the string + # hash and still possible to binary search relatively efficiently. + # + # cdecl> explain const int * const foo; + # declare foo as const pointer to const int + genc.emitLine('') + genc.emitLine('DUK_INTERNAL const duk_hstring * const duk_rom_strings[%d] = {'% len(strs)) + tmp = [] + linecount = 0 + for str_index,v in enumerate(strs): + if str_index > 0: + tmp.append(', ') + if linecount >= 6: + linecount = 0 + tmp.append('\n') + tmp.append('(const duk_hstring *) &duk_str_%d' % str_index) + linecount += 1 + for line in ''.join(tmp).split('\n'): + genc.emitLine(line) + genc.emitLine('};') + + # Emit an array of duk_hstring pointers indexed using DUK_STRIDX_xxx. + # This will back e.g. DUK_HTHREAD_STRING_XYZ(thr) directly, without + # needing an explicit array in thr/heap->strs[]. + # + # cdecl > explain const int * const foo; + # declare foo as const pointer to const int + genc.emitLine('') + genc.emitLine('DUK_INTERNAL const duk_hstring * const duk_rom_strings_stridx[%d] = {' % len(strs_needing_stridx)) + for s in strs_needing_stridx: + genc.emitLine('\t(const duk_hstring *) &%s,' % bi_str_map[s['str']]) # strs_needing_stridx is a list of objects, not plain strings + genc.emitLine('};') + + return bi_str_map + +# Emit ROM strings header. +def rom_emit_strings_header(genc, meta): + genc.emitLine('#if !defined(DUK_SINGLE_FILE)') # C++ static const workaround + genc.emitLine('DUK_INTERNAL_DECL const duk_hstring * const duk_rom_strings[%d];'% len(meta['strings'])) + genc.emitLine('DUK_INTERNAL_DECL const duk_hstring * const duk_rom_strings_stridx[%d];' % len(meta['strings_stridx'])) + genc.emitLine('#endif') + +# Emit ROM objects initialized types and macros. 
+def rom_emit_object_initializer_types_and_macros(genc): + # Objects and functions are straightforward because they just use the + # RAM structure which has no dynamic or variable size parts. + genc.emitLine('typedef struct duk_romobj duk_romobj; ' + \ + 'struct duk_romobj { duk_hobject hdr; };') + genc.emitLine('typedef struct duk_romarr duk_romarr; ' + \ + 'struct duk_romarr { duk_harray hdr; };') + genc.emitLine('typedef struct duk_romfun duk_romfun; ' + \ + 'struct duk_romfun { duk_hnatfunc hdr; };') + + # For ROM pointer compression we'd need a -compile time- variant. + # The current portable solution is to just assign running numbers + # to ROM compressed pointers, and provide the table for user pointer + # compression function. Much better solutions would be possible, + # but such solutions are often compiler/platform specific. + + # Emit object/function initializer which is aware of options affecting + # the header. Heap next/prev pointers are always NULL. + genc.emitLine('#if defined(DUK_USE_HEAPPTR16)') + genc.emitLine('#if !defined(DUK_USE_REFCOUNT16) || defined(DUK_USE_HOBJECT_HASH_PART)') + genc.emitLine('#error currently assumes DUK_USE_HEAPPTR16 and DUK_USE_REFCOUNT16 are both defined and DUK_USE_HOBJECT_HASH_PART is undefined') + genc.emitLine('#endif') + #genc.emitLine('#if !defined(DUK_USE_HEAPPTR_ENC16_STATIC)') + #genc.emitLine('#error need DUK_USE_HEAPPTR_ENC16_STATIC which provides compile-time pointer compression') + #genc.emitLine('#endif') + genc.emitLine('#define DUK__ROMOBJ_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize) \\') + genc.emitLine('\t{ { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) } }') + genc.emitLine('#define DUK__ROMARR_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,length) \\') + genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) }, (length), 0 /*length_nonwritable*/ } }') + genc.emitLine('#define DUK__ROMFUN_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,nativefunc,nargs,magic) \\') + genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), 0, 0, (props_enc16) }, (iproto_enc16), (esize), (enext), (asize) }, (nativefunc), (duk_int16_t) (nargs), (duk_int16_t) (magic) } }') + genc.emitLine('#else /* DUK_USE_HEAPPTR16 */') + genc.emitLine('#define DUK__ROMOBJ_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize) \\') + genc.emitLine('\t{ { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) } }') + genc.emitLine('#define DUK__ROMARR_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,length) \\') + genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) }, (length), 0 /*length_nonwritable*/ } }') + genc.emitLine('#define DUK__ROMFUN_INIT(heaphdr_flags,refcount,props,props_enc16,iproto,iproto_enc16,esize,enext,asize,hsize,nativefunc,nargs,magic) \\') + genc.emitLine('\t{ { { { (heaphdr_flags), (refcount), NULL, NULL }, (duk_uint8_t *) DUK_LOSE_CONST(props), (duk_hobject *) DUK_LOSE_CONST(iproto), (esize), (enext), (asize), (hsize) }, (nativefunc), (duk_int16_t) (nargs), (duk_int16_t) (magic) } }') + 
genc.emitLine('#endif /* DUK_USE_HEAPPTR16 */') + + # Initializer typedef for a dummy function pointer. ROM support assumes + # function pointers are 32 bits. Using a dummy function pointer type + # avoids function pointer to normal pointer cast which emits warnings. + genc.emitLine('typedef void (*duk_rom_funcptr)(void);') + + # Emit duk_tval structs. This gets a bit messier with packed/unpacked + # duk_tval, endianness variants, pointer sizes, etc. + genc.emitLine('#if defined(DUK_USE_PACKED_TVAL)') + genc.emitLine('typedef struct duk_rom_tval_undefined duk_rom_tval_undefined;') + genc.emitLine('typedef struct duk_rom_tval_null duk_rom_tval_null;') + genc.emitLine('typedef struct duk_rom_tval_lightfunc duk_rom_tval_lightfunc;') + genc.emitLine('typedef struct duk_rom_tval_boolean duk_rom_tval_boolean;') + genc.emitLine('typedef struct duk_rom_tval_number duk_rom_tval_number;') + genc.emitLine('typedef struct duk_rom_tval_object duk_rom_tval_object;') + genc.emitLine('typedef struct duk_rom_tval_string duk_rom_tval_string;') + genc.emitLine('typedef struct duk_rom_tval_accessor duk_rom_tval_accessor;') + genc.emitLine('struct duk_rom_tval_number { duk_uint8_t bytes[8]; };') + genc.emitLine('struct duk_rom_tval_accessor { const duk_hobject *get; const duk_hobject *set; };') + genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') + genc.emitLine('struct duk_rom_tval_object { const void *ptr; duk_uint32_t hiword; };') + genc.emitLine('struct duk_rom_tval_string { const void *ptr; duk_uint32_t hiword; };') + genc.emitLine('struct duk_rom_tval_undefined { const void *ptr; duk_uint32_t hiword; };') + genc.emitLine('struct duk_rom_tval_null { const void *ptr; duk_uint32_t hiword; };') + genc.emitLine('struct duk_rom_tval_lightfunc { duk_rom_funcptr ptr; duk_uint32_t hiword; };') + genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t dummy; duk_uint32_t hiword; };') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') + genc.emitLine('struct duk_rom_tval_object { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_string { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_undefined { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_null { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_lightfunc { duk_uint32_t hiword; duk_rom_funcptr ptr; };') + genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t hiword; duk_uint32_t dummy; };') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') + genc.emitLine('struct duk_rom_tval_object { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_string { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_undefined { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_null { duk_uint32_t hiword; const void *ptr; };') + genc.emitLine('struct duk_rom_tval_lightfunc { duk_uint32_t hiword; duk_rom_funcptr ptr; };') + genc.emitLine('struct duk_rom_tval_boolean { duk_uint32_t hiword; duk_uint32_t dummy; };') + genc.emitLine('#else') + genc.emitLine('#error invalid endianness defines') + genc.emitLine('#endif') + genc.emitLine('#else /* DUK_USE_PACKED_TVAL */') + # Unpacked initializers are written assuming normal struct alignment + # rules so that sizeof(duk_tval) == 16. 32-bit pointers need special + # handling to ensure the individual initializers pad to 16 bytes as + # necessary. + # XXX: 32-bit unpacked duk_tval is not yet supported. 
+ genc.emitLine('#if defined(DUK_UINTPTR_MAX)') + genc.emitLine('#if (DUK_UINTPTR_MAX <= 0xffffffffUL)') + genc.emitLine('#error ROM initializer with unpacked duk_tval does not currently work on 32-bit targets') + genc.emitLine('#endif') + genc.emitLine('#endif') + genc.emitLine('typedef struct duk_rom_tval_undefined duk_rom_tval_undefined;') + genc.emitLine('struct duk_rom_tval_undefined { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') + genc.emitLine('typedef struct duk_rom_tval_null duk_rom_tval_null;') + genc.emitLine('struct duk_rom_tval_null { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') + genc.emitLine('typedef struct duk_rom_tval_boolean duk_rom_tval_boolean;') + genc.emitLine('struct duk_rom_tval_boolean { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint32_t val; duk_uint32_t unused; };') + genc.emitLine('typedef struct duk_rom_tval_number duk_rom_tval_number;') + genc.emitLine('struct duk_rom_tval_number { duk_small_uint_t tag; duk_small_uint_t extra; duk_uint8_t bytes[8]; };') + genc.emitLine('typedef struct duk_rom_tval_object duk_rom_tval_object;') + genc.emitLine('struct duk_rom_tval_object { duk_small_uint_t tag; duk_small_uint_t extra; const duk_heaphdr *val; };') + genc.emitLine('typedef struct duk_rom_tval_string duk_rom_tval_string;') + genc.emitLine('struct duk_rom_tval_string { duk_small_uint_t tag; duk_small_uint_t extra; const duk_heaphdr *val; };') + genc.emitLine('typedef struct duk_rom_tval_lightfunc duk_rom_tval_lightfunc;') + genc.emitLine('struct duk_rom_tval_lightfunc { duk_small_uint_t tag; duk_small_uint_t extra; duk_rom_funcptr ptr; };') + genc.emitLine('typedef struct duk_rom_tval_accessor duk_rom_tval_accessor;') + genc.emitLine('struct duk_rom_tval_accessor { const duk_hobject *get; const duk_hobject *set; };') + genc.emitLine('#endif /* DUK_USE_PACKED_TVAL */') + genc.emitLine('') + + # Double initializer byte shuffle macro to handle byte orders + # without duplicating the entire initializers. + genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') + genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (h), (g), (f), (e), (d), (c), (b), (a) }') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') + genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (a), (b), (c), (d), (e), (f), (g), (h) }') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') + genc.emitLine('#define DUK__DBLBYTES(a,b,c,d,e,f,g,h) { (d), (c), (b), (a), (h), (g), (f), (e) }') + genc.emitLine('#else') + genc.emitLine('#error invalid endianness defines') + genc.emitLine('#endif') + genc.emitLine('') + + # Emit duk_tval initializer literal macros. 
+ genc.emitLine('#if defined(DUK_USE_PACKED_TVAL)') + genc.emitLine('#define DUK__TVAL_NUMBER(hostbytes) { hostbytes }') # bytes already in host order + genc.emitLine('#if defined(DUK_USE_DOUBLE_LE)') + genc.emitLine('#define DUK__TVAL_UNDEFINED() { (const void *) NULL, (DUK_TAG_UNDEFINED << 16) }') + genc.emitLine('#define DUK__TVAL_NULL() { (const void *) NULL, (DUK_TAG_NULL << 16) }') + genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (duk_rom_funcptr) (func), (DUK_TAG_LIGHTFUNC << 16) + (flags) }') + genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { 0, (DUK_TAG_BOOLEAN << 16) + (bval) }') + genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (const void *) (ptr), (DUK_TAG_OBJECT << 16) }') + genc.emitLine('#define DUK__TVAL_STRING(ptr) { (const void *) (ptr), (DUK_TAG_STRING << 16) }') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') + genc.emitLine('#define DUK__TVAL_UNDEFINED() { (DUK_TAG_UNDEFINED << 16), (const void *) NULL }') + genc.emitLine('#define DUK__TVAL_NULL() { (DUK_TAG_NULL << 16), (const void *) NULL }') + genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (DUK_TAG_LIGHTFUNC << 16) + (flags), (duk_rom_funcptr) (func) }') + genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { (DUK_TAG_BOOLEAN << 16) + (bval), 0 }') + genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (DUK_TAG_OBJECT << 16), (const void *) (ptr) }') + genc.emitLine('#define DUK__TVAL_STRING(ptr) { (DUK_TAG_STRING << 16), (const void *) (ptr) }') + genc.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') + genc.emitLine('#define DUK__TVAL_UNDEFINED() { (DUK_TAG_UNDEFINED << 16), (const void *) NULL }') + genc.emitLine('#define DUK__TVAL_NULL() { (DUK_TAG_NULL << 16), (const void *) NULL }') + genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { (DUK_TAG_LIGHTFUNC << 16) + (flags), (duk_rom_funcptr) (func) }') + genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { (DUK_TAG_BOOLEAN << 16) + (bval), 0 }') + genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { (DUK_TAG_OBJECT << 16), (const void *) (ptr) }') + genc.emitLine('#define DUK__TVAL_STRING(ptr) { (DUK_TAG_STRING << 16), (const void *) (ptr) }') + genc.emitLine('#else') + genc.emitLine('#error invalid endianness defines') + genc.emitLine('#endif') + genc.emitLine('#else /* DUK_USE_PACKED_TVAL */') + genc.emitLine('#define DUK__TVAL_NUMBER(hostbytes) { DUK__TAG_NUMBER, 0, hostbytes }') # bytes already in host order + genc.emitLine('#define DUK__TVAL_UNDEFINED() { DUK_TAG_UNDEFINED, 0, {0,0,0,0,0,0,0,0} }') + genc.emitLine('#define DUK__TVAL_NULL() { DUK_TAG_NULL, 0, {0,0,0,0,0,0,0,0} }') + genc.emitLine('#define DUK__TVAL_BOOLEAN(bval) { DUK_TAG_BOOLEAN, 0, (bval), 0 }') + genc.emitLine('#define DUK__TVAL_OBJECT(ptr) { DUK_TAG_OBJECT, 0, (const duk_heaphdr *) (ptr) }') + genc.emitLine('#define DUK__TVAL_STRING(ptr) { DUK_TAG_STRING, 0, (const duk_heaphdr *) (ptr) }') + genc.emitLine('#define DUK__TVAL_LIGHTFUNC(func,flags) { DUK_TAG_LIGHTFUNC, (flags), (duk_rom_funcptr) (func) }') + genc.emitLine('#endif /* DUK_USE_PACKED_TVAL */') + genc.emitLine('#define DUK__TVAL_ACCESSOR(getter,setter) { (const duk_hobject *) (getter), (const duk_hobject *) (setter) }') + +# Emit ROM objects source: the object/function headers themselves, property +# table structs for different property table sizes/types, and property table +# initializers. +def rom_emit_objects(genc, meta, bi_str_map): + objs = meta['objects'] + id_to_bidx = meta['_objid_to_bidx'] + + # Table for compressed ROM pointers; reserve high range of compressed pointer + # values for this purpose. 
This must contain all ROM pointers that might be + # referenced (all objects, strings, and property tables at least). + romptr_compress_list = [] + def compress_rom_ptr(x): + if x == 'NULL': + return 0 + try: + idx = romptr_compress_list.index(x) + res = ROMPTR_FIRST + idx + except ValueError: + romptr_compress_list.append(x) + res = ROMPTR_FIRST + len(romptr_compress_list) - 1 + assert(res <= 0xffff) + return res + + # Need string and object maps (id -> C symbol name) early. + bi_obj_map = {} # object id -> initializer variable name + for idx,obj in enumerate(objs): + bi_obj_map[obj['id']] = 'duk_obj_%d' % idx + + # Add built-in strings and objects to compressed ROM pointers first. + for k in sorted(bi_str_map.keys()): + compress_rom_ptr('&%s' % bi_str_map[k]) + for k in sorted(bi_obj_map.keys()): + compress_rom_ptr('&%s' % bi_obj_map[k]) + + # Property attributes lookup, map metadata attribute string into a + # C initializer. + attr_lookup = { + '': 'DUK_PROPDESC_FLAGS_NONE', + 'w': 'DUK_PROPDESC_FLAGS_W', + 'e': 'DUK_PROPDESC_FLAGS_E', + 'c': 'DUK_PROPDESC_FLAGS_C', + 'we': 'DUK_PROPDESC_FLAGS_WE', + 'wc': 'DUK_PROPDESC_FLAGS_WC', + 'ec': 'DUK_PROPDESC_FLAGS_EC', + 'wec': 'DUK_PROPDESC_FLAGS_WEC', + 'a': 'DUK_PROPDESC_FLAGS_NONE|DUK_PROPDESC_FLAG_ACCESSOR', + 'ea': 'DUK_PROPDESC_FLAGS_E|DUK_PROPDESC_FLAG_ACCESSOR', + 'ca': 'DUK_PROPDESC_FLAGS_C|DUK_PROPDESC_FLAG_ACCESSOR', + 'eca': 'DUK_PROPDESC_FLAGS_EC|DUK_PROPDESC_FLAG_ACCESSOR', + } + + # Emit property table structs. These are very complex because + # property count *and* individual property type affect the fields + # in the initializer, properties can be data properties or accessor + # properties or different duk_tval types. There are also several + # property table memory layouts, each with a different ordering of + # keys, values, etc. Union initializers would make things a bit + # easier but they're not very portable (being C99). + # + # The easy solution is to use a separate initializer type for each + # property type. Could also cache and reuse identical initializers + # but there'd be very few of them so it's more straightforward to + # not reuse the structs. + # + # NOTE: naming is a bit inconsistent here, duk_tval is used also + # to refer to property value initializers like a getter/setter pair. 
+ + genc.emitLine('#if defined(DUK_USE_HOBJECT_LAYOUT_1)') + for idx,obj in enumerate(objs): + numprops = len(obj['properties']) + if numprops == 0: + continue + tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) + tmp += 'struct duk_romprops_%d { ' % idx + for idx,val in enumerate(obj['properties']): + tmp += 'const duk_hstring *key%d; ' % idx + for idx,val in enumerate(obj['properties']): + # XXX: fastint support + tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) + for idx,val in enumerate(obj['properties']): + tmp += 'duk_uint8_t flags%d; ' % idx + tmp += '};' + genc.emitLine(tmp) + genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_2)') + for idx,obj in enumerate(objs): + numprops = len(obj['properties']) + if numprops == 0: + continue + tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) + tmp += 'struct duk_romprops_%d { ' % idx + for idx,val in enumerate(obj['properties']): + # XXX: fastint support + tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) + for idx,val in enumerate(obj['properties']): + tmp += 'const duk_hstring *key%d; ' % idx + for idx,val in enumerate(obj['properties']): + tmp += 'duk_uint8_t flags%d; ' % idx + # Padding follows for flags, but we don't need to emit it + # (at the moment there is never an array or hash part). + tmp += '};' + genc.emitLine(tmp) + genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_3)') + for idx,obj in enumerate(objs): + numprops = len(obj['properties']) + if numprops == 0: + continue + tmp = 'typedef struct duk_romprops_%d duk_romprops_%d; ' % (idx, idx) + tmp += 'struct duk_romprops_%d { ' % idx + for idx,val in enumerate(obj['properties']): + # XXX: fastint support + tmp += '%s val%d; ' % (rom_get_value_initializer_type(meta, val, bi_str_map, bi_obj_map), idx) + # No array values + for idx,val in enumerate(obj['properties']): + tmp += 'const duk_hstring *key%d; ' % idx + # No hash index + for idx,val in enumerate(obj['properties']): + tmp += 'duk_uint8_t flags%d; ' % idx + tmp += '};' + genc.emitLine(tmp) + genc.emitLine('#else') + genc.emitLine('#error invalid object layout') + genc.emitLine('#endif') + genc.emitLine('') + + # Forward declare all property tables so that objects can reference them. + # Also pointer compress them. + + for idx,obj in enumerate(objs): + numprops = len(obj['properties']) + if numprops == 0: + continue + + # We would like to use DUK_INTERNAL_DECL here, but that maps + # to "static const" in a single file build which has C++ + # portability issues: you can't forward declare a static const. + # We can't reorder the property tables to avoid this because + # there are cyclic references. So, as the current workaround, + # declare as external. + genc.emitLine('DUK_EXTERNAL_DECL const duk_romprops_%d duk_prop_%d;' % (idx, idx)) + + # Add property tables to ROM compressed pointers too. + compress_rom_ptr('&duk_prop_%d' % idx) + genc.emitLine('') + + # Forward declare all objects so that objects can reference them, + # e.g. internal prototype reference. + + for idx,obj in enumerate(objs): + # Careful with C++: must avoid redefining a non-extern const. + # See commentary above for duk_prop_%d forward declarations. 
+ if obj.get('callable', False): + genc.emitLine('DUK_EXTERNAL_DECL const duk_romfun duk_obj_%d;' % idx) + elif obj.get('class') == 'Array': + genc.emitLine('DUK_EXTERNAL_DECL const duk_romarr duk_obj_%d;' % idx) + else: + genc.emitLine('DUK_EXTERNAL_DECL const duk_romobj duk_obj_%d;' % idx) + genc.emitLine('') + + # Define objects, reference property tables. Objects will be + # logically non-extensible so also leave their extensible flag + # cleared despite what metadata requests; the runtime code expects + # ROM objects to be non-extensible. + for idx,obj in enumerate(objs): + numprops = len(obj['properties']) + + isfunc = obj.get('callable', False) + + if isfunc: + tmp = 'DUK_EXTERNAL const duk_romfun duk_obj_%d = ' % idx + elif obj.get('class') == 'Array': + tmp = 'DUK_EXTERNAL const duk_romarr duk_obj_%d = ' % idx + else: + tmp = 'DUK_EXTERNAL const duk_romobj duk_obj_%d = ' % idx + + flags = [ 'DUK_HTYPE_OBJECT', 'DUK_HEAPHDR_FLAG_READONLY' ] + if isfunc: + flags.append('DUK_HOBJECT_FLAG_NATFUNC') + flags.append('DUK_HOBJECT_FLAG_STRICT') + flags.append('DUK_HOBJECT_FLAG_NEWENV') + if obj.get('constructable', False): + flags.append('DUK_HOBJECT_FLAG_CONSTRUCTABLE') + if obj.get('class') == 'Array': + flags.append('DUK_HOBJECT_FLAG_EXOTIC_ARRAY') + flags.append('DUK_HOBJECT_CLASS_AS_FLAGS(%d)' % class_to_number(obj['class'])) # XXX: use constant, not number + + refcount = 1 # refcount is faked to be always 1 + if numprops == 0: + props = 'NULL' + else: + props = '&duk_prop_%d' % idx + props_enc16 = compress_rom_ptr(props) + + if obj.has_key('internal_prototype'): + iproto = '&%s' % bi_obj_map[obj['internal_prototype']] + else: + iproto = 'NULL' + iproto_enc16 = compress_rom_ptr(iproto) + + e_size = numprops + e_next = e_size + a_size = 0 # never an array part for now + h_size = 0 # never a hash for now; not appropriate for perf relevant builds + + if isfunc: + nativefunc = obj['native'] + if obj.get('varargs', False): + nargs = 'DUK_VARARGS' + elif obj.has_key('nargs'): + nargs = '%d' % obj['nargs'] + else: + assert(False) # 'nargs' should be defaulted from 'length' at metadata load + magic = '%d' % resolve_magic(obj.get('magic', None), id_to_bidx) + else: + nativefunc = 'dummy' + nargs = '0' + magic = '0' + + assert(a_size == 0) + assert(h_size == 0) + if isfunc: + tmp += 'DUK__ROMFUN_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d,%s,%s,%s);' % \ + ('|'.join(flags), refcount, props, props_enc16, \ + iproto, iproto_enc16, e_size, e_next, a_size, h_size, \ + nativefunc, nargs, magic) + elif obj.get('class') == 'Array': + arrlen = 0 + tmp += 'DUK__ROMARR_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d,%d);' % \ + ('|'.join(flags), refcount, props, props_enc16, \ + iproto, iproto_enc16, e_size, e_next, a_size, h_size, arrlen) + else: + tmp += 'DUK__ROMOBJ_INIT(%s,%d,%s,%d,%s,%d,%d,%d,%d,%d);' % \ + ('|'.join(flags), refcount, props, props_enc16, \ + iproto, iproto_enc16, e_size, e_next, a_size, h_size) + + genc.emitLine(tmp) + + # Property tables. Can reference arbitrary strings and objects as + # they're defined before them. + + # Properties will be non-configurable, but must be writable so that + # standard property semantics allow shadowing properties to be + # established in inherited objects (e.g. "var obj={}; obj.toString + # = myToString"). Enumerable can also be kept. 
+ + def _prepAttrs(val): + attrs = val['attributes'] + assert('c' not in attrs) + return attr_lookup[attrs] + + def _emitPropTableInitializer(idx, obj, layout): + init_vals = [] + init_keys = [] + init_flags = [] + + numprops = len(obj['properties']) + for val in obj['properties']: + init_keys.append('(const duk_hstring *)&%s' % bi_str_map[val['key']]) + for val in obj['properties']: + # XXX: fastint support + init_vals.append('%s' % rom_get_value_initializer_literal(meta, val, bi_str_map, bi_obj_map)) + for val in obj['properties']: + init_flags.append('%s' % _prepAttrs(val)) + + if layout == 1: + initlist = init_keys + init_vals + init_flags + elif layout == 2: + initlist = init_vals + init_keys + init_flags + elif layout == 3: + # Same as layout 2 now, no hash/array + initlist = init_vals + init_keys + init_flags + + if len(initlist) > 0: + genc.emitLine('DUK_EXTERNAL const duk_romprops_%d duk_prop_%d = {%s};' % (idx, idx, ','.join(initlist))) + + genc.emitLine('#if defined(DUK_USE_HOBJECT_LAYOUT_1)') + for idx,obj in enumerate(objs): + _emitPropTableInitializer(idx, obj, 1) + genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_2)') + for idx,obj in enumerate(objs): + _emitPropTableInitializer(idx, obj, 2) + genc.emitLine('#elif defined(DUK_USE_HOBJECT_LAYOUT_3)') + for idx,obj in enumerate(objs): + _emitPropTableInitializer(idx, obj, 3) + genc.emitLine('#else') + genc.emitLine('#error invalid object layout') + genc.emitLine('#endif') + genc.emitLine('') + + # Emit a list of ROM builtins (those objects needing a bidx). + # + # cdecl > explain const int * const foo; + # declare foo as const pointer to const int + + count_bidx = 0 + for bi in objs: + if bi.get('bidx_used', False): + count_bidx += 1 + genc.emitLine('DUK_INTERNAL const duk_hobject * const duk_rom_builtins_bidx[%d] = {' % count_bidx) + for bi in objs: + if not bi.get('bidx_used', False): + continue # for this we want the toplevel objects only + genc.emitLine('\t(const duk_hobject *) &%s,' % bi_obj_map[bi['id']]) + genc.emitLine('};') + + # Emit a table of compressed ROM pointers. We must be able to + # compress ROM pointers at compile time so we assign running + # indices to them. User pointer compression macros must use this + # array to encode/decode ROM pointers. + + genc.emitLine('') + genc.emitLine('#if defined(DUK_USE_ROM_OBJECTS) && defined(DUK_USE_HEAPPTR16)') + genc.emitLine('DUK_EXTERNAL const void * const duk_rom_compressed_pointers[%d] = {' % (len(romptr_compress_list) + 1)) + for idx,ptr in enumerate(romptr_compress_list): + genc.emitLine('\t(const void *) %s, /* 0x%04x */' % (ptr, ROMPTR_FIRST + idx)) + romptr_highest = ROMPTR_FIRST + len(romptr_compress_list) - 1 + genc.emitLine('\tNULL') # for convenience + genc.emitLine('};') + genc.emitLine('#endif') + + print('%d compressed rom pointers (used range is [0x%04x,0x%04x], %d space left)' % \ + (len(romptr_compress_list), ROMPTR_FIRST, romptr_highest, 0xffff - romptr_highest)) + + # Undefine helpers. + genc.emitLine('') + for i in [ + 'DUK__STRHASH16', + 'DUK__STRHASH32', + 'DUK__DBLBYTES', + 'DUK__TVAL_NUMBER', + 'DUK__TVAL_UNDEFINED', + 'DUK__TVAL_NULL', + 'DUK__TVAL_BOOLEAN', + 'DUK__TVAL_OBJECT', + 'DUK__TVAL_STRING', + 'DUK__STRINIT', + 'DUK__ROMOBJ_INIT', + 'DUK__ROMFUN_INIT' + ]: + genc.emitLine('#undef ' + i) + + return romptr_compress_list + +# Emit ROM objects header. 
+def rom_emit_objects_header(genc, meta): + bidx = 0 + for bi in meta['objects']: + if not bi.get('bidx_used', False): + continue # for this we want the toplevel objects only + genc.emitDefine('DUK_BIDX_' + '_'.join(bi['id'].upper().split('_')[1:]), bidx) # bi_foo_bar -> FOO_BAR + bidx += 1 + count_bidx = bidx + genc.emitDefine('DUK_NUM_BUILTINS', count_bidx) + genc.emitDefine('DUK_NUM_BIDX_BUILTINS', count_bidx) + genc.emitDefine('DUK_NUM_ALL_BUILTINS', len(meta['objects'])) + genc.emitLine('') + genc.emitLine('#if !defined(DUK_SINGLE_FILE)') # C++ static const workaround + genc.emitLine('DUK_INTERNAL_DECL const duk_hobject * const duk_rom_builtins_bidx[%d];' % count_bidx) + genc.emitLine('#endif') + + # XXX: missing declarations here, not an issue for single source build. + # Add missing declarations. + # XXX: For example, 'DUK_EXTERNAL_DECL ... duk_rom_compressed_pointers[]' is missing. + +# +# Shared for both RAM and ROM +# + +def emit_header_native_function_declarations(genc, meta): + emitted = {} # To suppress duplicates + funclist = [] + def _emit(fname): + if not emitted.has_key(fname): + emitted[fname] = True + funclist.append(fname) + + for o in meta['objects']: + if o.has_key('native'): + _emit(o['native']) + + for p in o['properties']: + v = p['value'] + if isinstance(v, dict) and v['type'] == 'lightfunc': + assert(v.has_key('native')) + _emit(v['native']) + #print('Lightfunc function declaration: %r' % v['native']) + + for fname in funclist: + # Visibility depends on whether the function is Duktape internal or user. + # Use a simple prefix for now. + if fname[:4] == 'duk_': + genc.emitLine('DUK_INTERNAL_DECL duk_ret_t %s(duk_context *ctx);' % fname) + else: + genc.emitLine('extern duk_ret_t %s(duk_context *ctx);' % fname) + +# +# Main +# + +def main(): + parser = optparse.OptionParser() + parser.add_option('--buildinfo', dest='buildinfo', help='Build info, JSON format') + parser.add_option('--used-stridx-metadata', dest='used_stridx_metadata', help='DUK_STRIDX_xxx used by source/headers, JSON format') + parser.add_option('--strings-metadata', dest='strings_metadata', help='Built-in strings metadata file, YAML format') + parser.add_option('--objects-metadata', dest='objects_metadata', help='Built-in objects metadata file, YAML format') + parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', action='append', default=[], help='User strings and objects to add, YAML format (can be repeated for multiple overrides)') + parser.add_option('--ram-support', dest='ram_support', action='store_true', default=False, help='Support RAM strings/objects') + parser.add_option('--rom-support', dest='rom_support', action='store_true', default=False, help='Support ROM strings/objects (increases output size considerably)') + parser.add_option('--rom-auto-lightfunc', dest='rom_auto_lightfunc', action='store_true', default=False, help='Convert ROM built-in function properties into lightfuncs automatically whenever possible') + parser.add_option('--out-header', dest='out_header', help='Output header file') + parser.add_option('--out-source', dest='out_source', help='Output source file') + parser.add_option('--out-metadata-json', dest='out_metadata_json', help='Output metadata file') + parser.add_option('--dev-dump-final-ram-metadata', dest='dev_dump_final_ram_metadata', help='Development option') + parser.add_option('--dev-dump-final-rom-metadata', dest='dev_dump_final_rom_metadata', help='Development option') + (opts, args) = parser.parse_args() + + # Options processing. 
+ + if opts.buildinfo is None: + raise Exception('missing buildinfo') + + with open(opts.buildinfo, 'rb') as f: + build_info = dukutil.json_decode(f.read().strip()) + + # Read in metadata files, normalizing and merging as necessary. + + ram_meta = load_metadata(opts, rom=False, build_info=build_info) + rom_meta = load_metadata(opts, rom=True, build_info=build_info) + if opts.dev_dump_final_ram_metadata is not None: + dump_metadata(ram_meta, opts.dev_dump_final_ram_metadata) + if opts.dev_dump_final_rom_metadata is not None: + dump_metadata(rom_meta, opts.dev_dump_final_rom_metadata) + + # Create RAM init data bitstreams. + + ramstr_data, ramstr_maxlen = gen_ramstr_initdata_bitpacked(ram_meta) + ram_native_funcs, ram_natfunc_name_to_natidx = get_ramobj_native_func_maps(ram_meta) + + if opts.ram_support: + ramobj_data_le = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'little') + ramobj_data_be = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'big') + ramobj_data_me = gen_ramobj_initdata_bitpacked(ram_meta, ram_native_funcs, ram_natfunc_name_to_natidx, 'mixed') + + # Write source and header files. + + gc_src = dukutil.GenerateC() + gc_src.emitHeader('genbuiltins.py') + gc_src.emitLine('#include "duk_internal.h"') + gc_src.emitLine('') + gc_src.emitLine('#if defined(DUK_USE_ROM_STRINGS)') + if opts.rom_support: + rom_bi_str_map = rom_emit_strings_source(gc_src, rom_meta) + rom_emit_object_initializer_types_and_macros(gc_src) + rom_emit_objects(gc_src, rom_meta, rom_bi_str_map) + else: + gc_src.emitLine('#error ROM support not enabled, rerun prepare_sources.py with --rom-support') + gc_src.emitLine('#else /* DUK_USE_ROM_STRINGS */') + emit_ramstr_source_strinit_data(gc_src, ramstr_data) + gc_src.emitLine('#endif /* DUK_USE_ROM_STRINGS */') + gc_src.emitLine('') + gc_src.emitLine('#if defined(DUK_USE_ROM_OBJECTS)') + if opts.rom_support: + gc_src.emitLine('#if !defined(DUK_USE_ROM_STRINGS)') + gc_src.emitLine('#error DUK_USE_ROM_OBJECTS requires DUK_USE_ROM_STRINGS') + gc_src.emitLine('#endif') + else: + gc_src.emitLine('#error ROM support not enabled, rerun prepare_sources.py with --rom-support') + gc_src.emitLine('#else /* DUK_USE_ROM_OBJECTS */') + if opts.ram_support: + emit_ramobj_source_nativefunc_array(gc_src, ram_native_funcs) # endian independent + gc_src.emitLine('#if defined(DUK_USE_DOUBLE_LE)') + emit_ramobj_source_objinit_data(gc_src, ramobj_data_le) + gc_src.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') + emit_ramobj_source_objinit_data(gc_src, ramobj_data_be) + gc_src.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') + emit_ramobj_source_objinit_data(gc_src, ramobj_data_me) + gc_src.emitLine('#else') + gc_src.emitLine('#error invalid endianness defines') + gc_src.emitLine('#endif') + else: + gc_src.emitLine('#error RAM support not enabled, rerun prepare_sources.py with --ram-support') + gc_src.emitLine('#endif /* DUK_USE_ROM_OBJECTS */') + + gc_hdr = dukutil.GenerateC() + gc_hdr.emitHeader('genbuiltins.py') + gc_hdr.emitLine('#ifndef DUK_BUILTINS_H_INCLUDED') + gc_hdr.emitLine('#define DUK_BUILTINS_H_INCLUDED') + gc_hdr.emitLine('') + gc_hdr.emitLine('#if defined(DUK_USE_ROM_STRINGS)') + if opts.rom_support: + emit_header_stridx_defines(gc_hdr, rom_meta) + rom_emit_strings_header(gc_hdr, rom_meta) + else: + gc_hdr.emitLine('#error ROM support not enabled, rerun prepare_sources.py with --rom-support') + gc_hdr.emitLine('#else /* DUK_USE_ROM_STRINGS */') + if opts.ram_support: + 
emit_header_stridx_defines(gc_hdr, ram_meta) + emit_ramstr_header_strinit_defines(gc_hdr, ram_meta, ramstr_data, ramstr_maxlen) + else: + gc_hdr.emitLine('#error RAM support not enabled, rerun prepare_sources.py with --ram-support') + gc_hdr.emitLine('#endif /* DUK_USE_ROM_STRINGS */') + gc_hdr.emitLine('') + gc_hdr.emitLine('#if defined(DUK_USE_ROM_OBJECTS)') + if opts.rom_support: + # Currently DUK_USE_ROM_PTRCOMP_FIRST must match our fixed + # define, and the two must be updated in sync. Catch any + # mismatch to avoid difficult to diagnose errors. + gc_hdr.emitLine('#if !defined(DUK_USE_ROM_PTRCOMP_FIRST)') + gc_hdr.emitLine('#error missing DUK_USE_ROM_PTRCOMP_FIRST define') + gc_hdr.emitLine('#endif') + gc_hdr.emitLine('#if (DUK_USE_ROM_PTRCOMP_FIRST != %dL)' % ROMPTR_FIRST) + gc_hdr.emitLine('#error DUK_USE_ROM_PTRCOMP_FIRST must match ROMPTR_FIRST in genbuiltins.py (%d), update manually and re-dist' % ROMPTR_FIRST) + gc_hdr.emitLine('#endif') + emit_header_native_function_declarations(gc_hdr, rom_meta) + rom_emit_objects_header(gc_hdr, rom_meta) + else: + gc_hdr.emitLine('#error RAM support not enabled, rerun prepare_sources.py with --ram-support') + gc_hdr.emitLine('#else /* DUK_USE_ROM_OBJECTS */') + if opts.ram_support: + emit_header_native_function_declarations(gc_hdr, ram_meta) + emit_ramobj_header_nativefunc_array(gc_hdr, ram_native_funcs) + emit_ramobj_header_objects(gc_hdr, ram_meta) + gc_hdr.emitLine('#if defined(DUK_USE_DOUBLE_LE)') + emit_ramobj_header_initdata(gc_hdr, ramobj_data_le) + gc_hdr.emitLine('#elif defined(DUK_USE_DOUBLE_BE)') + emit_ramobj_header_initdata(gc_hdr, ramobj_data_be) + gc_hdr.emitLine('#elif defined(DUK_USE_DOUBLE_ME)') + emit_ramobj_header_initdata(gc_hdr, ramobj_data_me) + gc_hdr.emitLine('#else') + gc_hdr.emitLine('#error invalid endianness defines') + gc_hdr.emitLine('#endif') + else: + gc_hdr.emitLine('#error RAM support not enabled, rerun prepare_sources.py with --ram-support') + gc_hdr.emitLine('#endif /* DUK_USE_ROM_OBJECTS */') + gc_hdr.emitLine('#endif /* DUK_BUILTINS_H_INCLUDED */') + + with open(opts.out_source, 'wb') as f: + f.write(gc_src.getString()) + + with open(opts.out_header, 'wb') as f: + f.write(gc_hdr.getString()) + + # Write a JSON file with build metadata, e.g. built-in strings. + + ver = long(build_info['version']) + plain_strs = [] + base64_strs = [] + str_objs = [] + for s in ram_meta['strings_stridx']: # XXX: provide all lists? 
+ t1 = bytes_to_unicode(s['str']) + t2 = unicode_to_bytes(s['str']).encode('base64').strip() + plain_strs.append(t1) + base64_strs.append(t2) + str_objs.append({ + 'plain': t1, 'base64': t2, 'define': s['define'] + }) + meta = { + 'comment': 'Metadata for Duktape build', + 'duk_version': ver, + 'duk_version_string': '%d.%d.%d' % (ver / 10000, (ver / 100) % 100, ver % 100), + 'git_commit': build_info['git_commit'], + 'git_branch': build_info['git_branch'], + 'git_describe': build_info['git_describe'], + 'builtin_strings': plain_strs, + 'builtin_strings_base64': base64_strs, + 'builtin_strings_info': str_objs + } + + with open(opts.out_metadata_json, 'wb') as f: + f.write(json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True)) + +if __name__ == '__main__': + main() diff --git a/tools/genconfig.py b/tools/genconfig.py new file mode 100644 index 00000000..ba8a3dc7 --- /dev/null +++ b/tools/genconfig.py @@ -0,0 +1,1530 @@ +#!/usr/bin/env python2 +# +# Process Duktape option metadata and produce various useful outputs: +# +# - duk_config.h with specific or autodetected platform, compiler, and +# architecture; forced options; sanity checks; etc +# - option documentation for Duktape 1.x feature options (DUK_OPT_xxx) +# - option documentation for Duktape 1.x/2.x config options (DUK_USE_xxx) +# +# Genconfig tries to build all outputs based on modular metadata, so that +# managing a large number of config options (which is hard to avoid given +# the wide range of targets Duktape supports) remains maintainable. +# +# Genconfig does *not* try to support all exotic platforms out there. +# Instead, the goal is to allow the metadata to be extended, or to provide +# a reasonable starting point for manual duk_config.h tweaking. +# +# For Duktape 1.3 release the main goal was to autogenerate a Duktape 1.2 +# compatible "autodetect" header from legacy snippets, with other outputs +# being experimental. For Duktape 1.4 duk_config.h is always created from +# modular sources. +# + +import os +import sys +import re +import json +import yaml +import optparse +import tarfile +import tempfile +import atexit +import shutil +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +# +# Globals holding scanned metadata, helper snippets, etc +# + +# Metadata to scan from config files. +use_defs = None +use_defs_list = None +opt_defs = None +opt_defs_list = None +use_tags = None +use_tags_list = None +tags_meta = None +required_use_meta_keys = [ + 'define', + 'introduced', + 'default', + 'tags', + 'description' +] +allowed_use_meta_keys = [ + 'define', + 'feature_enables', + 'feature_disables', + 'feature_snippet', + 'feature_no_default', + 'related_feature_defines', + 'introduced', + 'deprecated', + 'removed', + 'unused', + 'requires', + 'conflicts', + 'related', + 'default', + 'tags', + 'description', +] +required_opt_meta_keys = [ + 'define', + 'introduced', + 'tags', + 'description' +] +allowed_opt_meta_keys = [ + 'define', + 'introduced', + 'deprecated', + 'removed', + 'unused', + 'requires', + 'conflicts', + 'related', + 'tags', + 'description' +] + +# Preferred tag order for option documentation. +doc_tag_order = [ + 'portability', + 'memory', + 'lowmemory', + 'ecmascript', + 'execution', + 'debugger', + 'debug', + 'development' +] + +# Preferred tag order for generated C header files. +header_tag_order = doc_tag_order + +# Helper headers snippets. +helper_snippets = None + +# Assume these provides come from outside. 
+assumed_provides = { + 'DUK_SINGLE_FILE': True, # compiling Duktape from a single source file (duktape.c) version + 'DUK_COMPILING_DUKTAPE': True, # compiling Duktape (not user application) + 'DUK_CONFIG_H_INCLUDED': True, # artifact, include guard +} + +# Platform files must provide at least these (additional checks +# in validate_platform_file()). Fill-ins provide missing optionals. +platform_required_provides = [ + 'DUK_USE_OS_STRING' # must be #define'd +] + +# Architecture files must provide at least these (additional checks +# in validate_architecture_file()). Fill-ins provide missing optionals. +architecture_required_provides = [ + 'DUK_USE_ARCH_STRING' +] + +# Compiler files must provide at least these (additional checks +# in validate_compiler_file()). Fill-ins provide missing optionals. +compiler_required_provides = [ + # Compilers need a lot of defines; missing defines are automatically + # filled in with defaults (which are mostly compiler independent), so + # the requires define list is not very large. + + 'DUK_USE_COMPILER_STRING', # must be #define'd + 'DUK_USE_BRANCH_HINTS', # may be #undef'd, as long as provided + 'DUK_USE_VARIADIC_MACROS', # may be #undef'd, as long as provided + 'DUK_USE_UNION_INITIALIZERS' # may be #undef'd, as long as provided +] + +# +# Miscellaneous helpers +# + +def get_auto_delete_tempdir(): + tmpdir = tempfile.mkdtemp(suffix='-genconfig') + def _f(dirname): + #print('Deleting temporary directory: %r' % dirname) + if os.path.isdir(dirname) and '-genconfig' in dirname: + shutil.rmtree(dirname) + atexit.register(_f, tmpdir) + return tmpdir + +def strip_comments_from_lines(lines): + # Not exact but close enough. Doesn't handle string literals etc, + # but these are not a concrete issue for scanning preprocessor + # #define references. + # + # Comment contents are stripped of any DUK_ prefixed text to avoid + # incorrect requires/provides detection. Other comment text is kept; + # in particular a "/* redefine */" comment must remain intact here. + # (The 'redefine' hack is not actively needed now.) + # + # Avoid Python 2.6 vs. Python 2.7 argument differences. + + def censor(x): + return re.sub(re.compile('DUK_\w+', re.MULTILINE), 'xxx', x.group(0)) + + tmp = '\n'.join(lines) + tmp = re.sub(re.compile('/\*.*?\*/', re.MULTILINE | re.DOTALL), censor, tmp) + tmp = re.sub(re.compile('//.*?$', re.MULTILINE), censor, tmp) + return tmp.split('\n') + +# Header snippet representation: lines, provides defines, requires defines. +re_line_provides = re.compile(r'^#(?:define|undef)\s+(\w+).*$') +re_line_requires = re.compile(r'(DUK_[A-Z0-9_]+)') # uppercase only, don't match DUK_USE_xxx for example +class Snippet: + lines = None # lines of text and/or snippets + provides = None # map from define to 'True' for now + requires = None # map from define to 'True' for now + + def __init__(self, lines, provides=None, requires=None, autoscan_requires=True, autoscan_provides=True): + self.lines = [] + if not isinstance(lines, list): + raise Exception('Snippet constructor must be a list (not e.g. 
a string): %s' % repr(lines)) + for line in lines: + if isinstance(line, str): + self.lines.append(line) + elif isinstance(line, unicode): + self.lines.append(line.encode('utf-8')) + else: + raise Exception('invalid line: %r' % line) + self.provides = {} + if provides is not None: + for k in provides.keys(): + self.provides[k] = True + self.requires = {} + if requires is not None: + for k in requires.keys(): + self.requires[k] = True + + stripped_lines = strip_comments_from_lines(lines) + # for line in stripped_lines: print(line) + + for line in stripped_lines: + # Careful with order, snippet may self-reference its own + # defines in which case there's no outward dependency. + # (This is not 100% because the order of require/provide + # matters and this is not handled now.) + # + # Also, some snippets may #undef/#define another define but + # they don't "provide" the define as such. Such redefinitions + # are marked "/* redefine */" in the snippets. They're best + # avoided (and not currently needed in Duktape 1.4.0). + + if autoscan_provides: + m = re_line_provides.match(line) + if m is not None and '/* redefine */' not in line and \ + len(m.group(1)) > 0 and m.group(1)[-1] != '_': + # Don't allow e.g. DUK_USE_ which results from matching DUK_USE_xxx + #print('PROVIDES: %r' % m.group(1)) + self.provides[m.group(1)] = True + if autoscan_requires: + matches = re.findall(re_line_requires, line) + for m in matches: + if len(m) > 0 and m[-1] == '_': + # Don't allow e.g. DUK_USE_ which results from matching DUK_USE_xxx + pass + elif m[:7] == 'DUK_OPT': + # DUK_OPT_xxx always come from outside + pass + elif m[:7] == 'DUK_USE': + # DUK_USE_xxx are internal and they should not be 'requirements' + pass + elif self.provides.has_key(m): + # Snippet provides it's own require; omit + pass + else: + #print('REQUIRES: %r' % m) + self.requires[m] = True + + def fromFile(cls, filename): + lines = [] + with open(filename, 'rb') as f: + for line in f: + if line[-1] == '\n': + line = line[:-1] + if line[:8] == '#snippet': + m = re.match(r'#snippet\s+"(.*?)"', line) + # XXX: better plumbing for lookup path + sub_fn = os.path.normpath(os.path.join(filename, '..', '..', 'header-snippets', m.group(1))) + #print('#snippet ' + sub_fn) + sn = Snippet.fromFile(sub_fn) + lines += sn.lines + else: + lines.append(line) + return Snippet(lines, autoscan_requires=True, autoscan_provides=True) + fromFile = classmethod(fromFile) + + def merge(cls, snippets): + ret = Snippet([], [], []) + for s in snippets: + ret.lines += s.lines + for k in s.provides.keys(): + ret.provides[k] = True + for k in s.requires.keys(): + ret.requires[k] = True + return ret + merge = classmethod(merge) + +# Helper for building a text file from individual lines, injected files, etc. +# Inserted values are converted to Snippets so that their provides/requires +# information can be tracked. When non-C outputs are created, these will be +# bogus but ignored. 
+class FileBuilder: + vals = None # snippet list + base_dir = None + use_cpp_warning = False + + def __init__(self, base_dir=None, use_cpp_warning=False): + self.vals = [] + self.base_dir = base_dir + self.use_cpp_warning = use_cpp_warning + + def line(self, line): + self.vals.append(Snippet([ line ])) + + def lines(self, lines): + if len(lines) > 0 and lines[-1] == '\n': + lines = lines[:-1] # strip last newline to avoid empty line + self.vals.append(Snippet(lines.split('\n'))) + + def empty(self): + self.vals.append(Snippet([ '' ])) + + def rst_heading(self, title, char, doubled=False): + tmp = [] + if doubled: + tmp.append(char * len(title)) + tmp.append(title) + tmp.append(char * len(title)) + self.vals.append(Snippet(tmp)) + + def snippet_relative(self, fn): + sn = Snippet.fromFile(os.path.join(self.base_dir, fn)) + self.vals.append(sn) + return sn + + def snippet_absolute(self, fn): + sn = Snippet.fromFile(fn) + self.vals.append(sn) + return sn + + def cpp_error(self, msg): + # XXX: assume no newlines etc + self.vals.append(Snippet([ '#error %s' % msg ])) + + def cpp_warning(self, msg): + # XXX: assume no newlines etc + # XXX: support compiler specific warning mechanisms + if self.use_cpp_warning: + # C preprocessor '#warning' is often supported + self.vals.append(Snippet([ '#warning %s' % msg ])) + else: + self.vals.append(Snippet([ '/* WARNING: %s */' % msg ])) + + def cpp_warning_or_error(self, msg, is_error=True): + if is_error: + self.cpp_error(msg) + else: + self.cpp_warning(msg) + + def chdr_comment_line(self, msg): + self.vals.append(Snippet([ '/* %s */' % msg ])) + + def chdr_block_heading(self, msg): + lines = [] + lines.append('') + lines.append('/*') + lines.append(' * ' + msg) + lines.append(' */') + lines.append('') + self.vals.append(Snippet(lines)) + + def join(self): + tmp = [] + for line in self.vals: + if not isinstance(line, object): + raise Exception('self.vals must be all snippets') + for x in line.lines: # x is a Snippet + tmp.append(x) + return '\n'.join(tmp) + + def fill_dependencies_for_snippets(self, idx_deps): + fill_dependencies_for_snippets(self.vals, idx_deps) + +# Insert missing define dependencies into index 'idx_deps' repeatedly +# until no unsatisfied dependencies exist. This is used to pull in +# the required DUK_F_xxx helper defines without pulling them all in. +# The resolution mechanism also ensures dependencies are pulled in the +# correct order, i.e. DUK_F_xxx helpers may depend on each other (as +# long as there are no circular dependencies). +# +# XXX: this can be simplified a lot +def fill_dependencies_for_snippets(snippets, idx_deps): + # graph[A] = [ B, ... ] <-> B, ... provide something A requires. + graph = {} + snlist = [] + resolved = [] # for printing only + + def add(sn): + if sn in snlist: + return # already present + snlist.append(sn) + + to_add = [] + + for k in sn.requires.keys(): + if assumed_provides.has_key(k): + continue + + found = False + for sn2 in snlist: + if sn2.provides.has_key(k): + if not graph.has_key(sn): + graph[sn] = [] + graph[sn].append(sn2) + found = True # at least one other node provides 'k' + + if not found: + #print('Resolving %r' % k) + resolved.append(k) + + # Find a header snippet which provides the missing define. + # Some DUK_F_xxx files provide multiple defines, so we don't + # necessarily know the snippet filename here. 
+ + sn_req = None + for sn2 in helper_snippets: + if sn2.provides.has_key(k): + sn_req = sn2 + break + if sn_req is None: + print(repr(sn.lines)) + raise Exception('cannot resolve missing require: %r' % k) + + # Snippet may have further unresolved provides; add recursively + to_add.append(sn_req) + + if not graph.has_key(sn): + graph[sn] = [] + graph[sn].append(sn_req) + + for sn in to_add: + add(sn) + + # Add original snippets. This fills in the required nodes + # recursively. + for sn in snippets: + add(sn) + + # Figure out fill-ins by looking for snippets not in original + # list and without any unserialized dependent nodes. + handled = {} + for sn in snippets: + handled[sn] = True + keepgoing = True + while keepgoing: + keepgoing = False + for sn in snlist: + if handled.has_key(sn): + continue + + success = True + for dep in graph.get(sn, []): + if not handled.has_key(dep): + success = False + if success: + snippets.insert(idx_deps, sn) + idx_deps += 1 + snippets.insert(idx_deps, Snippet([ '' ])) + idx_deps += 1 + handled[sn] = True + keepgoing = True + break + + # XXX: detect and handle loops cleanly + for sn in snlist: + if handled.has_key(sn): + continue + print('UNHANDLED KEY') + print('PROVIDES: %r' % sn.provides) + print('REQUIRES: %r' % sn.requires) + print('\n'.join(sn.lines)) + +# print(repr(graph)) +# print(repr(snlist)) +# print('Resolved helper defines: %r' % resolved) +# print('Resolved %d helper defines' % len(resolved)) + +def serialize_snippet_list(snippets): + ret = [] + + emitted_provides = {} + for k in assumed_provides.keys(): + emitted_provides[k] = True + + for sn in snippets: + ret += sn.lines + for k in sn.provides.keys(): + emitted_provides[k] = True + for k in sn.requires.keys(): + if not emitted_provides.has_key(k): + # XXX: conditional warning, happens in some normal cases + #print('WARNING: define %r required, not provided so far' % k) + pass + + return '\n'.join(ret) + +def remove_duplicate_newlines(x): + ret = [] + empty = False + for line in x.split('\n'): + if line == '': + if empty: + pass + else: + ret.append(line) + empty = True + else: + empty = False + ret.append(line) + return '\n'.join(ret) + +def scan_use_defs(dirname): + global use_defs, use_defs_list + use_defs = {} + use_defs_list = [] + + for fn in os.listdir(dirname): + root, ext = os.path.splitext(fn) + if not root.startswith('DUK_USE_') or ext != '.yaml': + continue + with open(os.path.join(dirname, fn), 'rb') as f: + doc = yaml.load(f) + if doc.get('example', False): + continue + if doc.get('unimplemented', False): + print('WARNING: unimplemented: %s' % fn) + continue + dockeys = doc.keys() + for k in dockeys: + if not k in allowed_use_meta_keys: + print('WARNING: unknown key %s in metadata file %s' % (k, fn)) + for k in required_use_meta_keys: + if not k in dockeys: + print('WARNING: missing key %s in metadata file %s' % (k, fn)) + + use_defs[doc['define']] = doc + + keys = use_defs.keys() + keys.sort() + for k in keys: + use_defs_list.append(use_defs[k]) + +def scan_opt_defs(dirname): + global opt_defs, opt_defs_list + opt_defs = {} + opt_defs_list = [] + + for fn in os.listdir(dirname): + root, ext = os.path.splitext(fn) + if not root.startswith('DUK_OPT_') or ext != '.yaml': + continue + with open(os.path.join(dirname, fn), 'rb') as f: + doc = yaml.load(f) + if doc.get('example', False): + continue + if doc.get('unimplemented', False): + print('WARNING: unimplemented: %s' % fn) + continue + dockeys = doc.keys() + for k in dockeys: + if not k in allowed_opt_meta_keys: + 
print('WARNING: unknown key %s in metadata file %s' % (k, fn)) + for k in required_opt_meta_keys: + if not k in dockeys: + print('WARNING: missing key %s in metadata file %s' % (k, fn)) + + opt_defs[doc['define']] = doc + + keys = opt_defs.keys() + keys.sort() + for k in keys: + opt_defs_list.append(opt_defs[k]) + +def scan_use_tags(): + global use_tags, use_tags_list + use_tags = {} + + for doc in use_defs_list: + for tag in doc.get('tags', []): + use_tags[tag] = True + + use_tags_list = use_tags.keys() + use_tags_list.sort() + +def scan_tags_meta(filename): + global tags_meta + + with open(filename, 'rb') as f: + tags_meta = yaml.load(f) + +def scan_helper_snippets(dirname): # DUK_F_xxx snippets + global helper_snippets + helper_snippets = [] + + for fn in os.listdir(dirname): + if (fn[0:6] != 'DUK_F_'): + continue + #print('Autoscanning snippet: %s' % fn) + helper_snippets.append(Snippet.fromFile(os.path.join(dirname, fn))) + +def get_opt_defs(removed=True, deprecated=True, unused=True): + ret = [] + for doc in opt_defs_list: + # XXX: aware of target version + if removed == False and doc.get('removed', None) is not None: + continue + if deprecated == False and doc.get('deprecated', None) is not None: + continue + if unused == False and doc.get('unused', False) == True: + continue + ret.append(doc) + return ret + +def get_use_defs(removed=True, deprecated=True, unused=True): + ret = [] + for doc in use_defs_list: + # XXX: aware of target version + if removed == False and doc.get('removed', None) is not None: + continue + if deprecated == False and doc.get('deprecated', None) is not None: + continue + if unused == False and doc.get('unused', False) == True: + continue + ret.append(doc) + return ret + +def validate_platform_file(filename): + sn = Snippet.fromFile(filename) + + for req in platform_required_provides: + if req not in sn.provides: + raise Exception('Platform %s is missing %s' % (filename, req)) + + # DUK_SETJMP, DUK_LONGJMP, DUK_JMPBUF_TYPE are optional, fill-in + # provides if none defined. + +def validate_architecture_file(filename): + sn = Snippet.fromFile(filename) + + for req in architecture_required_provides: + if req not in sn.provides: + raise Exception('Architecture %s is missing %s' % (filename, req)) + + # Byte order and alignment defines are allowed to be missing, + # a fill-in will handle them. This is necessary because for + # some architecture byte order and/or alignment may vary between + # targets and may be software configurable. + + # XXX: require automatic detection to be signaled? + # e.g. 
define DUK_USE_ALIGN_BY -1 + # define DUK_USE_BYTE_ORDER -1 + +def validate_compiler_file(filename): + sn = Snippet.fromFile(filename) + + for req in compiler_required_provides: + if req not in sn.provides: + raise Exception('Compiler %s is missing %s' % (filename, req)) + +def get_tag_title(tag): + meta = tags_meta.get(tag, None) + if meta is None: + return tag + else: + return meta.get('title', tag) + +def get_tag_description(tag): + meta = tags_meta.get(tag, None) + if meta is None: + return None + else: + return meta.get('description', None) + +def get_tag_list_with_preferred_order(preferred): + tags = [] + + # Preferred tags first + for tag in preferred: + if tag not in tags: + tags.append(tag) + + # Remaining tags in alphabetic order + for tag in use_tags_list: + if tag not in tags: + tags.append(tag) + + #print('Effective tag order: %r' % tags) + return tags + +def rst_format(text): + # XXX: placeholder, need to decide on markup conventions for YAML files + ret = [] + for para in text.split('\n'): + if para == '': + continue + ret.append(para) + return '\n\n'.join(ret) + +def cint_encode(x): + if not isinstance(x, (int, long)): + raise Exception('invalid input: %r' % x) + + # XXX: unsigned constants? + if x > 0x7fffffff or x < -0x80000000: + return '%dLL' % x + elif x > 0x7fff or x < -0x8000: + return '%dL' % x + else: + return '%d' % x + +def cstr_encode(x): + if isinstance(x, unicode): + x = x.encode('utf-8') + if not isinstance(x, str): + raise Exception('invalid input: %r' % x) + + res = '"' + term = False + has_terms = False + for c in x: + if term: + # Avoid ambiguous hex escapes + res += '" "' + term = False + has_terms = True + o = ord(c) + if o < 0x20 or o > 0x7e or c in '"\\': + res += '\\x%02x' % o + term = True + else: + res += c + res += '"' + + if has_terms: + res = '(' + res + ')' + + return res + +# +# Autogeneration of option documentation +# + +# Shared helper to generate DUK_OPT_xxx and DUK_USE_xxx documentation. 
+# XXX: unfinished placeholder +def generate_option_documentation(opts, opt_list=None, rst_title=None, include_default=False): + ret = FileBuilder(use_cpp_warning=opts.use_cpp_warning) + + tags = get_tag_list_with_preferred_order(doc_tag_order) + + title = rst_title + ret.rst_heading(title, '=', doubled=True) + + handled = {} + + for tag in tags: + first = True + + for doc in opt_list: + if tag != doc['tags'][0]: # sort under primary tag + continue + dname = doc['define'] + desc = doc.get('description', None) + + if handled.has_key(dname): + raise Exception('define handled twice, should not happen: %r' % dname) + handled[dname] = True + + if first: # emit tag heading only if there are subsections + ret.empty() + ret.rst_heading(get_tag_title(tag), '=') + + tag_desc = get_tag_description(tag) + if tag_desc is not None: + ret.empty() + ret.line(rst_format(tag_desc)) + first = False + + ret.empty() + ret.rst_heading(dname, '-') + + if desc is not None: + ret.empty() + ret.line(rst_format(desc)) + + if include_default: + ret.empty() + ret.line('Default: ``' + str(doc['default']) + '``') # XXX: rst or other format + + for doc in opt_list: + dname = doc['define'] + if not handled.has_key(dname): + raise Exception('unhandled define (maybe missing from tags list?): %r' % dname) + + ret.empty() + return ret.join() + +def generate_feature_option_documentation(opts): + defs = get_opt_defs() + return generate_option_documentation(opts, opt_list=defs, rst_title='Duktape feature options', include_default=False) + +def generate_config_option_documentation(opts): + defs = get_use_defs() + return generate_option_documentation(opts, opt_list=defs, rst_title='Duktape config options', include_default=True) + +# +# Helpers for duk_config.h generation +# + +def get_forced_options(opts): + # Forced options, last occurrence wins (allows a base config file to be + # overridden by a more specific one). + forced_opts = {} + for val in opts.force_options_yaml: + doc = yaml.load(StringIO(val)) + for k in doc.keys(): + if use_defs.has_key(k): + pass # key is known + else: + print('WARNING: option override key %s not defined in metadata, ignoring' % k) + forced_opts[k] = doc[k] # shallow copy + + if len(forced_opts.keys()) > 0: + print('Overrides: %s' % json.dumps(forced_opts)) + + return forced_opts + +# Emit a default #define / #undef for an option based on +# a config option metadata node (parsed YAML doc). 
+def emit_default_from_config_meta(ret, doc, forced_opts, undef_done): + defname = doc['define'] + defval = forced_opts.get(defname, doc['default']) + + if defval == True: + ret.line('#define ' + defname) + elif defval == False: + if not undef_done: + ret.line('#undef ' + defname) + else: + # Default value is false, and caller has emitted + # an unconditional #undef, so don't emit a duplicate + pass + elif isinstance(defval, (int, long)): + # integer value + ret.line('#define ' + defname + ' ' + cint_encode(defval)) + elif isinstance(defval, (str, unicode)): + # verbatim value + ret.line('#define ' + defname + ' ' + defval) + elif isinstance(defval, dict): + if defval.has_key('verbatim'): + # verbatim text for the entire line + ret.line(defval['verbatim']) + elif defval.has_key('string'): + # C string value + ret.line('#define ' + defname + ' ' + cstr_encode(defval['string'])) + else: + raise Exception('unsupported value for option %s: %r' % (defname, defval)) + else: + raise Exception('unsupported value for option %s: %r' % (defname, defval)) + +# Add a header snippet for detecting presence of DUK_OPT_xxx feature +# options which will be removed in Duktape 2.x. +def add_legacy_feature_option_checks(opts, ret): + ret.chdr_block_heading('Checks for legacy feature options (DUK_OPT_xxx)') + ret.empty() + + defs = [] + for doc in get_opt_defs(): + if doc['define'] not in defs: + defs.append(doc['define']) + for doc in get_opt_defs(): + for dname in doc.get('related_feature_defines', []): + if dname not in defs: + defs.append(dname) + defs.sort() + + for optname in defs: + suggested = [] + for doc in get_use_defs(): + if optname in doc.get('related_feature_defines', []): + suggested.append(doc['define']) + ret.line('#if defined(%s)' % optname) + if len(suggested) > 0: + ret.cpp_warning_or_error('unsupported legacy feature option %s used, consider options: %s' % (optname, ', '.join(suggested)), opts.sanity_strict) + else: + ret.cpp_warning_or_error('unsupported legacy feature option %s used' % optname, opts.sanity_strict) + ret.line('#endif') + + ret.empty() + +# Add a header snippet for checking consistency of DUK_USE_xxx config +# options, e.g. inconsistent options, invalid option values. 
+def add_config_option_checks(opts, ret): + ret.chdr_block_heading('Checks for config option consistency (DUK_USE_xxx)') + ret.empty() + + defs = [] + for doc in get_use_defs(): + if doc['define'] not in defs: + defs.append(doc['define']) + defs.sort() + + for optname in defs: + doc = use_defs[optname] + dname = doc['define'] + + # XXX: more checks + + if doc.get('removed', None) is not None: + ret.line('#if defined(%s)' % dname) + ret.cpp_warning_or_error('unsupported config option used (option has been removed): %s' % dname, opts.sanity_strict) + ret.line('#endif') + elif doc.get('deprecated', None) is not None: + ret.line('#if defined(%s)' % dname) + ret.cpp_warning_or_error('unsupported config option used (option has been deprecated): %s' % dname, opts.sanity_strict) + ret.line('#endif') + + for req in doc.get('requires', []): + ret.line('#if defined(%s) && !defined(%s)' % (dname, req)) + ret.cpp_warning_or_error('config option %s requires option %s (which is missing)' % (dname, req), opts.sanity_strict) + ret.line('#endif') + + for req in doc.get('conflicts', []): + ret.line('#if defined(%s) && defined(%s)' % (dname, req)) + ret.cpp_warning_or_error('config option %s conflicts with option %s (which is also defined)' % (dname, req), opts.sanity_strict) + ret.line('#endif') + + ret.empty() + ret.snippet_relative('cpp_exception_sanity.h.in') + ret.empty() + +# Add a header snippet for providing a __OVERRIDE_DEFINES__ section. +def add_override_defines_section(opts, ret): + ret.empty() + ret.line('/*') + ret.line(' * You may add overriding #define/#undef directives below for') + ret.line(' * customization. You of course cannot un-#include or un-typedef') + ret.line(' * anything; these require direct changes above.') + ret.line(' */') + ret.empty() + ret.line('/* __OVERRIDE_DEFINES__ */') + ret.empty() + +# Add automatic DUK_OPT_XXX and DUK_OPT_NO_XXX handling for backwards +# compatibility with Duktape 1.2 and before. +def add_feature_option_handling(opts, ret, forced_opts, already_provided_keys): + ret.chdr_block_heading('Feature option handling') + + for doc in get_use_defs(removed=False, deprecated=False, unused=False): + # If a related feature option exists, it can be used to force + # enable/disable the target feature. If neither feature option + # (DUK_OPT_xxx or DUK_OPT_NO_xxx) is given, revert to default. + + config_define = doc['define'] + + feature_define = None + feature_no_define = None + inverted = False + if doc.has_key('feature_enables'): + feature_define = doc['feature_enables'] + elif doc.has_key('feature_disables'): + feature_define = doc['feature_disables'] + inverted = True + else: + pass + + if feature_define is not None: + feature_no_define = 'DUK_OPT_NO_' + feature_define[8:] + ret.line('#if defined(%s)' % feature_define) + if inverted: + ret.line('#undef %s' % config_define) + else: + ret.line('#define %s' % config_define) + ret.line('#elif defined(%s)' % feature_no_define) + if inverted: + ret.line('#define %s' % config_define) + else: + ret.line('#undef %s' % config_define) + ret.line('#else') + undef_done = False + + # For some options like DUK_OPT_PACKED_TVAL the default comes + # from platform definition. + if doc.get('feature_no_default', False): + print('Skip default for option %s' % config_define) + ret.line('/* Already provided above */') + elif already_provided_keys.has_key(config_define): + # This is a fallback in case config option metadata is wrong. 
+ print('Skip default for option %s (already provided but not flagged in metadata!)' % config_define) + ret.line('/* Already provided above */') + else: + emit_default_from_config_meta(ret, doc, forced_opts, undef_done) + ret.line('#endif') + elif doc.has_key('feature_snippet'): + ret.lines(doc['feature_snippet']) + else: + pass + + ret.empty() + + ret.empty() + +# Development time helper: add DUK_ACTIVE which provides a runtime C string +# indicating what DUK_USE_xxx config options are active at run time. This +# is useful in genconfig development so that one can e.g. diff the active +# run time options of two headers. This is intended just for genconfig +# development and is not available in normal headers. +def add_duk_active_defines_macro(ret): + ret.chdr_block_heading('DUK_ACTIVE_DEFINES macro (development only)') + + idx = 0 + for doc in get_use_defs(): + defname = doc['define'] + + ret.line('#if defined(%s)' % defname) + ret.line('#define DUK_ACTIVE_DEF%d " %s"' % (idx, defname)) + ret.line('#else') + ret.line('#define DUK_ACTIVE_DEF%d ""' % idx) + ret.line('#endif') + + idx += 1 + + tmp = [] + for i in xrange(idx): + tmp.append('DUK_ACTIVE_DEF%d' % i) + + ret.line('#define DUK_ACTIVE_DEFINES ("Active: ["' + ' '.join(tmp) + ' " ]")') + +# +# duk_config.h generation +# + +# Generate a duk_config.h where platform, architecture, and compiler are +# all either autodetected or specified by user. +# +# Autodetection is based on a configured list of supported platforms, +# architectures, and compilers. For example, platforms.yaml defines the +# supported platforms and provides a helper define (DUK_F_xxx) to use for +# detecting that platform, and names the header snippet to provide the +# platform-specific definitions. Necessary dependencies (DUK_F_xxx) are +# automatically pulled in. +# +# Automatic "fill ins" are used for mandatory platform, architecture, and +# compiler defines which have a reasonable portable default. This reduces +# e.g. compiler-specific define count because there are a lot compiler +# macros which have a good default. +def generate_duk_config_header(opts, meta_dir): + ret = FileBuilder(base_dir=os.path.join(meta_dir, 'header-snippets'), \ + use_cpp_warning=opts.use_cpp_warning) + + forced_opts = get_forced_options(opts) + + platforms = None + with open(os.path.join(meta_dir, 'platforms.yaml'), 'rb') as f: + platforms = yaml.load(f) + architectures = None + with open(os.path.join(meta_dir, 'architectures.yaml'), 'rb') as f: + architectures = yaml.load(f) + compilers = None + with open(os.path.join(meta_dir, 'compilers.yaml'), 'rb') as f: + compilers = yaml.load(f) + + # XXX: indicate feature option support, sanity checks enabled, etc + # in general summary of options, perhaps genconfig command line? 
+ + ret.line('/*') + ret.line(' * duk_config.h configuration header generated by genconfig.py.') + ret.line(' *') + ret.line(' * Git commit: %s' % opts.git_commit or 'n/a') + ret.line(' * Git describe: %s' % opts.git_describe or 'n/a') + ret.line(' * Git branch: %s' % opts.git_branch or 'n/a') + ret.line(' *') + if opts.platform is not None: + ret.line(' * Platform: ' + opts.platform) + else: + ret.line(' * Supported platforms:') + for platf in platforms['autodetect']: + ret.line(' * - %s' % platf.get('name', platf.get('check'))) + ret.line(' *') + if opts.architecture is not None: + ret.line(' * Architecture: ' + opts.architecture) + else: + ret.line(' * Supported architectures:') + for arch in architectures['autodetect']: + ret.line(' * - %s' % arch.get('name', arch.get('check'))) + ret.line(' *') + if opts.compiler is not None: + ret.line(' * Compiler: ' + opts.compiler) + else: + ret.line(' * Supported compilers:') + for comp in compilers['autodetect']: + ret.line(' * - %s' % comp.get('name', comp.get('check'))) + ret.line(' *') + ret.line(' */') + ret.empty() + ret.line('#if !defined(DUK_CONFIG_H_INCLUDED)') + ret.line('#define DUK_CONFIG_H_INCLUDED') + ret.empty() + + ret.chdr_block_heading('Intermediate helper defines') + + # DLL build affects visibility attributes on Windows but unfortunately + # cannot be detected automatically from preprocessor defines or such. + # DLL build status is hidden behind DUK_F_DLL_BUILD and there are two + # ways for that to be set: + # + # - Duktape 1.3 backwards compatible DUK_OPT_DLL_BUILD + # - Genconfig --dll option + ret.chdr_comment_line('DLL build detection') + ret.line('#if defined(DUK_OPT_DLL_BUILD)') + ret.line('#define DUK_F_DLL_BUILD') + ret.line('#elif defined(DUK_OPT_NO_DLL_BUILD)') + ret.line('#undef DUK_F_DLL_BUILD') + ret.line('#else') + if opts.dll: + ret.line('/* configured for DLL build */') + ret.line('#define DUK_F_DLL_BUILD') + else: + ret.line('/* not configured for DLL build */') + ret.line('#undef DUK_F_DLL_BUILD') + ret.line('#endif') + ret.empty() + + idx_deps = len(ret.vals) # position where to emit DUK_F_xxx dependencies + + # Feature selection, system include, Date provider + # Most #include statements are here + + if opts.platform is not None: + ret.chdr_block_heading('Platform: ' + opts.platform) + + ret.snippet_relative('platform_cppextras.h.in') + ret.empty() + + # XXX: better to lookup platforms metadata + include = 'platform_%s.h.in' % opts.platform + abs_fn = os.path.join(meta_dir, 'platforms', include) + validate_platform_file(abs_fn) + ret.snippet_absolute(abs_fn) + else: + ret.chdr_block_heading('Platform autodetection') + + ret.snippet_relative('platform_cppextras.h.in') + ret.empty() + + for idx, platf in enumerate(platforms['autodetect']): + check = platf.get('check', None) + include = platf['include'] + abs_fn = os.path.join(meta_dir, 'platforms', include) + + validate_platform_file(abs_fn) + + if idx == 0: + ret.line('#if defined(%s)' % check) + else: + if check is None: + ret.line('#else') + else: + ret.line('#elif defined(%s)' % check) + ret.line('/* --- %s --- */' % platf.get('name', '???')) + ret.snippet_absolute(abs_fn) + ret.line('#endif /* autodetect platform */') + + ret.empty() + ret.snippet_relative('platform_sharedincludes.h.in') + ret.empty() + + byteorder_provided_by_all = True # byteorder provided by all architecture files + alignment_provided_by_all = True # alignment provided by all architecture files + packedtval_provided_by_all = True # packed tval provided by all architecture files + + 
if opts.architecture is not None: + ret.chdr_block_heading('Architecture: ' + opts.architecture) + + # XXX: better to lookup architectures metadata + include = 'architecture_%s.h.in' % opts.architecture + abs_fn = os.path.join(meta_dir, 'architectures', include) + validate_architecture_file(abs_fn) + sn = ret.snippet_absolute(abs_fn) + if not sn.provides.get('DUK_USE_BYTEORDER', False): + byteorder_provided_by_all = False + if not sn.provides.get('DUK_USE_ALIGN_BY', False): + alignment_provided_by_all = False + if sn.provides.get('DUK_USE_PACKED_TVAL', False): + ret.line('#define DUK_F_PACKED_TVAL_PROVIDED') # signal to fillin + else: + packedtval_provided_by_all = False + else: + ret.chdr_block_heading('Architecture autodetection') + + for idx, arch in enumerate(architectures['autodetect']): + check = arch.get('check', None) + include = arch['include'] + abs_fn = os.path.join(meta_dir, 'architectures', include) + + validate_architecture_file(abs_fn) + + if idx == 0: + ret.line('#if defined(%s)' % check) + else: + if check is None: + ret.line('#else') + else: + ret.line('#elif defined(%s)' % check) + ret.line('/* --- %s --- */' % arch.get('name', '???')) + sn = ret.snippet_absolute(abs_fn) + if not sn.provides.get('DUK_USE_BYTEORDER', False): + byteorder_provided_by_all = False + if not sn.provides.get('DUK_USE_ALIGN_BY', False): + alignment_provided_by_all = False + if sn.provides.get('DUK_USE_PACKED_TVAL', False): + ret.line('#define DUK_F_PACKED_TVAL_PROVIDED') # signal to fillin + else: + packedtval_provided_by_all = False + ret.line('#endif /* autodetect architecture */') + + ret.empty() + + if opts.compiler is not None: + ret.chdr_block_heading('Compiler: ' + opts.compiler) + + # XXX: better to lookup compilers metadata + include = 'compiler_%s.h.in' % opts.compiler + abs_fn = os.path.join(meta_dir, 'compilers', include) + validate_compiler_file(abs_fn) + sn = ret.snippet_absolute(abs_fn) + else: + ret.chdr_block_heading('Compiler autodetection') + + for idx, comp in enumerate(compilers['autodetect']): + check = comp.get('check', None) + include = comp['include'] + abs_fn = os.path.join(meta_dir, 'compilers', include) + + validate_compiler_file(abs_fn) + + if idx == 0: + ret.line('#if defined(%s)' % check) + else: + if check is None: + ret.line('#else') + else: + ret.line('#elif defined(%s)' % check) + ret.line('/* --- %s --- */' % comp.get('name', '???')) + sn = ret.snippet_absolute(abs_fn) + ret.line('#endif /* autodetect compiler */') + + ret.empty() + + # DUK_F_UCLIBC is special because __UCLIBC__ is provided by an #include + # file, so the check must happen after platform includes. It'd be nice + # for this to be automatic (e.g. DUK_F_UCLIBC.h.in could indicate the + # dependency somehow). 
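A hedged sketch of what the architecture/compiler autodetection loops above emit, with DUK_F_X86/DUK_F_X64 used only as plausible example checks. When a snippet does not itself provide DUK_USE_BYTEORDER, DUK_USE_ALIGN_BY, or DUK_USE_PACKED_TVAL, the corresponding *_provided_by_all flag drops to False and a generic fill-in snippet is appended later.

# Rough shape of the emitted chain (snippet contents omitted, names illustrative):
#
#   #if defined(DUK_F_X86)
#   /* --- x86 --- */
#   ...architecture_x86.h.in...
#   #elif defined(DUK_F_X64)
#   /* --- x64 --- */
#   ...architecture_x64.h.in...
#   #else
#   /* --- generic fallback, emitted only if the metadata has a check-less entry --- */
#   ...fallback snippet...
#   #endif /* autodetect architecture */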
+ + ret.snippet_absolute(os.path.join(meta_dir, 'helper-snippets', 'DUK_F_UCLIBC.h.in')) + ret.empty() + + # XXX: platform/compiler could provide types; if so, need some signaling + # defines like DUK_F_TYPEDEFS_DEFINED + + # Number types + if opts.c99_types_only: + ret.snippet_relative('types1.h.in') + ret.line('/* C99 types assumed */') + ret.snippet_relative('types_c99.h.in') + ret.empty() + else: + ret.snippet_relative('types1.h.in') + ret.line('#if defined(DUK_F_HAVE_INTTYPES)') + ret.line('/* C99 or compatible */') + ret.empty() + ret.snippet_relative('types_c99.h.in') + ret.empty() + ret.line('#else /* C99 types */') + ret.empty() + ret.snippet_relative('types_legacy.h.in') + ret.empty() + ret.line('#endif /* C99 types */') + ret.empty() + ret.snippet_relative('types2.h.in') + ret.empty() + ret.snippet_relative('64bitops.h.in') + ret.empty() + + # Platform, architecture, compiler fillins. These are after all + # detection so that e.g. DUK_SPRINTF() can be provided by platform + # or compiler before trying a fill-in. + + ret.chdr_block_heading('Fill-ins for platform, architecture, and compiler') + + ret.snippet_relative('platform_fillins.h.in') + ret.empty() + ret.snippet_relative('architecture_fillins.h.in') + if not byteorder_provided_by_all: + ret.empty() + ret.snippet_relative('byteorder_fillin.h.in') + if not alignment_provided_by_all: + ret.empty() + ret.snippet_relative('alignment_fillin.h.in') + ret.empty() + ret.snippet_relative('compiler_fillins.h.in') + ret.empty() + ret.snippet_relative('inline_workaround.h.in') + ret.empty() + if not packedtval_provided_by_all: + ret.empty() + ret.snippet_relative('packed_tval_fillin.h.in') + + # Object layout + ret.snippet_relative('object_layout.h.in') + ret.empty() + + # Detect and reject 'fast math' + ret.snippet_relative('reject_fast_math.h.in') + ret.empty() + + # Automatic DUK_OPT_xxx feature option handling + if opts.support_feature_options: + print('Autogenerating feature option (DUK_OPT_xxx) support') + tmp = Snippet(ret.join().split('\n')) + add_feature_option_handling(opts, ret, forced_opts, tmp.provides) + + # Emit forced options. If a corresponding option is already defined + # by a snippet above, #undef it first. + + tmp = Snippet(ret.join().split('\n')) + first_forced = True + for doc in get_use_defs(removed=not opts.omit_removed_config_options, + deprecated=not opts.omit_deprecated_config_options, + unused=not opts.omit_unused_config_options): + defname = doc['define'] + + if not forced_opts.has_key(defname): + continue + + if not doc.has_key('default'): + raise Exception('config option %s is missing default value' % defname) + + if first_forced: + ret.chdr_block_heading('Forced options') + first_forced = False + + undef_done = False + if tmp.provides.has_key(defname): + ret.line('#undef ' + defname) + undef_done = True + + emit_default_from_config_meta(ret, doc, forced_opts, undef_done) + + ret.empty() + + # If manually-edited snippets don't #define or #undef a certain + # config option, emit a default value here. This is useful to + # fill-in for new config options not covered by manual snippets + # (which is intentional). 
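A minimal sketch of what the forced-option pass above appends, assuming DUK_USE_FASTINT was forced on the command line and an earlier snippet had already defined it (the option name is just an example; the heading format is abbreviated):

# Roughly the emitted lines:
#
#   /*
#    *  Forced options
#    */
#
#   #undef DUK_USE_FASTINT     /* undef first because a snippet above already provided it */
#   #define DUK_USE_FASTINT    /* forced value from -D / --option-yaml */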
+ + tmp = Snippet(ret.join().split('\n')) + need = {} + for doc in get_use_defs(removed=False): + need[doc['define']] = True + for k in tmp.provides.keys(): + if need.has_key(k): + del need[k] + need_keys = sorted(need.keys()) + + if len(need_keys) > 0: + ret.chdr_block_heading('Autogenerated defaults') + + for k in need_keys: + #print('config option %s not covered by manual snippets, emitting default automatically' % k) + emit_default_from_config_meta(ret, use_defs[k], {}, False) + + ret.empty() + + ret.snippet_relative('custom_header.h.in') + ret.empty() + + if len(opts.fixup_header_lines) > 0: + ret.chdr_block_heading('Fixups') + for line in opts.fixup_header_lines: + ret.line(line) + ret.empty() + + add_override_defines_section(opts, ret) + + # Date provider snippet is after custom header and overrides, so that + # the user may define e.g. DUK_USE_DATE_NOW_GETTIMEOFDAY in their + # custom header. + ret.snippet_relative('date_provider.h.in') + ret.empty() + + ret.fill_dependencies_for_snippets(idx_deps) + + if opts.emit_legacy_feature_check: + add_legacy_feature_option_checks(opts, ret) + if opts.emit_config_sanity_check: + add_config_option_checks(opts, ret) + if opts.add_active_defines_macro: + add_duk_active_defines_macro(ret) + + # Derived defines (DUK_USE_INTEGER_LE, etc) from DUK_USE_BYTEORDER. + # Duktape internals currently rely on the derived defines. This is + # after sanity checks because the derived defines are marked removed. + ret.snippet_relative('byteorder_derived.h.in') + ret.empty() + + ret.line('#endif /* DUK_CONFIG_H_INCLUDED */') + ret.empty() # for trailing newline + return remove_duplicate_newlines(ret.join()) + +# +# Main +# + +def main(): + # Forced options from multiple sources are gathered into a shared list + # so that the override order remains the same as on the command line. + force_options_yaml = [] + def add_force_option_yaml(option, opt, value, parser): + # XXX: check that YAML parses + force_options_yaml.append(value) + def add_force_option_file(option, opt, value, parser): + # XXX: check that YAML parses + with open(value, 'rb') as f: + force_options_yaml.append(f.read()) + def add_force_option_define(option, opt, value, parser): + tmp = value.split('=') + if len(tmp) == 1: + doc = { tmp[0]: True } + elif len(tmp) == 2: + doc = { tmp[0]: tmp[1] } + else: + raise Exception('invalid option value: %r' % value) + force_options_yaml.append(yaml.safe_dump(doc)) + def add_force_option_undefine(option, opt, value, parser): + tmp = value.split('=') + if len(tmp) == 1: + doc = { tmp[0]: False } + else: + raise Exception('invalid option value: %r' % value) + force_options_yaml.append(yaml.safe_dump(doc)) + + fixup_header_lines = [] + def add_fixup_header_line(option, opt, value, parser): + fixup_header_lines.append(value) + def add_fixup_header_file(option, opt, value, parser): + with open(value, 'rb') as f: + for line in f: + if line[-1] == '\n': + line = line[:-1] + fixup_header_lines.append(line) + + commands = [ + 'duk-config-header', + 'feature-documentation', + 'config-documentation' + ] + parser = optparse.OptionParser( + usage='Usage: %prog [options] COMMAND', + description='Generate a duk_config.h or config option documentation based on config metadata.', + epilog='COMMAND can be one of: ' + ', '.join(commands) + '.' 
+ )
+
+ parser.add_option('--metadata', dest='metadata', default=None, help='metadata directory or metadata tar.gz file')
+ parser.add_option('--output', dest='output', default=None, help='output filename for C header or RST documentation file')
+ parser.add_option('--platform', dest='platform', default=None, help='platform (default is autodetect)')
+ parser.add_option('--compiler', dest='compiler', default=None, help='compiler (default is autodetect)')
+ parser.add_option('--architecture', dest='architecture', default=None, help='architecture (default is autodetect)')
+ parser.add_option('--c99-types-only', dest='c99_types_only', action='store_true', default=False, help='assume C99 types, no legacy type detection')
+ parser.add_option('--dll', dest='dll', action='store_true', default=False, help='dll build of Duktape, affects symbol visibility macros especially on Windows')
+ parser.add_option('--support-feature-options', dest='support_feature_options', action='store_true', default=False, help='support DUK_OPT_xxx feature options in duk_config.h')
+ parser.add_option('--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help='emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)')
+ parser.add_option('--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help='emit preprocessor checks for config option consistency (DUK_USE_xxx)')
+ parser.add_option('--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers')
+ parser.add_option('--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers')
+ parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers')
+ parser.add_option('--add-active-defines-macro', dest='add_active_defines_macro', action='store_true', default=False, help='add DUK_ACTIVE_DEFINES macro, for development only')
+ parser.add_option('--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"')
+ parser.add_option('-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"')
+ parser.add_option('--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"')
+ parser.add_option('-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"')
+ parser.add_option('--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help='force option(s) using inline YAML (e.g. 
--option-yaml "DUK_USE_DEEP_C_STACK: true")') + parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides') + parser.add_option('--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help='C header snippet file(s) to be appended to generated header, useful for manual option fixups') + parser.add_option('--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help='C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")') + parser.add_option('--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues') + parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate') + parser.add_option('--git-commit', dest='git_commit', default=None, help='git commit hash to be included in header comments') + parser.add_option('--git-describe', dest='git_describe', default=None, help='git describe string to be included in header comments') + parser.add_option('--git-branch', dest='git_branch', default=None, help='git branch string to be included in header comments') + (opts, args) = parser.parse_args() + + meta_dir = opts.metadata + if opts.metadata is None: + if os.path.isfile(os.path.join('.', 'genconfig_metadata.tar.gz')): + opts.metadata = 'genconfig_metadata.tar.gz' + elif os.path.isdir(os.path.join('.', 'config-options')): + opts.metadata = '.' + + if opts.metadata is not None and os.path.isdir(opts.metadata): + meta_dir = opts.metadata + metadata_src_text = 'Using metadata directory: %r' % meta_dir + elif opts.metadata is not None and os.path.isfile(opts.metadata) and tarfile.is_tarfile(opts.metadata): + meta_dir = get_auto_delete_tempdir() + tar = tarfile.open(name=opts.metadata, mode='r:*') + tar.extractall(path=meta_dir) + metadata_src_text = 'Using metadata tar file %r, unpacked to directory: %r' % (opts.metadata, meta_dir) + else: + raise Exception('metadata source must be a directory or a tar.gz file') + + scan_helper_snippets(os.path.join(meta_dir, 'helper-snippets')) + scan_use_defs(os.path.join(meta_dir, 'config-options')) + scan_opt_defs(os.path.join(meta_dir, 'feature-options')) + scan_use_tags() + scan_tags_meta(os.path.join(meta_dir, 'tags.yaml')) + print('%s, scanned %d DUK_OPT_xxx, %d DUK_USE_XXX, %d helper snippets' % \ + (metadata_src_text, len(opt_defs.keys()), len(use_defs.keys()), len(helper_snippets))) + #print('Tags: %r' % use_tags_list) + + if len(args) == 0: + raise Exception('missing command') + cmd = args[0] + + if cmd == 'duk-config-header': + # Generate a duk_config.h header with platform, compiler, and + # architecture either autodetected (default) or specified by + # user. Support for autogenerated DUK_OPT_xxx flags is also + # selected by user. 
+ result = generate_duk_config_header(opts, meta_dir) + with open(opts.output, 'wb') as f: + f.write(result) + elif cmd == 'feature-documentation': + result = generate_feature_option_documentation(opts) + with open(opts.output, 'wb') as f: + f.write(result) + elif cmd == 'config-documentation': + result = generate_config_option_documentation(opts) + with open(opts.output, 'wb') as f: + f.write(result) + else: + raise Exception('invalid command: %r' % cmd) + +if __name__ == '__main__': + main() diff --git a/tools/json2yaml.py b/tools/json2yaml.py new file mode 100644 index 00000000..b3a05270 --- /dev/null +++ b/tools/json2yaml.py @@ -0,0 +1,5 @@ +import os, sys, json, yaml + +if __name__ == '__main__': + # Use safe_dump() instead of dump() to avoid tags like "!!python/unicode" + print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False)) diff --git a/tools/merge_debug_meta.py b/tools/merge_debug_meta.py new file mode 100644 index 00000000..a5360923 --- /dev/null +++ b/tools/merge_debug_meta.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python2 +# +# Merge debugger YAML metadata files and output a merged JSON metadata file. +# + +import os, sys, json, yaml +import optparse + +if __name__ == '__main__': + parser = optparse.OptionParser() + parser.add_option('--output', dest='output', default=None, help='output JSON filename') + parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names') + parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands') + parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes') + parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes') + (opts, args) = parser.parse_args() + + res = {} + def merge(fn): + with open(fn, 'rb') as f: + doc = yaml.load(f) + for k in doc.keys(): + res[k] = doc[k] + + merge(opts.class_names) + merge(opts.debug_commands) + merge(opts.debug_errors) + merge(opts.opcodes) + + with open(opts.output, 'wb') as f: + f.write(json.dumps(res, indent=4) + '\n') + print('Wrote merged debugger metadata to ' + str(opts.output)) diff --git a/tools/prepare_sources.py b/tools/prepare_sources.py new file mode 100644 index 00000000..f8b674d7 --- /dev/null +++ b/tools/prepare_sources.py @@ -0,0 +1,854 @@ +#!/usr/bin/env python2 +# +# Config-and-prepare: create a duk_config.h and combined/separate sources +# for configuration options specified on the command line. 
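A hedged usage sketch for the duk-config-header command: the option names come from the parser above, while the metadata path, platform/compiler/architecture names, and output filename are illustrative.

import subprocess, sys

# Roughly how a build script might drive genconfig.py (prepare_sources.py below
# does essentially this through exec_print_stdout()):
subprocess.check_call([
    sys.executable, 'tools/genconfig.py',
    '--metadata', 'config',        # metadata directory or genconfig_metadata.tar.gz
    '--platform', 'linux',         # omit platform/compiler/architecture to get the autodetecting header
    '--compiler', 'gcc',
    '--architecture', 'x64',
    '-DDUK_USE_FASTINT',           # force a config option on
    '--output', 'duk_config.h',
    'duk-config-header'
])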
+# + +import os +import sys +import re +import shutil +import glob +import optparse +import tarfile +import json +import yaml +import subprocess + +# Helpers + +def exec_get_stdout(cmd, input=None, default=None, print_stdout=False): + try: + proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = proc.communicate(input=input) + if print_stdout: + sys.stdout.write(ret[0]) + sys.stdout.flush() + if proc.returncode != 0: + sys.stdout.write(ret[1]) # print stderr on error + sys.stdout.flush() + if default is not None: + print('WARNING: command %r failed, return default' % cmd) + return default + raise Exception('command failed, return code %d: %r' % (proc.returncode, cmd)) + return ret[0] + except: + if default is not None: + print('WARNING: command %r failed, return default' % cmd) + return default + raise + +def exec_print_stdout(cmd, input=None): + ret = exec_get_stdout(cmd, input=input, print_stdout=True) + +def mkdir(path): + os.mkdir(path) + +def copy_file(src, dst): + with open(src, 'rb') as f_in: + with open(dst, 'wb') as f_out: + f_out.write(f_in.read()) + +def copy_files(filelist, srcdir, dstdir): + for i in filelist: + copy_file(os.path.join(srcdir, i), os.path.join(dstdir, i)) + +def copy_and_replace(src, dst, rules): + # Read and write separately to allow in-place replacement + keys = sorted(rules.keys()) + res = [] + with open(src, 'rb') as f_in: + for line in f_in: + for k in keys: + line = line.replace(k, rules[k]) + res.append(line) + with open(dst, 'wb') as f_out: + f_out.write(''.join(res)) + +def copy_and_cquote(src, dst): + with open(src, 'rb') as f_in: + with open(dst, 'wb') as f_out: + f_out.write('/*\n') + for line in f_in: + line = line.decode('utf-8') + f_out.write(' * ') + for c in line: + if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): + f_out.write(c.encode('ascii')) + else: + f_out.write('\\u%04x' % ord(c)) + f_out.write(' */\n') + +def read_file(src, strip_last_nl=False): + with open(src, 'rb') as f: + data = f.read() + if len(data) > 0 and data[-1] == '\n': + data = data[:-1] + return data + +def delete_matching_files(dirpath, cb): + for fn in os.listdir(dirpath): + if os.path.isfile(os.path.join(dirpath, fn)) and cb(fn): + #print('Deleting %r' % os.path.join(dirpath, fn)) + os.unlink(os.path.join(dirpath, fn)) + +def create_targz(dstfile, filelist): + # https://docs.python.org/2/library/tarfile.html#examples + + def _add(tf, fn): # recursive add + #print('Adding to tar: ' + fn) + if os.path.isdir(fn): + for i in sorted(os.listdir(fn)): + _add(tf, os.path.join(fn, i)) + elif os.path.isfile(fn): + tf.add(fn) + else: + raise Exception('invalid file: %r' % fn) + + with tarfile.open(dstfile, 'w:gz') as tf: + for fn in filelist: + _add(tf, fn) + +def cstring(x): + return '"' + x + '"' # good enough for now + +# DUK_VERSION is grepped from duk_api_public.h.in: it is needed for the +# public API and we want to avoid defining it in two places. 
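DUK_VERSION packs (major, minor, patch) as major*10000 + minor*100 + patch, which is what the helper below decodes. A worked example with a hypothetical value:

# Example decode (Python 2 integer division); the value 10500 is illustrative:
duk_version = 10500
duk_major = duk_version / 10000          # 1
duk_minor = (duk_version % 10000) / 100  # 5
duk_patch = duk_version % 100            # 0
print('%d.%d.%d' % (duk_major, duk_minor, duk_patch))  # -> 1.5.0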
+def get_duk_version(apiheader_filename): + r = re.compile(r'^#define\s+DUK_VERSION\s+(.*?)L?\s*$') + with open(apiheader_filename, 'rb') as f: + for line in f: + m = r.match(line) + if m is not None: + duk_version = int(m.group(1)) + duk_major = duk_version / 10000 + duk_minor = (duk_version % 10000) / 100 + duk_patch = duk_version % 100 + duk_version_formatted = '%d.%d.%d' % (duk_major, duk_minor, duk_patch) + return duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted + + raise Exception('cannot figure out duktape version') + +# Python module check and friendly errors + +def check_python_modules(): + # make_dist.py doesn't need yaml but other dist utils will; check for it and + # warn if it is missing. + failed = False + + def _warning(module, aptPackage, pipPackage): + sys.stderr.write('\n') + sys.stderr.write('*** NOTE: Could not "import %s" needed for dist. Install it using e.g.:\n' % module) + sys.stderr.write('\n') + sys.stderr.write(' # Linux\n') + sys.stderr.write(' $ sudo apt-get install %s\n' % aptPackage) + sys.stderr.write('\n') + sys.stderr.write(' # Windows\n') + sys.stderr.write(' > pip install %s\n' % pipPackage) + + try: + import yaml + except ImportError: + _warning('yaml', 'python-yaml', 'PyYAML') + failed = True + + if failed: + sys.stderr.write('\n') + raise Exception('Missing some required Python modules') + +check_python_modules() + +# Option parsing + +def main(): + parser = optparse.OptionParser() + + # Forced options from multiple sources are gathered into a shared list + # so that the override order remains the same as on the command line. + force_options_yaml = [] + def add_force_option_yaml(option, opt, value, parser): + # XXX: check that YAML parses + force_options_yaml.append(value) + def add_force_option_file(option, opt, value, parser): + # XXX: check that YAML parses + with open(value, 'rb') as f: + force_options_yaml.append(f.read()) + def add_force_option_define(option, opt, value, parser): + tmp = value.split('=') + if len(tmp) == 1: + doc = { tmp[0]: True } + elif len(tmp) == 2: + doc = { tmp[0]: tmp[1] } + else: + raise Exception('invalid option value: %r' % value) + force_options_yaml.append(yaml.safe_dump(doc)) + def add_force_option_undefine(option, opt, value, parser): + tmp = value.split('=') + if len(tmp) == 1: + doc = { tmp[0]: False } + else: + raise Exception('invalid option value: %r' % value) + force_options_yaml.append(yaml.safe_dump(doc)) + + fixup_header_lines = [] + def add_fixup_header_line(option, opt, value, parser): + fixup_header_lines.append(value) + def add_fixup_header_file(option, opt, value, parser): + with open(value, 'rb') as f: + for line in f: + if line[-1] == '\n': + line = line[:-1] + fixup_header_lines.append(line) + + # Options for config-and-prepare tool itself. + parser.add_option('--source-directory', dest='source_directory', default=None, help='Directory with raw input sources (src-input/)') + parser.add_option('--output-directory', dest='output_directory', default=None, help='Directory for output files, must already exist') + parser.add_option('--duk-build-meta', dest='duk_build_meta', default=None, help='duk_build_meta.json for git commit info etc') + parser.add_option('--git-commit', dest='git_commit', default=None, help='Force git commit hash') + parser.add_option('--git-describe', dest='git_describe', default=None, help='Force git describe') + parser.add_option('--git-branch', dest='git_branch', default=None, help='Force git branch name') + + # Options forwarded to genbuiltins.py. 
+ parser.add_option('--rom-support', dest='rom_support', action='store_true', help='Add support for ROM strings/objects (increases duktape.c size considerably)')
+ parser.add_option('--rom-auto-lightfunc', dest='rom_auto_lightfunc', action='store_true', default=False, help='Convert ROM built-in function properties into lightfuncs automatically whenever possible')
+ parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', action='append', default=[], help='User strings and objects to add, YAML format (can be repeated for multiple overrides)')
+
+ # Options forwarded to genconfig.py.
+ parser.add_option('--config-metadata', dest='config_metadata', default=None, help='metadata directory or metadata tar.gz file')
+ parser.add_option('--platform', dest='platform', default=None, help='platform (default is autodetect)')
+ parser.add_option('--compiler', dest='compiler', default=None, help='compiler (default is autodetect)')
+ parser.add_option('--architecture', dest='architecture', default=None, help='architecture (default is autodetect)')
+ parser.add_option('--c99-types-only', dest='c99_types_only', action='store_true', default=False, help='assume C99 types, no legacy type detection')
+ parser.add_option('--dll', dest='dll', action='store_true', default=False, help='dll build of Duktape, affects symbol visibility macros especially on Windows')
+ parser.add_option('--support-feature-options', dest='support_feature_options', action='store_true', default=False, help='support DUK_OPT_xxx feature options in duk_config.h')
+ parser.add_option('--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help='emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)')
+ parser.add_option('--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help='emit preprocessor checks for config option consistency (DUK_USE_xxx)')
+ parser.add_option('--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers')
+ parser.add_option('--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers')
+ parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers')
+ parser.add_option('--add-active-defines-macro', dest='add_active_defines_macro', action='store_true', default=False, help='add DUK_ACTIVE_DEFINES macro, for development only')
+ parser.add_option('--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"')
+ parser.add_option('-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"')
+ parser.add_option('--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='force #undef option using a C compiler like syntax, e.g. 
"--undefine DUK_USE_DEEP_C_STACK"') + parser.add_option('-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"') + parser.add_option('--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help='force option(s) using inline YAML (e.g. --option-yaml "DUK_USE_DEEP_C_STACK: true")') + parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides') + parser.add_option('--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help='C header snippet file(s) to be appended to generated header, useful for manual option fixups') + parser.add_option('--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help='C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")') + parser.add_option('--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues') + parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate') + + (opts, args) = parser.parse_args() + + assert(opts.source_directory) + srcdir = opts.source_directory + assert(opts.output_directory) + outdir = opts.output_directory + + # Figure out directories, git info, etc + + entry_pwd = os.getcwd() + + duk_build_meta = None + if opts.duk_build_meta is not None: + with open(opts.duk_build_meta, 'rb') as f: + duk_build_meta = json.loads(f.read()) + + duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted = \ + get_duk_version(os.path.join(srcdir, 'duk_api_public.h.in')) + + git_commit = None + git_branch = None + git_describe = None + + if duk_build_meta is not None: + git_commit = duk_build_meta['git_commit'] + git_branch = duk_build_meta['git_branch'] + git_describe = duk_build_meta['git_describe'] + else: + print('No --duk-build-meta, git commit information determined automatically') + + if opts.git_commit is not None: + git_commit = opts.git_commit + if opts.git_describe is not None: + git_describe = opts.git_describe + if opts.git_branch is not None: + git_branch = opts.git_branch + + if git_commit is None: + git_commit = exec_get_stdout([ 'git', 'rev-parse', 'HEAD' ], default='external').strip() + if git_describe is None: + git_describe = exec_get_stdout([ 'git', 'describe', '--always', '--dirty' ], default='external').strip() + if git_branch is None: + git_branch = exec_get_stdout([ 'git', 'rev-parse', '--abbrev-ref', 'HEAD' ], default='external').strip() + + git_commit = str(git_commit) + git_describe = str(git_describe) + git_branch = str(git_branch) + + git_commit_cstring = cstring(git_commit) + git_describe_cstring = cstring(git_describe) + git_branch_cstring = cstring(git_branch) + + print('Config-and-prepare for Duktape version %s, commit %s, describe %s, branch %s' % \ + (duk_version_formatted, git_commit, git_describe, git_branch)) + + # For now, create the src/, src-noline/, and src-separate/ structure into the + # output directory. 
Later on the output directory should get the specific + # variant output directly. + mkdir(os.path.join(outdir, 'src')) + mkdir(os.path.join(outdir, 'src-noline')) + mkdir(os.path.join(outdir, 'src-separate')) + + # Separate sources are mostly copied as is at present. + copy_files([ + 'duk_alloc_default.c', + 'duk_api_internal.h', + 'duk_api_stack.c', + 'duk_api_heap.c', + 'duk_api_buffer.c', + 'duk_api_call.c', + 'duk_api_codec.c', + 'duk_api_compile.c', + 'duk_api_bytecode.c', + 'duk_api_memory.c', + 'duk_api_object.c', + 'duk_api_string.c', + 'duk_api_time.c', + 'duk_api_debug.c', + 'duk_bi_array.c', + 'duk_bi_boolean.c', + 'duk_bi_buffer.c', + 'duk_bi_date.c', + 'duk_bi_date_unix.c', + 'duk_bi_date_windows.c', + 'duk_bi_duktape.c', + 'duk_bi_error.c', + 'duk_bi_function.c', + 'duk_bi_global.c', + 'duk_bi_json.c', + 'duk_bi_math.c', + 'duk_bi_number.c', + 'duk_bi_object.c', + 'duk_bi_pointer.c', + 'duk_bi_protos.h', + 'duk_bi_regexp.c', + 'duk_bi_string.c', + 'duk_bi_proxy.c', + 'duk_bi_thread.c', + 'duk_bi_thrower.c', + 'duk_debug_fixedbuffer.c', + 'duk_debug.h', + 'duk_debug_macros.c', + 'duk_debug_vsnprintf.c', + 'duk_error_augment.c', + 'duk_error.h', + 'duk_error_longjmp.c', + 'duk_error_macros.c', + 'duk_error_misc.c', + 'duk_error_throw.c', + 'duk_forwdecl.h', + 'duk_harray.h', + 'duk_hbuffer_alloc.c', + 'duk_hbuffer.h', + 'duk_hbuffer_ops.c', + 'duk_hcompfunc.h', + 'duk_heap_alloc.c', + 'duk_heap.h', + 'duk_heap_hashstring.c', + 'duk_heaphdr.h', + 'duk_heap_markandsweep.c', + 'duk_heap_memory.c', + 'duk_heap_misc.c', + 'duk_heap_refcount.c', + 'duk_heap_stringcache.c', + 'duk_heap_stringtable.c', + 'duk_hnatfunc.h', + 'duk_hobject_alloc.c', + 'duk_hobject_class.c', + 'duk_hobject_enum.c', + 'duk_hobject_finalizer.c', + 'duk_hobject.h', + 'duk_hobject_misc.c', + 'duk_hobject_pc2line.c', + 'duk_hobject_props.c', + 'duk_hstring.h', + 'duk_hstring_misc.c', + 'duk_hthread_alloc.c', + 'duk_hthread_builtins.c', + 'duk_hthread.h', + 'duk_hthread_misc.c', + 'duk_hthread_stacks.c', + 'duk_hbufobj.h', + 'duk_hbufobj_misc.c', + 'duk_debugger.c', + 'duk_debugger.h', + 'duk_internal.h', + 'duk_jmpbuf.h', + 'duk_exception.h', + 'duk_js_bytecode.h', + 'duk_js_call.c', + 'duk_js_compiler.c', + 'duk_js_compiler.h', + 'duk_js_executor.c', + 'duk_js.h', + 'duk_json.h', + 'duk_js_ops.c', + 'duk_js_var.c', + 'duk_lexer.c', + 'duk_lexer.h', + 'duk_numconv.c', + 'duk_numconv.h', + 'duk_regexp_compiler.c', + 'duk_regexp_executor.c', + 'duk_regexp.h', + 'duk_tval.c', + 'duk_tval.h', + 'duk_unicode.h', + 'duk_unicode_support.c', + 'duk_unicode_tables.c', + 'duk_util_bitdecoder.c', + 'duk_util_bitencoder.c', + 'duk_util.h', + 'duk_util_hashbytes.c', + 'duk_util_hashprime.c', + 'duk_util_misc.c', + 'duk_util_tinyrandom.c', + 'duk_util_bufwriter.c', + 'duk_selftest.c', + 'duk_selftest.h', + 'duk_strings.h', + 'duk_replacements.c', + 'duk_replacements.h' + ], srcdir, os.path.join(outdir, 'src-separate')) + + # Build temp versions of LICENSE.txt and AUTHORS.rst for embedding into + # autogenerated C/H files. + + # XXX: use a proper temp directory + + copy_and_cquote('LICENSE.txt', os.path.join(outdir, 'LICENSE.txt.tmp')) + copy_and_cquote('AUTHORS.rst', os.path.join(outdir, 'AUTHORS.rst.tmp')) + + # Create a duk_config.h. 
+
+ # XXX: might be easier to invoke genconfig directly
+ def forward_genconfig_options():
+ res = []
+ res += [ '--metadata', os.path.abspath(opts.config_metadata) ] # rename option, --config-metadata => --metadata
+ if opts.platform is not None:
+ res += [ '--platform', opts.platform ]
+ if opts.compiler is not None:
+ res += [ '--compiler', opts.compiler ]
+ if opts.architecture is not None:
+ res += [ '--architecture', opts.architecture ]
+ if opts.c99_types_only:
+ res += [ '--c99-types-only' ]
+ if opts.dll:
+ res += [ '--dll' ]
+ if opts.support_feature_options:
+ res += [ '--support-feature-options' ]
+ if opts.emit_legacy_feature_check:
+ res += [ '--emit-legacy-feature-check' ]
+ if opts.emit_config_sanity_check:
+ res += [ '--emit-config-sanity-check' ]
+ if opts.omit_removed_config_options:
+ res += [ '--omit-removed-config-options' ]
+ if opts.omit_deprecated_config_options:
+ res += [ '--omit-deprecated-config-options' ]
+ if opts.omit_unused_config_options:
+ res += [ '--omit-unused-config-options' ]
+ if opts.add_active_defines_macro:
+ res += [ '--add-active-defines-macro' ]
+ for i in force_options_yaml:
+ res += [ '--option-yaml', i ]
+ for i in fixup_header_lines:
+ res += [ '--fixup-line', i ]
+ if not opts.sanity_strict:
+ res += [ '--sanity-warning' ]
+ if opts.use_cpp_warning:
+ res += [ '--use-cpp-warning' ]
+ return res
+
+ cmd = [
+ sys.executable, os.path.join('tools', 'genconfig.py'),
+ '--output', os.path.join(outdir, 'duk_config.h.tmp'),
+ '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch
+ ]
+ cmd += forward_genconfig_options()
+ cmd += [
+ 'duk-config-header'
+ ]
+ print(repr(cmd))
+ exec_print_stdout(cmd)
+
+ copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src', 'duk_config.h'))
+ copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-noline', 'duk_config.h'))
+ copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-separate', 'duk_config.h'))
+
+ # Build duktape.h from parts, with some git-related replacements.
+ # The only difference between single and separate file duktape.h
+ # is the internal DUK_SINGLE_FILE define.
+ # + # Newline after 'i \': + # http://stackoverflow.com/questions/25631989/sed-insert-line-command-osx + copy_and_replace(os.path.join(srcdir, 'duktape.h.in'), os.path.join(outdir, 'src', 'duktape.h'), { + '@DUK_SINGLE_FILE@': '#define DUK_SINGLE_FILE', + '@LICENSE_TXT@': read_file(os.path.join(outdir, 'LICENSE.txt.tmp'), strip_last_nl=True), + '@AUTHORS_RST@': read_file(os.path.join(outdir, 'AUTHORS.rst.tmp'), strip_last_nl=True), + '@DUK_API_PUBLIC_H@': read_file(os.path.join(srcdir, 'duk_api_public.h.in'), strip_last_nl=True), + '@DUK_DBLUNION_H@': read_file(os.path.join(srcdir, 'duk_dblunion.h.in'), strip_last_nl=True), + '@DUK_VERSION_FORMATTED@': duk_version_formatted, + '@GIT_COMMIT@': git_commit, + '@GIT_COMMIT_CSTRING@': git_commit_cstring, + '@GIT_DESCRIBE@': git_describe, + '@GIT_DESCRIBE_CSTRING@': git_describe_cstring, + '@GIT_BRANCH@': git_branch, + '@GIT_BRANCH_CSTRING@': git_branch_cstring + }) + # keep the line so line numbers match between the two variant headers + copy_and_replace(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-separate', 'duktape.h'), { + '#define DUK_SINGLE_FILE': '#undef DUK_SINGLE_FILE' + }) + copy_file(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-noline', 'duktape.h')) + + # Autogenerated strings and built-in files + # + # There are currently no profile specific variants of strings/builtins, but + # this will probably change when functions are added/removed based on profile. + + # XXX: nuke this util, it's pointless + exec_print_stdout([ + sys.executable, + os.path.join('tools', 'genbuildparams.py'), + '--version=' + str(duk_version), + '--git-commit=' + git_commit, + '--git-describe=' + git_describe, + '--git-branch=' + git_branch, + '--out-json=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'), + '--out-header=' + os.path.join(outdir, 'src-separate', 'duk_buildparams.h.tmp') + ]) + + res = exec_get_stdout([ + sys.executable, + os.path.join('tools', 'scan_used_stridx_bidx.py') + ] + glob.glob(os.path.join(srcdir, '*.c')) \ + + glob.glob(os.path.join(srcdir, '*.h')) \ + + glob.glob(os.path.join(srcdir, '*.h.in')) + ) + with open(os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'), 'wb') as f: + f.write(res) + + gb_opts = [] + gb_opts.append('--ram-support') # enable by default + if opts.rom_support: + # ROM string/object support is not enabled by default because + # it increases the generated duktape.c considerably. + print('Enabling --rom-support for genbuiltins.py') + gb_opts.append('--rom-support') + if opts.rom_auto_lightfunc: + print('Enabling --rom-auto-lightfunc for genbuiltins.py') + gb_opts.append('--rom-auto-lightfunc') + for fn in opts.user_builtin_metadata: + print('Forwarding --user-builtin-metadata %s' % fn) + gb_opts.append('--user-builtin-metadata') + gb_opts.append(fn) + exec_print_stdout([ + sys.executable, + os.path.join('tools', 'genbuiltins.py'), + '--buildinfo=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'), + '--used-stridx-metadata=' + os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'), + '--strings-metadata=' + os.path.join(srcdir, 'strings.yaml'), + '--objects-metadata=' + os.path.join(srcdir, 'builtins.yaml'), + '--out-header=' + os.path.join(outdir, 'src-separate', 'duk_builtins.h'), + '--out-source=' + os.path.join(outdir, 'src-separate', 'duk_builtins.c'), + '--out-metadata-json=' + os.path.join(outdir, 'duk_build_meta.json') + ] + gb_opts) + + # Autogenerated Unicode files + # + # Note: not all of the generated headers are used. 
For instance, the + # match table for "WhiteSpace-Z" is not used, because a custom piece + # of code handles that particular match. + # + # UnicodeData.txt contains ranges expressed like this: + # + # 4E00;;Lo;0;L;;;;;N;;;;; + # 9FCB;;Lo;0;L;;;;;N;;;;; + # + # These are currently decoded into individual characters as a prestep. + # + # For IDPART: + # UnicodeCombiningMark -> categories Mn, Mc + # UnicodeDigit -> categories Nd + # UnicodeConnectorPunctuation -> categories Pc + + # Whitespace (unused now) + WHITESPACE_INCL='Zs' # USP = Any other Unicode space separator + WHITESPACE_EXCL='NONE' + + # Unicode letter (unused now) + LETTER_INCL='Lu,Ll,Lt,Lm,Lo' + LETTER_EXCL='NONE' + LETTER_NOA_INCL='Lu,Ll,Lt,Lm,Lo' + LETTER_NOA_EXCL='ASCII' + LETTER_NOABMP_INCL=LETTER_NOA_INCL + LETTER_NOABMP_EXCL='ASCII,NONBMP' + + # Identifier start + # E5 Section 7.6 + IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' + IDSTART_EXCL='NONE' + IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' + IDSTART_NOA_EXCL='ASCII' + IDSTART_NOABMP_INCL=IDSTART_NOA_INCL + IDSTART_NOABMP_EXCL='ASCII,NONBMP' + + # Identifier start - Letter: allows matching of (rarely needed) 'Letter' + # production space efficiently with the help of IdentifierStart. The + # 'Letter' production is only needed in case conversion of Greek final + # sigma. + IDSTART_MINUS_LETTER_INCL=IDSTART_NOA_INCL + IDSTART_MINUS_LETTER_EXCL='Lu,Ll,Lt,Lm,Lo' + IDSTART_MINUS_LETTER_NOA_INCL=IDSTART_NOA_INCL + IDSTART_MINUS_LETTER_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII' + IDSTART_MINUS_LETTER_NOABMP_INCL=IDSTART_NOA_INCL + IDSTART_MINUS_LETTER_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII,NONBMP' + + # Identifier start - Identifier part + # E5 Section 7.6: IdentifierPart, but remove IdentifierStart (already above) + IDPART_MINUS_IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' + IDPART_MINUS_IDSTART_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' + IDPART_MINUS_IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' + IDPART_MINUS_IDSTART_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII' + IDPART_MINUS_IDSTART_NOABMP_INCL=IDPART_MINUS_IDSTART_NOA_INCL + IDPART_MINUS_IDSTART_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII,NONBMP' + + print('Expand UnicodeData.txt ranges') + + exec_print_stdout([ + sys.executable, + os.path.join('tools', 'prepare_unicode_data.py'), + os.path.join(srcdir, 'UnicodeData.txt'), + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp') + ]) + + def extract_chars(incl, excl, suffix): + #print('- extract_chars: %s %s %s' % (incl, excl, suffix)) + res = exec_get_stdout([ + sys.executable, + os.path.join('tools', 'extract_chars.py'), + '--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), + '--include-categories=' + incl, + '--exclude-categories=' + excl, + '--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.c.tmp' % suffix), + '--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.h.tmp' % suffix), + '--table-name=' + 'duk_unicode_%s' % suffix + ]) + with open(os.path.join(outdir, 'src-separate', suffix + '.txt'), 'wb') as f: + f.write(res) + + def extract_caseconv(): + #print('- extract_caseconv case conversion') + res = exec_get_stdout([ + sys.executable, + os.path.join('tools', 'extract_caseconv.py'), + '--command=caseconv_bitpacked', + '--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), + '--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'), + '--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'), + 
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'), + '--table-name-lc=duk_unicode_caseconv_lc', + '--table-name-uc=duk_unicode_caseconv_uc' + ]) + with open(os.path.join(outdir, 'src-separate', 'caseconv.txt'), 'wb') as f: + f.write(res) + + #print('- extract_caseconv canon lookup') + res = exec_get_stdout([ + sys.executable, + os.path.join('tools', 'extract_caseconv.py'), + '--command=re_canon_lookup', + '--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), + '--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'), + '--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'), + '--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'), + '--table-name-re-canon-lookup=duk_unicode_re_canon_lookup' + ]) + with open(os.path.join(outdir, 'src-separate', 'caseconv_re_canon_lookup.txt'), 'wb') as f: + f.write(res) + + print('Create Unicode tables for codepoint classes') + extract_chars(WHITESPACE_INCL, WHITESPACE_EXCL, 'ws') + extract_chars(LETTER_INCL, LETTER_EXCL, 'let') + extract_chars(LETTER_NOA_INCL, LETTER_NOA_EXCL, 'let_noa') + extract_chars(LETTER_NOABMP_INCL, LETTER_NOABMP_EXCL, 'let_noabmp') + extract_chars(IDSTART_INCL, IDSTART_EXCL, 'ids') + extract_chars(IDSTART_NOA_INCL, IDSTART_NOA_EXCL, 'ids_noa') + extract_chars(IDSTART_NOABMP_INCL, IDSTART_NOABMP_EXCL, 'ids_noabmp') + extract_chars(IDSTART_MINUS_LETTER_INCL, IDSTART_MINUS_LETTER_EXCL, 'ids_m_let') + extract_chars(IDSTART_MINUS_LETTER_NOA_INCL, IDSTART_MINUS_LETTER_NOA_EXCL, 'ids_m_let_noa') + extract_chars(IDSTART_MINUS_LETTER_NOABMP_INCL, IDSTART_MINUS_LETTER_NOABMP_EXCL, 'ids_m_let_noabmp') + extract_chars(IDPART_MINUS_IDSTART_INCL, IDPART_MINUS_IDSTART_EXCL, 'idp_m_ids') + extract_chars(IDPART_MINUS_IDSTART_NOA_INCL, IDPART_MINUS_IDSTART_NOA_EXCL, 'idp_m_ids_noa') + extract_chars(IDPART_MINUS_IDSTART_NOABMP_INCL, IDPART_MINUS_IDSTART_NOABMP_EXCL, 'idp_m_ids_noabmp') + + print('Create Unicode tables for case conversion') + extract_caseconv() + + print('Combine sources and clean up') + + # Inject autogenerated files into source and header files so that they are + # usable (for all profiles and define cases) directly. + # + # The injection points use a standard C preprocessor #include syntax + # (earlier these were actual includes). 
+ + copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode.h'), os.path.join(outdir, 'src-separate', 'duk_unicode.h'), { + '#include "duk_unicode_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_m_let_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_m_let_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_idp_m_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_idp_m_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_caseconv.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'), strip_last_nl=True), + '#include "duk_unicode_re_canon_lookup.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'), strip_last_nl=True) + }) + + copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), { + '#include "duk_unicode_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_m_let_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_ids_m_let_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_idp_m_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_idp_m_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_caseconv.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'), strip_last_nl=True), + '#include "duk_unicode_re_canon_lookup.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'), strip_last_nl=True) + }) + + # Clean up some temporary files + + delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[-4:] == '.tmp') + delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x in [ + 'ws.txt', + 'let.txt', 'let_noa.txt', 'let_noabmp.txt', + 'ids.txt', 'ids_noa.txt', 'ids_noabmp.txt', + 'ids_m_let.txt', 'ids_m_let_noa.txt', 'ids_m_let_noabmp.txt', + 'idp_m_ids.txt', 'idp_m_ids_noa.txt', 'idp_m_ids_noabmp.txt' + ]) + delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[0:8] == 'caseconv' and x[-4:] == '.txt') + + # Create a combined source file, duktape.c, into a separate combined source + # directory. This allows user to just include "duktape.c", "duktape.h", and + # "duk_config.h" into a project and maximizes inlining and size optimization + # opportunities even with older compilers. 
Because some projects include + # these files into their repository, the result should be deterministic and + # diffable. Also, it must retain __FILE__/__LINE__ behavior through + # preprocessor directives. Whitespace and comments can be stripped as long + # as the other requirements are met. For some users it's preferable *not* + # to use #line directives in the combined source, so a separate variant is + # created for that, see: https://github.com/svaarala/duktape/pull/363. + + def create_source_prologue(license_file, authors_file): + res = [] + + # Because duktape.c/duktape.h/duk_config.h are often distributed or + # included in project sources as is, add a license reminder and + # Duktape version information to the duktape.c header (duktape.h + # already contains them). + + duk_major = duk_version / 10000 + duk_minor = duk_version / 100 % 100 + duk_patch = duk_version % 100 + res.append('/*') + res.append(' * Single source autogenerated distributable for Duktape %d.%d.%d.' % (duk_major, duk_minor, duk_patch)) + res.append(' *') + res.append(' * Git commit %s (%s).' % (git_commit, git_describe)) + res.append(' * Git branch %s.' % git_branch) + res.append(' *') + res.append(' * See Duktape AUTHORS.rst and LICENSE.txt for copyright and') + res.append(' * licensing information.') + res.append(' */') + res.append('') + + # Add LICENSE.txt and AUTHORS.rst to combined source so that they're automatically + # included and are up-to-date. + + res.append('/* LICENSE.txt */') + with open(license_file, 'rb') as f: + for line in f: + res.append(line.strip()) + res.append('') + res.append('/* AUTHORS.rst */') + with open(authors_file, 'rb') as f: + for line in f: + res.append(line.strip()) + + return '\n'.join(res) + '\n' + + def select_combined_sources(): + # These files must appear before the alphabetically sorted + # ones so that static variables get defined before they're + # used. We can't forward declare them because that would + # cause C++ issues (see GH-63). When changing, verify by + # compiling with g++. 
+ handpick = [ + 'duk_replacements.c', + 'duk_debug_macros.c', + 'duk_builtins.c', + 'duk_error_macros.c', + 'duk_unicode_support.c', + 'duk_util_misc.c', + 'duk_util_hashprime.c', + 'duk_hobject_class.c' + ] + + files = [] + for fn in handpick: + files.append(fn) + + for fn in sorted(os.listdir(os.path.join(outdir, 'src-separate'))): + f_ext = os.path.splitext(fn)[1] + if f_ext not in [ '.c' ]: + continue + if fn in files: + continue + files.append(fn) + + res = map(lambda x: os.path.join(outdir, 'src-separate', x), files) + #print(repr(files)) + #print(repr(res)) + return res + + with open(os.path.join(outdir, 'prologue.tmp'), 'wb') as f: + f.write(create_source_prologue(os.path.join(outdir, 'LICENSE.txt.tmp'), os.path.join(outdir, 'AUTHORS.rst.tmp'))) + + exec_print_stdout([ + sys.executable, + os.path.join('tools', 'combine_src.py'), + '--include-path', os.path.join(outdir, 'src-separate'), + '--include-exclude', 'duk_config.h', # don't inline + '--include-exclude', 'duktape.h', # don't inline + '--prologue', os.path.join(outdir, 'prologue.tmp'), + '--output-source', os.path.join(outdir, 'src', 'duktape.c'), + '--output-metadata', os.path.join(outdir, 'src', 'metadata.json'), + '--line-directives' + ] + select_combined_sources()) + + exec_print_stdout([ + sys.executable, + os.path.join('tools', 'combine_src.py'), + '--include-path', os.path.join(outdir, 'src-separate'), + '--include-exclude', 'duk_config.h', # don't inline + '--include-exclude', 'duktape.h', # don't inline + '--prologue', os.path.join(outdir, 'prologue.tmp'), + '--output-source', os.path.join(outdir, 'src-noline', 'duktape.c'), + '--output-metadata', os.path.join(outdir, 'src-noline', 'metadata.json') + ] + select_combined_sources()) + + # Clean up remaining temp files + delete_matching_files(outdir, lambda x: x[-4:] == '.tmp') + + print('Config-and-prepare finished successfully') + +if __name__ == '__main__': + main() diff --git a/tools/prepare_unicode_data.py b/tools/prepare_unicode_data.py new file mode 100644 index 00000000..2b310a61 --- /dev/null +++ b/tools/prepare_unicode_data.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python2 +# +# UnicodeData.txt may contain ranges in addition to individual characters. +# Unpack the ranges into individual characters for the other scripts to use. +# + +import os +import sys + +def main(): + f_in = open(sys.argv[1], 'rb') + f_out = open(sys.argv[2], 'wb') + while True: + line = f_in.readline() + if line == '' or line == '\n': + break + parts = line.split(';') # keep newline + if parts[1].endswith('First>'): + line2 = f_in.readline() + parts2 = line2.split(';') + if not parts2[1].endswith('Last>'): + raise Exception('cannot parse range') + cp1 = long(parts[0], 16) + cp2 = long(parts2[0], 16) + + suffix = ';'.join(parts[1:]) + for i in xrange(cp1, cp2 + 1): # inclusive + f_out.write('%04X;%s' % (i, suffix)) + else: + f_out.write(line) + + f_in.close() + f_out.flush() + f_out.close() + +if __name__ == '__main__': + main() diff --git a/tools/resolve_combined_lineno.py b/tools/resolve_combined_lineno.py new file mode 100644 index 00000000..5e526558 --- /dev/null +++ b/tools/resolve_combined_lineno.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python2 +# +# Resolve a line number in the combined source into an uncombined file/line +# using a dist/src/metadata.json file. 
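A worked sketch of the line_map lookup that resolve_combined_lineno.py performs below; the metadata fragment and line numbers are invented for illustration:

# Hypothetical metadata.json 'line_map' entries (ordered by combined_line):
line_map = [
    { 'original_file': 'duk_api_stack.c', 'original_line': 50, 'combined_line': 12000 },
    { 'original_file': 'duk_bi_array.c',  'original_line': 10, 'combined_line': 14000 }
]
# For combined line 12345 the last entry with combined_line <= 12345 applies:
#   original line = 50 + (12345 - 12000) = 395  ->  duk_api_stack.c:395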
+# +# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345 +# + +import os +import sys +import json + +def main(): + with open(sys.argv[1], 'rb') as f: + metadata = json.loads(f.read()) + lineno = int(sys.argv[2]) + + for e in reversed(metadata['line_map']): + if lineno >= e['combined_line']: + orig_lineno = e['original_line'] + (lineno - e['combined_line']) + print('%s:%d -> %s:%d' % ('duktape.c', lineno, + e['original_file'], orig_lineno)) + break + +if __name__ == '__main__': + main() diff --git a/tools/scan_strings.py b/tools/scan_strings.py new file mode 100644 index 00000000..2765ac4f --- /dev/null +++ b/tools/scan_strings.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python2 +# +# Scan potential external strings from Ecmascript and C files. +# +# Very simplistic example with a lot of limitations: +# +# - Doesn't handle multiple variables in a variable declaration +# +# - Only extracts strings from C files, these may correspond to +# Duktape/C bindings (but in many cases don't) +# + +import os +import sys +import re +import json + +strmap = {} + +# Ecmascript function declaration +re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE) + +# Ecmascript variable declaration +# XXX: doesn't handle multiple variables +re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE) + +# Ecmascript variable assignment +re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE) + +# Ecmascript dotted property reference (also matches numbers like +# '4.0', which are separately rejected below) +re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE) +re_digits = re.compile(r'^\d+$', re.UNICODE) + +# Ecmascript or C string literal +re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE) +re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE) + +def strDecode(x): + # Need to decode hex, unicode, and other escapes. Python syntax + # is close enough to C and Ecmascript so use eval for now. 
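+    # NOTE: eval() is acceptable here only on the assumption that the
+    # scanned files are trusted (our own source files); never run this
+    # on untrusted input.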
+ + try: + return eval('u' + x) # interpret as unicode string + except: + sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n') + return None + +def scan(f, fn): + global strmap + + # Scan rules depend on file type + if fn[-2:] == '.c': + use_funcname = False + use_vardecl = False + use_varassign = False + use_propref = False + use_strlit_dquot = True + use_strlit_squot = False + else: + use_funcname = True + use_vardecl = True + use_varassign = True + use_propref = True + use_strlit_dquot = True + use_strlit_squot = True + + for line in f: + # Assume input data is UTF-8 + line = line.decode('utf-8') + + if use_funcname: + for m in re_funcname.finditer(line): + strmap[m.group(1)] = True + + if use_vardecl: + for m in re_vardecl.finditer(line): + strmap[m.group(1)] = True + + if use_varassign: + for m in re_varassign.finditer(line): + strmap[m.group(1)] = True + + if use_propref: + for m in re_propref.finditer(line): + parts = m.group(1).split('.') + if re_digits.match(parts[0]) is not None: + # Probably a number ('4.0' or such) + pass + else: + for part in parts: + strmap[part] = True + + if use_strlit_dquot: + for m in re_strlit_dquot.finditer(line): + s = strDecode(m.group(1)) + if s is not None: + strmap[s] = True + + if use_strlit_squot: + for m in re_strlit_squot.finditer(line): + s = strDecode(m.group(1)) + if s is not None: + strmap[s] = True + +def main(): + for fn in sys.argv[1:]: + f = open(fn, 'rb') + scan(f, fn) + f.close() + + strs = [] + strs_base64 = [] + doc = { + # Strings as Unicode strings + 'scanned_strings': strs, + + # Strings as base64-encoded UTF-8 data, which should be ready + # to be used in C code (Duktape internal string representation + # is UTF-8) + 'scanned_strings_base64': strs_base64 + } + k = strmap.keys() + k.sort() + for s in k: + strs.append(s) + t = s.encode('utf-8').encode('base64') + if len(t) > 0 and t[-1] == '\n': + t = t[0:-1] + strs_base64.append(t) + + print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True)) + +if __name__ == '__main__': + main() diff --git a/tools/scan_used_stridx_bidx.py b/tools/scan_used_stridx_bidx.py new file mode 100644 index 00000000..e9631e8d --- /dev/null +++ b/tools/scan_used_stridx_bidx.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python2 +# +# Scan Duktape code base for references to built-in strings and built-in +# objects, i.e. for: +# +# - Strings which will need DUK_STRIDX_xxx constants and a place in the +# thr->strs[] array. +# +# - Objects which will need DUK_BIDX_xxx constants and a place in the +# thr->builtins[] array. 
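+#
+# The output is a JSON document with 'used_stridx_defines' and
+# 'used_bidx_defines' lists (plus their counts), e.g. (paths illustrative):
+#
+#   $ python tools/scan_used_stridx_bidx.py src/*.c src/*.h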
+# + +import os +import sys +import re +import json + +re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE) +re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE) +re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE) +re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE) + +def main(): + str_defs = {} + obj_defs = {} + + for fn in sys.argv[1:]: + with open(fn, 'rb') as f: + d = f.read() + for m in re.finditer(re_str_stridx, d): + str_defs[m.group(1)] = True + for m in re.finditer(re_str_heap, d): + str_defs[m.group(1)] = True + for m in re.finditer(re_str_hthread, d): + str_defs[m.group(1)] = True + for m in re.finditer(re_obj_bidx, d): + obj_defs[m.group(1)] = True + + str_used = [] + for k in sorted(str_defs.keys()): + str_used.append('DUK_STRIDX_' + k) + + obj_used = [] + for k in sorted(obj_defs.keys()): + obj_used.append('DUK_BIDX_' + k) + + doc = { + 'used_stridx_defines': str_used, + 'used_bidx_defines': obj_used, + 'count_used_stridx_defines': len(str_used), + 'count_used_bidx_defines': len(obj_used) + } + print(json.dumps(doc, indent=4)) + +if __name__ == '__main__': + main() diff --git a/util/yaml2json.py b/tools/yaml2json.py similarity index 55% rename from util/yaml2json.py rename to tools/yaml2json.py index d6bd3f00..731b5390 100644 --- a/util/yaml2json.py +++ b/tools/yaml2json.py @@ -1,4 +1,4 @@ import os, sys, json, yaml if __name__ == '__main__': - print(json.dumps(yaml.load(sys.stdin))) + print(json.dumps(yaml.load(sys.stdin))) diff --git a/util/autofix_debuglog_calls.py b/util/autofix_debuglog_calls.py index 7278aa04..02e48f33 100644 --- a/util/autofix_debuglog_calls.py +++ b/util/autofix_debuglog_calls.py @@ -26,47 +26,47 @@ import re re_callsite = re.compile(r'^\s*(DUK_D+PRINT).*?;$') wrappers = { - 'DUK_DPRINT': 'DUK_D', - 'DUK_DDPRINT': 'DUK_DD', - 'DUK_DDDPRINT': 'DUK_DDD' + 'DUK_DPRINT': 'DUK_D', + 'DUK_DDPRINT': 'DUK_DD', + 'DUK_DDDPRINT': 'DUK_DDD' } warnings = [] def process(filename): - f = open(filename, 'rb') - output = [] + f = open(filename, 'rb') + output = [] - linenumber = 0 - fixes = 0 - for line in f: - linenumber += 1 - if 'DPRINT' not in line: - output.append(line) - continue - m = re_callsite.match(line) - if m is None: - output.append(line) - continue - log_macro = m.group(1) - log_wrapper = wrappers[log_macro] - line = line.replace(log_macro, log_wrapper + '(' + log_macro) # DUK_DPRINT( -> DUK_D(DUK_DPRINT( - line = line.replace(');', '));') # ...); -> ...)); - output.append(line) - fixes += 1 + linenumber = 0 + fixes = 0 + for line in f: + linenumber += 1 + if 'DPRINT' not in line: + output.append(line) + continue + m = re_callsite.match(line) + if m is None: + output.append(line) + continue + log_macro = m.group(1) + log_wrapper = wrappers[log_macro] + line = line.replace(log_macro, log_wrapper + '(' + log_macro) # DUK_DPRINT( -> DUK_D(DUK_DPRINT( + line = line.replace(');', '));') # ...); -> ...)); + output.append(line) + fixes += 1 - f.close() + f.close() - if fixes > 0: - print '%s: %d fixes' % (filename, fixes) + if fixes > 0: + print '%s: %d fixes' % (filename, fixes) - f = open(filename, 'wb') - f.write(''.join(output)) - f.close() + f = open(filename, 'wb') + f.write(''.join(output)) + f.close() def main(): - for filename in sys.argv[1:]: - process(filename) + for filename in sys.argv[1:]: + process(filename) if __name__ == '__main__': - main() + main() diff --git a/util/check_code_policy.py b/util/check_code_policy.py index f86cc965..97161e15 100644 --- a/util/check_code_policy.py +++ 
b/util/check_code_policy.py @@ -13,17 +13,16 @@ import re import optparse class Problem: - filename = None - linenumber = None - line = None - reason = None - - def __init__(self, filename, linenumber, line, reason): - self.filename = filename - self.linenumber = linenumber - self.line = line - self.reason = reason + filename = None + linenumber = None + line = None + reason = None + def __init__(self, filename, linenumber, line, reason): + self.filename = filename + self.linenumber = linenumber + self.line = line + self.reason = reason re_debuglog_callsite = re.compile(r'^.*?(DUK_D+PRINT).*?$') re_trailing_ws = re.compile(r'^.*?\s$') @@ -34,115 +33,117 @@ re_nonascii = re.compile(r'^.*?[\x80-\xff].*?$') re_func_decl_or_def = re.compile(r'^(\w+)\s+(?:\w+\s+)*(\w+)\(.*?.*?$') # may not finish on same line re_cpp_comment = re.compile(r'^.*?//.*?$') +fixmeString = 'FIX' + 'ME' # avoid triggering a code policy check warning :) + # These identifiers are wrapped in duk_config.h, and should only be used # through the wrappers elsewhere. rejected_plain_identifiers_list = [ - # math classification - 'fpclassify', - 'signbit', - 'isfinite', - 'isnan', - 'isinf', - 'FP_NAN', - 'FP_INFINITE', - 'FP_ZERO', - 'FP_SUBNORMAL', - 'FP_NORMAL', - - # math functions - 'fabs', - 'fmin', - 'fmax', - 'floor', - 'ceil', - 'fmod', - 'pow', - 'acos', - 'asin', - 'atan', - 'atan2', - 'sin', - 'cos', - 'tan', - 'exp', - 'log', - 'sqrt', - - # memory functions - 'malloc', - 'realloc', - 'calloc', - 'free', - 'memcpy', - 'memmove', - 'memcmp', - 'memset', - - # string functions - 'strlen', - 'strcmp', - 'strncmp', - 'printf', - 'fprintf', - 'sprintf', - '_snprintf', - 'snprintf', - 'vsprintf', - '_vsnprintf', - 'vsnprintf', - 'sscanf', - 'vsscanf', - - # streams - 'stdout', - 'stderr', - 'stdin', - - # file ops - 'fopen', - 'fclose', - 'fread', - 'fwrite', - 'fseek', - 'ftell', - 'fflush', - 'fputc', - - # misc - 'abort', - 'exit', - 'setjmp', - 'longjmp', - - # variable/argument names which have shadowing issues with platform headers - # see e.g. https://github.com/svaarala/duktape/pull/810 - 'index', - 'rindex', - - # for consistency avoid these too, use obj_idx rather than obj_index, etc - 'obj_index', - 'from_index', - 'to_index', - 'arr_index', - 'uindex', + # math classification + 'fpclassify', + 'signbit', + 'isfinite', + 'isnan', + 'isinf', + 'FP_NAN', + 'FP_INFINITE', + 'FP_ZERO', + 'FP_SUBNORMAL', + 'FP_NORMAL', + + # math functions + 'fabs', + 'fmin', + 'fmax', + 'floor', + 'ceil', + 'fmod', + 'pow', + 'acos', + 'asin', + 'atan', + 'atan2', + 'sin', + 'cos', + 'tan', + 'exp', + 'log', + 'sqrt', + + # memory functions + 'malloc', + 'realloc', + 'calloc', + 'free', + 'memcpy', + 'memmove', + 'memcmp', + 'memset', + + # string functions + 'strlen', + 'strcmp', + 'strncmp', + 'printf', + 'fprintf', + 'sprintf', + '_snprintf', + 'snprintf', + 'vsprintf', + '_vsnprintf', + 'vsnprintf', + 'sscanf', + 'vsscanf', + + # streams + 'stdout', + 'stderr', + 'stdin', + + # file ops + 'fopen', + 'fclose', + 'fread', + 'fwrite', + 'fseek', + 'ftell', + 'fflush', + 'fputc', + + # misc + 'abort', + 'exit', + 'setjmp', + 'longjmp', + + # variable/argument names which have shadowing issues with platform headers + # see e.g. 
https://github.com/svaarala/duktape/pull/810 + 'index', + 'rindex', + + # for consistency avoid these too, use obj_idx rather than obj_index, etc + 'obj_index', + 'from_index', + 'to_index', + 'arr_index', + 'uindex', ] rejected_plain_identifiers = {} for id in rejected_plain_identifiers_list: - rejected_plain_identifiers[id] = True + rejected_plain_identifiers[id] = True debuglog_wrappers = { - 'DUK_DPRINT': 'DUK_D', - 'DUK_DDPRINT': 'DUK_DD', - 'DUK_DDDPRINT': 'DUK_DDD' + 'DUK_DPRINT': 'DUK_D', + 'DUK_DDPRINT': 'DUK_DD', + 'DUK_DDDPRINT': 'DUK_DDD' } allowed_visibility_macros = [ - 'DUK_EXTERNAL_DECL', - 'DUK_EXTERNAL', - 'DUK_INTERNAL_DECL', - 'DUK_INTERNAL', - 'DUK_LOCAL_DECL', - 'DUK_LOCAL' + 'DUK_EXTERNAL_DECL', + 'DUK_EXTERNAL', + 'DUK_INTERNAL_DECL', + 'DUK_INTERNAL', + 'DUK_LOCAL_DECL', + 'DUK_LOCAL' ] problems = [] @@ -155,305 +156,318 @@ re_repl_expect_strings = re.compile(r'/\*===.*?===*?\*/', re.DOTALL) re_not_newline = re.compile(r'[^\n]+', re.DOTALL) def repl_c(m): - tmp = re.sub(re_not_newline, '', m.group(0)) - if tmp == '': - tmp = ' ' # avoid /**/ - return '/*' + tmp + '*/' + tmp = re.sub(re_not_newline, '', m.group(0)) + if tmp == '': + tmp = ' ' # avoid /**/ + return '/*' + tmp + '*/' def repl_cpp(m): - return '// removed\n' + return '// removed\n' def repl_dquot(m): - return '"' + ('.' * (len(m.group(0)) - 2)) + '"' + return '"' + ('.' * (len(m.group(0)) - 2)) + '"' def repl_squot(m): - return "'" + ('.' * (len(m.group(0)) - 2)) + "'" + return "'" + ('.' * (len(m.group(0)) - 2)) + "'" def removeLiterals(data): - data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) - data = re.sub(re_repl_string_literals_squot, repl_squot, data) - return data + data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) + data = re.sub(re_repl_string_literals_squot, repl_squot, data) + return data def removeCCommentsAndLiterals(data): - data = re.sub(re_repl_c_comments, repl_c, data) - data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) - data = re.sub(re_repl_string_literals_squot, repl_squot, data) - return data + data = re.sub(re_repl_c_comments, repl_c, data) + data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) + data = re.sub(re_repl_string_literals_squot, repl_squot, data) + return data def removeAnyCommentsAndLiterals(data): - data = re.sub(re_repl_c_comments, repl_c, data) - data = re.sub(re_repl_cpp_comments, repl_cpp, data) - data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) - data = re.sub(re_repl_string_literals_squot, repl_squot, data) - return data + data = re.sub(re_repl_c_comments, repl_c, data) + data = re.sub(re_repl_cpp_comments, repl_cpp, data) + data = re.sub(re_repl_string_literals_dquot, repl_dquot, data) + data = re.sub(re_repl_string_literals_squot, repl_squot, data) + return data def removeExpectStrings(data): - def repl(m): - tmp = re.sub(re_not_newline, '', m.group(0)) - if tmp == '': - tmp = ' ' # avoid /*======*/ - return '/*===' + tmp + '===*/' + def repl(m): + tmp = re.sub(re_not_newline, '', m.group(0)) + if tmp == '': + tmp = ' ' # avoid /*======*/ + return '/*===' + tmp + '===*/' - data = re.sub(re_repl_expect_strings, repl, data) - return data + data = re.sub(re_repl_expect_strings, repl, data) + return data def checkDebugLogCalls(lines, idx, filename): - # Allowed debug log forms: - # - # DUK_D(DUK_DPRINT(...)) - # DUK_DD(DUK_DDPRINT(...)) - # DUK_DDD(DUK_DDDPRINT(...)) - # - # The calls may span multiple lines, but the wrapper (DUK_D) - # and the log macro (DUK_DPRINT) must be on the same line. 
- - line = lines[idx] - if 'DPRINT' not in line: - return - - m = re_debuglog_callsite.match(line) - if m is None: - return - - log_macro = m.group(1) - log_wrapper = debuglog_wrappers[log_macro] - if log_wrapper + '(' in line: - return - - # exclude '#define DUK_DPRINT...' macros in duk_debug.h - if len(line) >= 1 and line[0] == '#': - return - - # exclude a few comment lines in duk_debug.h - if len(line) >= 3 and line[0:3] == ' * ': - return - - raise Exception('invalid debug log call form') + # Allowed debug log forms: + # + # DUK_D(DUK_DPRINT(...)) + # DUK_DD(DUK_DDPRINT(...)) + # DUK_DDD(DUK_DDDPRINT(...)) + # + # The calls may span multiple lines, but the wrapper (DUK_D) + # and the log macro (DUK_DPRINT) must be on the same line. + + line = lines[idx] + if 'DPRINT' not in line: + return + + m = re_debuglog_callsite.match(line) + if m is None: + return + + log_macro = m.group(1) + log_wrapper = debuglog_wrappers[log_macro] + if log_wrapper + '(' in line: + return + + # exclude '#define DUK_DPRINT...' macros in duk_debug.h + if len(line) >= 1 and line[0] == '#': + return + + # exclude a few comment lines in duk_debug.h + if len(line) >= 3 and line[0:3] == ' * ': + return + + raise Exception('invalid debug log call form') def checkTrailingWhitespace(lines, idx, filename): - line = lines[idx] - if len(line) > 0 and line[-1] == '\n': - line = line[:-1] + line = lines[idx] + if len(line) > 0 and line[-1] == '\n': + line = line[:-1] - m = re_trailing_ws.match(line) - if m is None: - return + m = re_trailing_ws.match(line) + if m is None: + return - raise Exception('trailing whitespace') + raise Exception('trailing whitespace') def checkCarriageReturns(lines, idx, filename): - line = lines[idx] - if not '\x0d' in line: - return + line = lines[idx] + if not '\x0d' in line: + return - raise Exception('carriage return') + raise Exception('carriage return') def checkMixedIndent(lines, idx, filename): - line = lines[idx] - if not '\x20\x09' in line: - return + line = lines[idx] + if not '\x20\x09' in line: + return + + # Mixed tab/space are only allowed after non-whitespace characters + idx = line.index('\x20\x09') + tmp = line[0:idx] + m = re_only_ws.match(tmp) + if m is None: + return + + raise Exception('mixed space/tab indent (idx %d)' % idx) + +def checkTabIndent(lines, idx, filename): + line = lines[idx] + if not '\x09' in line: + return - # Mixed tab/space are only allowed after non-whitespace characters - idx = line.index('\x20\x09') - tmp = line[0:idx] - m = re_only_ws.match(tmp) - if m is None: - return + # Now just checks for presence of TAB characters which is fine for Python + # code (which this check is used for). 
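+    # (This check was added for the PEP8 conversion: the Python tooling is
+    # expected to be indented with spaces only, so any TAB character on a
+    # line is flagged.)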
- raise Exception('mixed space/tab indent (idx %d)' % idx) + raise Exception('tab indent (idx %d)' % idx) def checkNonLeadingTab(lines, idx, filename): - line = lines[idx] - m = re_nonleading_tab.match(line) - if m is None: - return + line = lines[idx] + m = re_nonleading_tab.match(line) + if m is None: + return - raise Exception('non-leading tab (idx %d)' % idx) + raise Exception('non-leading tab (idx %d)' % idx) def checkFixme(lines, idx, filename): - line = lines[idx] - if not 'FIXME' in line: - return + line = lines[idx] + if not fixmeString in line: + return - raise Exception('FIXME on line') + raise Exception(fixmeString + ' on line') def checkIdentifiers(lines, idx, filename): - line = lines[idx] - # XXX: this now executes for every line which is pointless - bn = os.path.basename(filename) - excludePlain = (bn[0:5] == 'test-') + line = lines[idx] + # XXX: this now executes for every line which is pointless + bn = os.path.basename(filename) + excludePlain = (bn[0:5] == 'test-') - for m in re.finditer(re_identifier, line): - if rejected_plain_identifiers.has_key(m.group(0)): - if not excludePlain: - raise Exception('invalid identifier %r (perhaps plain)' % m.group(0)) + for m in re.finditer(re_identifier, line): + if rejected_plain_identifiers.has_key(m.group(0)): + if not excludePlain: + raise Exception('invalid identifier %r (perhaps plain)' % m.group(0)) def checkNonAscii(lines, idx, filename): - line = lines[idx] - m = re_nonascii.match(line) - if m is None: - return - - bn = os.path.basename(filename) - if bn == 'test-lex-utf8.js': - # this specific file is intentionally exempt - pass - else: - raise Exception('non-ascii character') + line = lines[idx] + m = re_nonascii.match(line) + if m is None: + return + + bn = os.path.basename(filename) + if bn == 'test-lex-utf8.js': + # this specific file is intentionally exempt + pass + else: + raise Exception('non-ascii character') def checkNoSymbolVisibility(lines, idx, filename): - line = lines[idx] + line = lines[idx] - # Workaround for DUK_ALWAYS_INLINE preceding a declaration - # (e.g. "DUK_ALWAYS_INLINE DUK_LOCAL ...") - if line.startswith('DUK_ALWAYS_INLINE '): - line = line[18:] + # Workaround for DUK_ALWAYS_INLINE preceding a declaration + # (e.g. 
"DUK_ALWAYS_INLINE DUK_LOCAL ...") + if line.startswith('DUK_ALWAYS_INLINE '): + line = line[18:] - m = re_func_decl_or_def.match(line) - if m is None: - return + m = re_func_decl_or_def.match(line) + if m is None: + return - bn = os.path.basename(filename) - if not ((bn[-2:] == '.c' or bn[-2:] == '.h' or bn[-5:] == '.h.in') and bn[0:5] != 'test-'): - # Apply to only specific files in src/ - return + bn = os.path.basename(filename) + if not ((bn[-2:] == '.c' or bn[-2:] == '.h' or bn[-5:] == '.h.in') and bn[0:5] != 'test-'): + # Apply to only specific files in src/ + return - if m.group(1) in allowed_visibility_macros and \ - not ((m.group(1) != 'DUK_LOCAL' and m.group(1) != 'DUK_LOCAL_DECL') and 'duk__' in m.group(2)) and \ - not ((m.group(1) == 'DUK_LOCAL' or m.group(1) == 'DUK_LOCAL_DECL') and 'duk__' not in m.group(2)): - return + if m.group(1) in allowed_visibility_macros and \ + not ((m.group(1) != 'DUK_LOCAL' and m.group(1) != 'DUK_LOCAL_DECL') and 'duk__' in m.group(2)) and \ + not ((m.group(1) == 'DUK_LOCAL' or m.group(1) == 'DUK_LOCAL_DECL') and 'duk__' not in m.group(2)): + return - # Previous line may contain the declaration (alone) - if idx > 0 and lines[idx - 1].strip() in allowed_visibility_macros: - return + # Previous line may contain the declaration (alone) + if idx > 0 and lines[idx - 1].strip() in allowed_visibility_macros: + return - # Special exceptions - # (None now) + # Special exceptions + # (None now) - raise Exception('missing symbol visibility macro') + raise Exception('missing symbol visibility macro') def checkCppComment(lines, idx, filename): - line = lines[idx] - m = re_cpp_comment.match(line) - if m is None: - return + line = lines[idx] + m = re_cpp_comment.match(line) + if m is None: + return - raise Exception('c++ comment') + raise Exception('c++ comment') def processFile(filename, checkersRaw, checkersNoCommentsOrLiterals, checkersNoCCommentsOrLiterals, checkersNoExpectStrings): - f = open(filename, 'rb') - dataRaw = f.read() - f.close() - - dataNoCommentsOrLiterals = removeAnyCommentsAndLiterals(dataRaw) # no C/javascript comments, literals removed - dataNoCCommentsOrLiterals = removeCCommentsAndLiterals(dataRaw) # no C comments, literals removed - dataNoExpectStrings = removeExpectStrings(dataRaw) # no testcase expect strings - - linesRaw = dataRaw.split('\n') - linesNoCommentsOrLiterals = dataNoCommentsOrLiterals.split('\n') - linesNoCCommentsOrLiterals = dataNoCCommentsOrLiterals.split('\n') - linesNoExpectStrings = dataNoExpectStrings.split('\n') - - def f(lines, checkers): - for linenumber in xrange(len(lines)): - for fun in checkers: - try: - fun(lines, linenumber, filename) # linenumber is zero-based here - except Exception as e: - problems.append(Problem(filename, linenumber + 1, lines[linenumber], str(e))) - - f(linesRaw, checkersRaw) - f(linesNoCommentsOrLiterals, checkersNoCommentsOrLiterals) - f(linesNoCCommentsOrLiterals, checkersNoCCommentsOrLiterals) - f(linesNoExpectStrings, checkersNoExpectStrings) - - # Last line should have a newline, and there should not be an empty line. - # The 'split' result will have one empty string as its last item in the - # expected case. For a single line file there will be two split results - # (the line itself, and an empty string). 
- - if len(linesRaw) == 0 or \ - len(linesRaw) == 1 and linesRaw[-1] != '' or \ - len(linesRaw) >= 2 and linesRaw[-1] != '' or \ - len(linesRaw) >= 2 and linesRaw[-1] == '' and linesRaw[-2] == '': - problems.append(Problem(filename, len(linesRaw), '(no line)', 'No newline on last line or empty line at end of file')) - - # First line should not be empty (unless it's the only line, len(linesRaw)==2) - if len(linesRaw) > 2 and linesRaw[0] == '': - problems.append(Problem(filename, 1, '(no line)', 'First line is empty')) + f = open(filename, 'rb') + dataRaw = f.read() + f.close() + + dataNoCommentsOrLiterals = removeAnyCommentsAndLiterals(dataRaw) # no C/javascript comments, literals removed + dataNoCCommentsOrLiterals = removeCCommentsAndLiterals(dataRaw) # no C comments, literals removed + dataNoExpectStrings = removeExpectStrings(dataRaw) # no testcase expect strings + + linesRaw = dataRaw.split('\n') + linesNoCommentsOrLiterals = dataNoCommentsOrLiterals.split('\n') + linesNoCCommentsOrLiterals = dataNoCCommentsOrLiterals.split('\n') + linesNoExpectStrings = dataNoExpectStrings.split('\n') + + def f(lines, checkers): + for linenumber in xrange(len(lines)): + for fun in checkers: + try: + fun(lines, linenumber, filename) # linenumber is zero-based here + except Exception as e: + problems.append(Problem(filename, linenumber + 1, lines[linenumber], str(e))) + + f(linesRaw, checkersRaw) + f(linesNoCommentsOrLiterals, checkersNoCommentsOrLiterals) + f(linesNoCCommentsOrLiterals, checkersNoCCommentsOrLiterals) + f(linesNoExpectStrings, checkersNoExpectStrings) + + # Last line should have a newline, and there should not be an empty line. + # The 'split' result will have one empty string as its last item in the + # expected case. For a single line file there will be two split results + # (the line itself, and an empty string). 
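+    # (For example, a well-formed two-line file "a\nb\n" splits into
+    # ['a', 'b', ''].)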
+ + if len(linesRaw) == 0 or \ + len(linesRaw) == 1 and linesRaw[-1] != '' or \ + len(linesRaw) >= 2 and linesRaw[-1] != '' or \ + len(linesRaw) >= 2 and linesRaw[-1] == '' and linesRaw[-2] == '': + problems.append(Problem(filename, len(linesRaw), '(no line)', 'No newline on last line or empty line at end of file')) + + # First line should not be empty (unless it's the only line, len(linesRaw)==2) + if len(linesRaw) > 2 and linesRaw[0] == '': + problems.append(Problem(filename, 1, '(no line)', 'First line is empty')) def asciiOnly(x): - return re.sub(r'[\x80-\xff]', '#', x) + return re.sub(r'[\x80-\xff]', '#', x) def main(): - parser = optparse.OptionParser() - parser.add_option('--dump-vim-commands', dest='dump_vim_commands', default=False, help='Dump oneline vim command') - parser.add_option('--check-debug-log-calls', dest='check_debug_log_calls', action='store_true', default=False, help='Check debug log call consistency') - parser.add_option('--check-carriage-returns', dest='check_carriage_returns', action='store_true', default=False, help='Check carriage returns') - parser.add_option('--check-fixme', dest='check_fixme', action='store_true', default=False, help='Check FIXME tags') - parser.add_option('--check-non-ascii', dest='check_non_ascii', action='store_true', default=False, help='Check non-ASCII characters') - parser.add_option('--check-no-symbol-visibility', dest='check_no_symbol_visibility', action='store_true', default=False, help='Check for missing symbol visibility macros') - parser.add_option('--check-rejected-identifiers', dest='check_rejected_identifiers', action='store_true', default=False, help='Check for rejected identifiers like plain "printf()" calls') - parser.add_option('--check-trailing-whitespace', dest='check_trailing_whitespace', action='store_true', default=False, help='Check for trailing whitespace') - parser.add_option('--check-mixed-indent', dest='check_mixed_indent', action='store_true', default=False, help='Check for mixed indent (space and tabs)') - parser.add_option('--check-nonleading-tab', dest='check_nonleading_tab', action='store_true', default=False, help='Check for non-leading tab characters') - parser.add_option('--check-cpp-comment', dest='check_cpp_comment', action='store_true', default=False, help='Check for c++ comments ("// ...")') - parser.add_option('--fail-on-errors', dest='fail_on_errors', action='store_true', default=False, help='Fail on errors (exit code != 0)') - - (opts, args) = parser.parse_args() - - checkersRaw = [] - if opts.check_debug_log_calls: - checkersRaw.append(checkDebugLogCalls) - if opts.check_carriage_returns: - checkersRaw.append(checkCarriageReturns) - if opts.check_fixme: - checkersRaw.append(checkFixme) - if opts.check_non_ascii: - checkersRaw.append(checkNonAscii) - if opts.check_no_symbol_visibility: - checkersRaw.append(checkNoSymbolVisibility) - - checkersNoCCommentsOrLiterals = [] - if opts.check_cpp_comment: - checkersNoCCommentsOrLiterals.append(checkCppComment) - - checkersNoCommentsOrLiterals = [] - if opts.check_rejected_identifiers: - checkersNoCommentsOrLiterals.append(checkIdentifiers) - - checkersNoExpectStrings = [] - if opts.check_trailing_whitespace: - checkersNoExpectStrings.append(checkTrailingWhitespace) - if opts.check_mixed_indent: - checkersNoExpectStrings.append(checkMixedIndent) - if opts.check_nonleading_tab: - checkersNoExpectStrings.append(checkNonLeadingTab) - - for filename in args: - processFile(filename, checkersRaw, checkersNoCommentsOrLiterals, checkersNoCCommentsOrLiterals, 
checkersNoExpectStrings) - - if len(problems) > 0: - for i in problems: - tmp = 'vim +' + str(i.linenumber) - while len(tmp) < 10: - tmp = tmp + ' ' - tmp += ' ' + str(i.filename) + ' : ' + str(i.reason) - while len(tmp) < 80: - tmp = tmp + ' ' - tmp += ' - ' + asciiOnly(i.line.strip()) - print(tmp) - - print '*** Total: %d problems' % len(problems) - - if opts.dump_vim_commands: - cmds = [] - for i in problems: - cmds.append('vim +' + str(i.linenumber) + ' "' + i.filename + '"') - print '' - print('; '.join(cmds)) - - if opts.fail_on_errors: - sys.exit(1) - - sys.exit(0) + parser = optparse.OptionParser() + parser.add_option('--dump-vim-commands', dest='dump_vim_commands', default=False, help='Dump oneline vim command') + parser.add_option('--check-debug-log-calls', dest='check_debug_log_calls', action='store_true', default=False, help='Check debug log call consistency') + parser.add_option('--check-carriage-returns', dest='check_carriage_returns', action='store_true', default=False, help='Check carriage returns') + parser.add_option('--check-fixme', dest='check_fixme', action='store_true', default=False, help='Check ' + fixmeString + ' tags') + parser.add_option('--check-non-ascii', dest='check_non_ascii', action='store_true', default=False, help='Check non-ASCII characters') + parser.add_option('--check-no-symbol-visibility', dest='check_no_symbol_visibility', action='store_true', default=False, help='Check for missing symbol visibility macros') + parser.add_option('--check-rejected-identifiers', dest='check_rejected_identifiers', action='store_true', default=False, help='Check for rejected identifiers like plain "printf()" calls') + parser.add_option('--check-trailing-whitespace', dest='check_trailing_whitespace', action='store_true', default=False, help='Check for trailing whitespace') + parser.add_option('--check-mixed-indent', dest='check_mixed_indent', action='store_true', default=False, help='Check for mixed indent (space and tabs)') + parser.add_option('--check-tab-indent', dest='check_tab_indent', action='store_true', default=False, help='Check for tab indent') + parser.add_option('--check-nonleading-tab', dest='check_nonleading_tab', action='store_true', default=False, help='Check for non-leading tab characters') + parser.add_option('--check-cpp-comment', dest='check_cpp_comment', action='store_true', default=False, help='Check for c++ comments ("// ...")') + parser.add_option('--fail-on-errors', dest='fail_on_errors', action='store_true', default=False, help='Fail on errors (exit code != 0)') + + (opts, args) = parser.parse_args() + + checkersRaw = [] + if opts.check_debug_log_calls: + checkersRaw.append(checkDebugLogCalls) + if opts.check_carriage_returns: + checkersRaw.append(checkCarriageReturns) + if opts.check_fixme: + checkersRaw.append(checkFixme) + if opts.check_non_ascii: + checkersRaw.append(checkNonAscii) + if opts.check_no_symbol_visibility: + checkersRaw.append(checkNoSymbolVisibility) + + checkersNoCCommentsOrLiterals = [] + if opts.check_cpp_comment: + checkersNoCCommentsOrLiterals.append(checkCppComment) + + checkersNoCommentsOrLiterals = [] + if opts.check_rejected_identifiers: + checkersNoCommentsOrLiterals.append(checkIdentifiers) + + checkersNoExpectStrings = [] + if opts.check_trailing_whitespace: + checkersNoExpectStrings.append(checkTrailingWhitespace) + if opts.check_mixed_indent: + checkersNoExpectStrings.append(checkMixedIndent) + if opts.check_tab_indent: + checkersNoExpectStrings.append(checkTabIndent) + if opts.check_nonleading_tab: + 
checkersNoExpectStrings.append(checkNonLeadingTab) + + for filename in args: + processFile(filename, checkersRaw, checkersNoCommentsOrLiterals, checkersNoCCommentsOrLiterals, checkersNoExpectStrings) + + if len(problems) > 0: + for i in problems: + tmp = 'vim +' + str(i.linenumber) + while len(tmp) < 10: + tmp = tmp + ' ' + tmp += ' ' + str(i.filename) + ' : ' + str(i.reason) + while len(tmp) < 80: + tmp = tmp + ' ' + tmp += ' - ' + asciiOnly(i.line.strip()) + print(tmp) + + print '*** Total: %d problems' % len(problems) + + if opts.dump_vim_commands: + cmds = [] + for i in problems: + cmds.append('vim +' + str(i.linenumber) + ' "' + i.filename + '"') + print '' + print('; '.join(cmds)) + + if opts.fail_on_errors: + sys.exit(1) + + sys.exit(0) if __name__ == '__main__': - main() + main() diff --git a/util/combine_src.py b/util/combine_src.py deleted file mode 100644 index 88b8354a..00000000 --- a/util/combine_src.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python2 -# -# Combine a set of a source files into a single C file. -# -# Overview of the process: -# -# * Parse user supplied C files. Add automatic #undefs at the end -# of each C file to avoid defined bleeding from one file to another. -# -# * Combine the C files in specified order. If sources have ordering -# dependencies (depends on application), order may matter. -# -# * Process #include statements in the combined source, categorizing -# them either as "internal" (found in specified include path) or -# "external". Internal includes, unless explicitly excluded, are -# inlined into the result while extenal includes are left as is. -# Duplicate #include statements are replaced with a comment. -# -# At every step, source and header lines are represented with explicit -# line objects which keep track of original filename and line. The -# output contains #line directives, if necessary, to ensure error -# throwing and other diagnostic info will work in a useful manner when -# deployed. It's also possible to generate a combined source with no -# #line directives. -# -# Making the process deterministic is important, so that if users have -# diffs that they apply to the combined source, such diffs would apply -# for as long as possible. -# -# Limitations and notes: -# -# * While there are automatic #undef's for #define's introduced in each -# C file, it's not possible to "undefine" structs, unions, etc. If -# there are structs/unions/typedefs with conflicting names, these -# have to be resolved in the source files first. -# -# * Because duplicate #include statements are suppressed, currently -# assumes #include statements are not conditional. -# -# * A system header might be #include'd in multiple source files with -# different feature defines (like _BSD_SOURCE). Because the #include -# file will only appear once in the resulting source, the first -# occurrence wins. The result may not work correctly if the feature -# defines must actually be different between two or more source files. -# - -import os -import sys -import re -import json -import optparse - -# Include path for finding include files which are amalgamated. -include_paths = [] - -# Include files specifically excluded from being inlined. 
-include_excluded = [] - -class File: - filename_full = None - filename = None - lines = None - - def __init__(self, filename, lines): - self.filename = os.path.basename(filename) - self.filename_full = filename - self.lines = lines - -class Line: - filename_full = None - filename = None - lineno = None - data = None - - def __init__(self, filename, lineno, data): - self.filename = os.path.basename(filename) - self.filename_full = filename - self.lineno = lineno - self.data = data - -def readFile(filename): - lines = [] - - with open(filename, 'rb') as f: - lineno = 0 - for line in f: - lineno += 1 - if len(line) > 0 and line[-1] == '\n': - line = line[:-1] - lines.append(Line(filename, lineno, line)) - - return File(filename, lines) - -def lookupInclude(incfn): - re_sep = re.compile(r'/|\\') - - inccomp = re.split(re_sep, incfn) # split include path, support / and \ - - for path in include_paths: - fn = apply(os.path.join, [ path ] + inccomp) - if os.path.exists(fn): - return fn # Return full path to first match - - return None - -def addAutomaticUndefs(f): - defined = {} - - re_def = re.compile(r'#define\s+(\w+).*$') - re_undef = re.compile(r'#undef\s+(\w+).*$') - - for line in f.lines: - m = re_def.match(line.data) - if m is not None: - #print('DEFINED: %s' % repr(m.group(1))) - defined[m.group(1)] = True - m = re_undef.match(line.data) - if m is not None: - # Could just ignore #undef's here: we'd then emit - # reliable #undef's (though maybe duplicates) at - # the end. - #print('UNDEFINED: %s' % repr(m.group(1))) - if defined.has_key(m.group(1)): - del defined[m.group(1)] - - # Undefine anything that seems to be left defined. This not a 100% - # process because some #undef's might be conditional which we don't - # track at the moment. Note that it's safe to #undef something that's - # not defined. - - keys = sorted(defined.keys()) # deterministic order - if len(keys) > 0: - #print('STILL DEFINED: %r' % repr(defined.keys())) - f.lines.append(Line(f.filename, len(f.lines) + 1, '')) - f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */')) - for k in keys: - f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k)) - -def createCombined(files, prologue_filename, line_directives): - res = [] - line_map = [] # indicate combined source lines where uncombined file/line would change - metadata = { - 'line_map': line_map - } - - emit_state = [ None, None ] # curr_filename, curr_lineno - - def emit(line): - if isinstance(line, (str, unicode)): - res.append(line) - emit_state[1] += 1 - else: - if line.filename != emit_state[0] or line.lineno != emit_state[1]: - if line_directives: - res.append('#line %d "%s"' % (line.lineno, line.filename)) - line_map.append({ 'original_file': line.filename, - 'original_line': line.lineno, - 'combined_line': len(res) + 1 }) - res.append(line.data) - emit_state[0] = line.filename - emit_state[1] = line.lineno + 1 - - included = {} # headers already included - - if prologue_filename is not None: - with open(prologue_filename, 'rb') as f: - for line in f.read().split('\n'): - res.append(line) - - re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$') - - # Process a file, appending it to the result; the input may be a - # source or an include file. #include directives are handled - # recursively. 
- def processFile(f): - #print('Process file: ' + f.filename) - - for line in f.lines: - if not line.data.startswith('#include'): - emit(line) - continue - - m = re_inc.match(line.data) - if m is None: - raise Exception('Couldn\'t match #include line: %s' % repr(line.data)) - incpath = m.group(2) - if incpath in include_excluded: - # Specific include files excluded from the - # inlining / duplicate suppression process. - emit(line) # keep as is - continue - - if included.has_key(incpath): - # We suppress duplicate includes, both internal and - # external, based on the assumption that includes are - # not behind #ifdef checks. This is the case for - # Duktape (except for the include files excluded). - emit('/* #include %s -> already included */' % incpath) - continue - included[incpath] = True - - # An include file is considered "internal" and is amalgamated - # if it is found in the include path provided by the user. - - incfile = lookupInclude(incpath) - if incfile is not None: - #print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile))) - emit('/* #include %s */' % incpath) - processFile(readFile(incfile)) - else: - #print('Include considered external: %s' % repr(line.data)) - emit(line) # keep as is - - for f in files: - processFile(f) - - return '\n'.join(res) + '\n', metadata - -def main(): - global include_paths, include_excluded - - parser = optparse.OptionParser() - parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times') - parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)') - parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file') - parser.add_option('--output-source', dest='output_source', help='Output source filename') - parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename') - parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source') - (opts, args) = parser.parse_args() - - assert(opts.include_paths is not None) - include_paths = opts.include_paths # global for easy access - include_excluded = opts.include_excluded - assert(opts.output_source) - assert(opts.output_metadata) - - print('Read input files, add automatic #undefs') - sources = args - files = [] - for fn in sources: - res = readFile(fn) - #print('Add automatic undefs for: ' + fn) - addAutomaticUndefs(res) - files.append(res) - - print('Create combined source file from %d source files' % len(files)) - combined_source, metadata = \ - createCombined(files, opts.prologue, opts.line_directives) - with open(opts.output_source, 'wb') as f: - f.write(combined_source) - with open(opts.output_metadata, 'wb') as f: - f.write(json.dumps(metadata, indent=4)) - - print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source)) - -if __name__ == '__main__': - main() diff --git a/util/create_spdx_license.py b/util/create_spdx_license.py deleted file mode 100644 index af660007..00000000 --- a/util/create_spdx_license.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python2 -# -# Helper to create an SPDX license file (http://spdx.org) -# -# This must be executed when the dist/ directory is otherwise complete, -# except for the SPDX license, so that the file lists and such contained -# in the 
SPDX license will be correct. -# -# The utility outputs RDF/XML to specified file: -# -# $ python create_spdx_license.py /tmp/license.spdx -# -# Then, validate with SPDXViewer and SPDXTools: -# -# $ java -jar SPDXViewer.jar /tmp/license.spdx -# $ java -jar java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html -# -# Finally, copy to dist: -# -# $ cp /tmp/license.spdx dist/license.spdx -# -# SPDX FAQ indicates there is no standard extension for an SPDX license file -# but '.spdx' is a common practice. -# -# The algorithm to compute a "verification code", implemented in this file, -# can be verified as follows: -# -# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz -# $ cd /tmp -# $ tar xvfJ duktape-N.N.N.tar.xz -# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code -# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/ -# -# Compare the resulting verification code manually with the one in license.spdx. -# -# Resources: -# -# - http://spdx.org/about-spdx/faqs -# - http://wiki.spdx.org/view/Technical_Team/Best_Practices -# - -import os -import sys -import re -import datetime -import sha -import rdflib -from rdflib import URIRef, BNode, Literal, Namespace - -RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') -RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') -XSD = Namespace('http://www.w3.org/2001/XMLSchema#') -SPDX = Namespace('http://spdx.org/rdf/terms#') -DOAP = Namespace('http://usefulinc.com/ns/doap#') -DUKTAPE = Namespace('http://duktape.org/rdf/terms#') - -def checksumFile(g, filename): - f = open(filename, 'rb') - d = f.read() - f.close() - shasum = sha.sha(d).digest().encode('hex').lower() - - csum_node = BNode() - g.add((csum_node, RDF.type, SPDX.Checksum)) - g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1)) - g.add((csum_node, SPDX.checksumValue, Literal(shasum))) - - return csum_node - -def computePackageVerification(g, dirname, excluded): - # SPDX 1.2 Section 4.7 - # The SPDXTools command "GenerateVerificationCode" can be used to - # check the verification codes created. Note that you must manually - # remove "license.spdx" from the unpacked dist directory before - # computing the verification code. 
- - verify_node = BNode() - - hashes = [] - for dirpath, dirnames, filenames in os.walk(dirname): - for fn in filenames: - full_fn = os.path.join(dirpath, fn) - f = open(full_fn, 'rb') - d = f.read() - f.close() - - if full_fn in excluded: - #print('excluded in verification: ' + full_fn) - continue - #print('included in verification: ' + full_fn) - - file_sha1 = sha.sha(d).digest().encode('hex').lower() - hashes.append(file_sha1) - - #print(repr(hashes)) - hashes.sort() - #print(repr(hashes)) - verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower() - - for fn in excluded: - g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn))) - g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code))) - - return verify_node - -def fileType(filename): - ign, ext = os.path.splitext(filename) - if ext in [ '.c', '.h', '.js' ]: - return SPDX.fileType_source - else: - return SPDX.fileType_other - -def getDuktapeVersion(): - f = open('./src/duktape.h') - re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$') - for line in f: - line = line.strip() - m = re_ver.match(line) - if m is None: - continue - ver = int(m.group(1)) - return '%d.%d.%d' % ((ver / 10000) % 100, - (ver / 100) % 100, - ver % 100) - - raise Exception('could not figure out Duktape version') - -def main(): - outfile = sys.argv[1] - - if not os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript'): - sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') - sys.exit(1) - os.chdir('dist') - if not os.path.exists('Makefile.cmdline'): - sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') - sys.exit(1) - - duktape_version = getDuktapeVersion() - duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz' - now = datetime.datetime.utcnow() - now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second) - creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime) - duktape_org = Literal('Organization: duktape.org') - mit_license = URIRef('http://spdx.org/licenses/MIT') - duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)') - - g = rdflib.Graph() - - crea_node = BNode() - g.add((crea_node, RDF.type, SPDX.CreationInfo)) - g.add((crea_node, RDFS.comment, Literal(''))) - g.add((crea_node, SPDX.creator, duktape_org)) - g.add((crea_node, SPDX.created, creation_date)) - g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/ - - # 'name' should not include a version number (see best practices) - pkg_node = BNode() - g.add((pkg_node, RDF.type, SPDX.Package)) - g.add((pkg_node, SPDX.name, Literal('Duktape'))) - g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version))) - g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname))) - g.add((pkg_node, SPDX.supplier, duktape_org)) - g.add((pkg_node, SPDX.originator, duktape_org)) - g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI))) - g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI))) - verify_node = computePackageVerification(g, '.', [ './license.spdx' ]) - g.add((pkg_node, SPDX.packageVerificationCode, verify_node)) - # SPDX.checksum: omitted because license is inside the package - g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.'))) - - # NOTE: MIT license alone is sufficient for now, 
because Duktape, Lua, - # Murmurhash2, and CommonJS (though probably not even relevant for - # licensing) are all MIT. - g.add((pkg_node, SPDX.licenseConcluded, mit_license)) - g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license)) - g.add((pkg_node, SPDX.licenseDeclared, mit_license)) - g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.'))) - g.add((pkg_node, SPDX.copyrightText, duktape_copyright)) - g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter'))) - g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint'))) - # hasFile properties added separately below - - #reviewed_node = BNode() - #g.add((reviewed_node, RDF.type, SPDX.Review)) - #g.add((reviewed_node, SPDX.reviewer, XXX)) - #g.add((reviewed_node, SPDX.reviewDate, XXX)) - #g.add((reviewed_node, RDFS.comment, '')) - - spdx_doc = BNode() - g.add((spdx_doc, RDF.type, SPDX.SpdxDocument)) - g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2'))) - g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0'))) - g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version))) - g.add((spdx_doc, SPDX.creationInfo, crea_node)) - g.add((spdx_doc, SPDX.describesPackage, pkg_node)) - # SPDX.hasExtractedLicensingInfo - # SPDX.reviewed - # SPDX.referencesFile: added below - - for dirpath, dirnames, filenames in os.walk('.'): - for fn in filenames: - full_fn = os.path.join(dirpath, fn) - #print('# file: ' + full_fn) - - file_node = BNode() - g.add((file_node, RDF.type, SPDX.File)) - g.add((file_node, SPDX.fileName, Literal(full_fn))) - g.add((file_node, SPDX.fileType, fileType(full_fn))) - g.add((file_node, SPDX.checksum, checksumFile(g, full_fn))) - - # Here we assume that LICENSE.txt provides the actual "in file" - # licensing information, and everything else is implicitly under - # MIT license. - g.add((file_node, SPDX.licenseConcluded, mit_license)) - if full_fn == './LICENSE.txt': - g.add((file_node, SPDX.licenseInfoInFile, mit_license)) - else: - g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none))) - - # SPDX.licenseComments - g.add((file_node, SPDX.copyrightText, duktape_copyright)) - # SPDX.noticeText - # SPDX.artifactOf - # SPDX.fileDependency - # SPDX.fileContributor - - # XXX: should referencesFile include all files? - g.add((spdx_doc, SPDX.referencesFile, file_node)) - - g.add((pkg_node, SPDX.hasFile, file_node)) - - # Serialize into RDF/XML directly. We could also serialize into - # N-Triples and use external tools (like 'rapper') to get cleaner, - # abbreviated output. - - #print('# Duktape SPDX license file (autogenerated)') - #print(g.serialize(format='turtle')) - #print(g.serialize(format='nt')) - f = open(outfile, 'wb') - #f.write(g.serialize(format='rdf/xml')) - f.write(g.serialize(format='xml')) - f.close() - -if __name__ == '__main__': - main() diff --git a/util/ditz_hack.py b/util/ditz_hack.py deleted file mode 100644 index 1c4fb9bd..00000000 --- a/util/ditz_hack.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python2 -# -# Throwaway utility to dump Ditz issues for grooming. 
-# - -import os -import sys -import yaml - -def main(): - def issueConstructor(loader, node): - return node - - yaml.add_constructor('!ditz.rubyforge.org,2008-03-06/issue', issueConstructor) - - for fn in os.listdir(sys.argv[1]): - if fn[0:6] != 'issue-': - continue - with open(os.path.join(sys.argv[1], fn), 'rb') as f: - doc = yaml.load(f) - tmp = {} - for k,v in doc.value: - tmp[k.value] = v.value - if tmp.get('status', '') != ':closed': - print('*** ' + fn) - print(tmp.get('title', u'NOTITLE').encode('utf-8') + '\n') - print(tmp.get('desc', u'').encode('utf-8') + '\n') - -if __name__ == '__main__': - main() diff --git a/util/duk_meta_to_strarray.py b/util/duk_meta_to_strarray.py deleted file mode 100644 index 89a68599..00000000 --- a/util/duk_meta_to_strarray.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python2 -# -# Create an array of C strings with Duktape built-in strings. -# Useful when using external strings. -# - -import os -import sys -import json - -def to_c_string(x): - res = '"' - term = False - for i, c in enumerate(x): - if term: - term = False - res += '" "' - - o = ord(c) - if o < 0x20 or o > 0x7e or c in '\'"\\': - # Terminate C string so that escape doesn't become - # ambiguous - res += '\\x%02x' % o - term = True - else: - res += c - res += '"' - return res - -def main(): - f = open(sys.argv[1], 'rb') - d = f.read() - f.close() - meta = json.loads(d) - - print('const char *duk_builtin_strings[] = {') - - strlist = meta['builtin_strings_base64'] - for i in xrange(len(strlist)): - s = strlist[i] - if i == len(strlist) - 1: - print(' %s' % to_c_string(s.decode('base64'))) - else: - print(' %s,' % to_c_string(s.decode('base64'))) - - print('};') - -if __name__ == '__main__': - main() diff --git a/util/dump_bytecode.py b/util/dump_bytecode.py deleted file mode 100644 index 20b034cc..00000000 --- a/util/dump_bytecode.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python2 -# -# Utility to dump bytecode into a human readable form. -# - -import os -import sys -import struct -import optparse - -def decode_string(buf, off): - strlen, = struct.unpack('>L', buf[off:off+4]) - off += 4 - strdata = buf[off:off+strlen] - off += strlen - - return off, strdata - -def sanitize_string(val): - # Don't try to UTF-8 decode, just escape non-printable ASCII. 
- def f(c): - if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"': - return '\\x%02x' % ord(c) - else: - return c - return "'" + ''.join(map(f, val)) + "'" - -def decode_sanitize_string(buf, off): - off, val = decode_string(buf, off) - return off, sanitize_string(val) - -def dump_function(buf, off, ind): - count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12]) - off += 12 - print '%sInstructions: %d' % (ind, count_inst) - print '%sConstants: %d' % (ind, count_const) - print '%sInner functions: %d' % (ind, count_funcs) - - nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12]) - off += 12 - print '%sNregs: %d' % (ind, nregs) - print '%sNargs: %d' % (ind, nargs) - print '%sStart line number: %d' % (ind, start_line) - print '%sEnd line number: %d' % (ind, end_line) - - compfunc_flags, = struct.unpack('>L', buf[off:off+4]) - off += 4 - print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags) - - for i in xrange(count_inst): - ins, = struct.unpack('>L', buf[off:off+4]) - off += 4 - print '%s %06d: %08lx' % (ind, i, ins) - - print '%sConstants:' % ind - for i in xrange(count_const): - const_type, = struct.unpack('B', buf[off:off+1]) - off += 1 - - if const_type == 0x00: - off, strdata = decode_sanitize_string(buf, off) - print '%s %06d: %s' % (ind, i, strdata) - elif const_type == 0x01: - num, = struct.unpack('>d', buf[off:off+8]) - off += 8 - print '%s %06d: %f' % (ind, i, num) - else: - raise Exception('invalid constant type: %d' % const_type) - - for i in xrange(count_funcs): - print '%sInner function %d:' % (ind, i) - off = dump_function(buf, off, ind + ' ') - - val, = struct.unpack('>L', buf[off:off+4]) - off += 4 - print '%s.length: %d' % (ind, val) - off, val = decode_sanitize_string(buf, off) - print '%s.name: %s' % (ind, val) - off, val = decode_sanitize_string(buf, off) - print '%s.fileName: %s' % (ind, val) - off, val = decode_string(buf, off) # actually a buffer - print '%s._Pc2line: %s' % (ind, val.encode('hex')) - - while True: - off, name = decode_string(buf, off) - if name == '': - break - name = sanitize_string(name) - val, = struct.unpack('>L', buf[off:off+4]) - off += 4 - print '%s_Varmap[%s] = %d' % (ind, name, val) - - idx = 0 - while True: - off, name = decode_string(buf, off) - if name == '': - break - name = sanitize_string(name) - print '%s_Formals[%d] = %s' % (ind, idx, name) - idx += 1 - - return off - -def dump_bytecode(buf, off, ind): - sig, ver = struct.unpack('BB', buf[off:off+2]) - off += 2 - if sig != 0xff: - raise Exception('invalid signature byte: %d' % sig) - if ver != 0x00: - raise Exception('unsupported bytecode version: %d' % ver) - print '%sBytecode version: 0x%02x' % (ind, ver) - - off = dump_function(buf, off, ind + ' ') - - return off - -def main(): - parser = optparse.OptionParser() - parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump') - (opts, args) = parser.parse_args() - - with open(args[0], 'rb') as f: - d = f.read() - if opts.hex_decode: - d = d.strip() - d = d.decode('hex') - dump_bytecode(d, 0, '') - -if __name__ == '__main__': - main() diff --git a/util/example_rombuild.sh b/util/example_rombuild.sh index 0c15f282..723ba092 100644 --- a/util/example_rombuild.sh +++ b/util/example_rombuild.sh @@ -6,28 +6,26 @@ set -e PYTHON=`which python2 python | head -1` -# Run dist manually, ROM support is not enabled by default so add --rom-support. 
-# User builtin metadata can be provided through one or more YAML files (applied +make clean dist + +# Prepare-and-config sources manually to enable ROM support. User builtin +# metadata can be provided through one or more YAML files (which are applied # in sequence). -make clean -$PYTHON util/make_dist.py \ +rm -rf dist/src dist/src-noline dist/src-separate +$PYTHON dist/tools/prepare_sources.py \ + --source-directory dist/src-input \ + --output-directory dist \ --rom-support \ --rom-auto-lightfunc \ --user-builtin-metadata util/example_user_builtins1.yaml \ - --user-builtin-metadata util/example_user_builtins2.yaml - -# Run genconfig.py and create a custom duk_config.h with ROM support etc. -$PYTHON config/genconfig.py \ - --metadata config \ - --output dist/src/duk_config.h \ + --user-builtin-metadata util/example_user_builtins2.yaml \ + --config-metadata dist/config/genconfig_metadata.tar.gz \ -DDUK_USE_ROM_STRINGS \ -DDUK_USE_ROM_OBJECTS \ -DDUK_USE_ROM_GLOBAL_INHERIT \ -DDUK_USE_DEBUG -DDUK_USE_DEBUG_LEVEL=0 \ --option-yaml 'DUK_USE_DEBUG_WRITE: { "verbatim": "#define DUK_USE_DEBUG_WRITE(level,file,line,func,msg) do {fprintf(stderr, \"%ld %s:%ld (%s): %s\\n\", (long) (level), (file), (long) (line), (func), (msg)); } while(0)" }' \ - -DDUK_USE_ASSERTIONS \ - autodetect-header -cp dist/src/duk_config.h dist/src-separate/ + -DDUK_USE_ASSERTIONS #gcc -std=c99 -Wall -Wextra -Os -Idist/src-separate/ -Idist/examples/cmdline dist/src-separate/*.c dist/examples/cmdline/duk_cmdline.c -o _duk -lm make duk dukd # XXX: currently fails to start, DUK_CMDLINE_LOGGING_SUPPORT, DUK_CMDLINE_MODULE_SUPPORT modify Duktape object (doesn't work with ROM built-ins) @@ -35,15 +33,20 @@ make duk dukd # XXX: currently fails to start, DUK_CMDLINE_LOGGING_SUPPORT, DUK # This would ideally be done directly using genconfig.py without # --support-feature-options by moving the options into a genconfig # YAML config file. -$PYTHON config/genconfig.py \ - --metadata config \ - --output dist/src/duk_config.h \ - --option-file config/examples/low_memory.yaml \ +rm -rf dist/src dist/src-noline dist/src-separate +$PYTHON dist/tools/prepare_sources.py \ + --source-directory dist/src-input \ + --output-directory dist \ + --rom-support \ + --rom-auto-lightfunc \ + --user-builtin-metadata util/example_user_builtins1.yaml \ + --user-builtin-metadata util/example_user_builtins2.yaml \ + --config-metadata dist/config/genconfig_metadata.tar.gz \ + --support-feature-options \ -DDUK_USE_ROM_STRINGS \ -DDUK_USE_ROM_OBJECTS \ -DDUK_USE_ROM_GLOBAL_INHERIT \ - --support-feature-options \ - autodetect-header -cp dist/src/duk_config.h dist/src-separate/ + -DDUK_USE_ASSERTIONS \ + -UDUK_USE_DEBUG #gcc -std=c99 -Wall -Wextra -Os -Idist/src-separate/ -Idist/examples/cmdline dist/src-separate/*.c dist/examples/cmdline/duk_cmdline.c -o _duk -lm make ajduk diff --git a/util/example_user_builtins1.yaml b/util/example_user_builtins1.yaml index b9a87d38..74b445a2 100644 --- a/util/example_user_builtins1.yaml +++ b/util/example_user_builtins1.yaml @@ -8,9 +8,9 @@ # # See examples below for details on how to use these. # -# Note that genbuiltins.py (and make_dist.py) accepts multiple user built-in -# YAML files, so that you can manage your custom strings and objects in -# individual YAML files for modularity. +# Note that genbuiltins.py (and prepare_sources.py) accepts multiple user +# built-in YAML files, so that you can manage your custom strings and +# objects in individual YAML files for modularity. 
# # When using pointer compression, all ROM strings and objects need a number # from the ROM pointer compression range (e.g. [0xf800,0xffff]). By default diff --git a/util/fastint_reps.py b/util/fastint_reps.py index e15d4e9d..19a075d9 100644 --- a/util/fastint_reps.py +++ b/util/fastint_reps.py @@ -3,68 +3,66 @@ # Print out a few IEEE double representations related to the Duktape fastint # number model. # -# NOTE: signed zero does not work correctly here. -# import struct import math def isFastint(x): - if math.floor(x) == x and \ - x >= -(2**47) and \ - x < (2**47) and \ - True: # FIXME: not neg zero - return True - return False + if math.floor(x) == x and \ + x >= -(2**47) and \ + x < (2**47) and \ + (x != 0 or math.copysign(1.0, x) == 1.0): + return True + return False def stringRep(x): - tmp = struct.pack('>d', x) - tmphex = tmp.encode('hex') + tmp = struct.pack('>d', x) + tmphex = tmp.encode('hex') - sgnexp = (ord(tmp[0]) << 8) + ord(tmp[1]) - sgn = (sgnexp) >> 15 - exp = (sgnexp & 0x7ff0) >> 4 - manthex = tmphex[3:] + sgnexp = (ord(tmp[0]) << 8) + ord(tmp[1]) + sgn = (sgnexp) >> 15 + exp = (sgnexp & 0x7ff0) >> 4 + manthex = tmphex[3:] - return '%s sgn=%d exp=%d sgnexp=%x manthex=%s' % (tmphex, sgn, exp, sgnexp, manthex) + return '%s sgn=%d exp=%d sgnexp=%x manthex=%s' % (tmphex, sgn, exp, sgnexp, manthex) def main(): - for i in [ -(2**47) - 1, - -(2**47), - -(2**47) + 1, - -(2**32) - 1, - -(2**32), - -(2**32) + 1, - -(long(0xdeadbeef)), - -9, - -8, - -8, - -7, - -6, - -5, - -4, - -3, - -2, - -1, - -0, - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - long(0xdeadbeef), - (2**32) - 1, - (2**32), - (2**32) + 1, - (2**47) - 1, - (2**47) - ]: - print('%d %x (fastint=%s): %s' % (i, i, str(isFastint(i)), stringRep(i))) + for i in [ -(2**47) - 1, + -(2**47), + -(2**47) + 1, + -(2**32) - 1, + -(2**32), + -(2**32) + 1, + -(long(0xdeadbeef)), + -9, + -8, + -8, + -7, + -6, + -5, + -4, + -3, + -2, + -1, + -0.0, # must use float to get neg zero + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + long(0xdeadbeef), + (2**32) - 1, + (2**32), + (2**32) + 1, + (2**47) - 1, + (2**47) + ]: + print('%f %x (fastint=%s): %s' % (float(i), int(i), str(isFastint(i)), stringRep(i))) if __name__ == '__main__': - main() + main() diff --git a/util/filter_test262_log.py b/util/filter_test262_log.py index dbad7c65..ff3f0711 100644 --- a/util/filter_test262_log.py +++ b/util/filter_test262_log.py @@ -6,114 +6,114 @@ import json import yaml def main(): - with open(sys.argv[1], 'rb') as f: - known_issues = yaml.load(f.read()) - - skipstrings = [ - 'passed in strict mode', - 'passed in non-strict mode', - 'failed in strict mode as expected', - 'failed in non-strict mode as expected' - ] - - in_failed_tests = False - tofix_count = 0 # count of bugs that will be fixed (no uncertainty about proper behavior etc) - known_errors = [] - diagnosed_errors = [] - unknown_errors = [] - other_errors = [] - - for line in sys.stdin: - if len(line) > 1 and line[-1] == '\n': - line = line[:-1] - - # Skip success cases - - skip = False - for sk in skipstrings: - if sk in line: - skip = True - if skip: - continue - - # Augment error list with "known bugs" - - print(line) # print error list as is, then refined version later - - if 'failed tests' in line.lower(): - in_failed_tests = True - continue - - if in_failed_tests and line.strip() == '': - in_failed_tests = False - continue - - if in_failed_tests: - # " intl402/ch12/12.2/12.2.3_c in non-strict mode" - tmp = line.strip().split(' ') - test = tmp[0] - - matched = False - for kn in 
known_issues: - if kn.get('test', None) != test: - continue - if kn.has_key('diagnosed'): - tofix_count += 1 - diagnosed_errors.append(line + ' // diagnosed: ' + kn['diagnosed']) - elif kn.has_key('knownissue'): - # don't bump tofix_count, as testcase expected result is not certain - known_errors.append(line + ' // KNOWN: ' + kn['knownissue']) - else: - tofix_count += 1 - unknown_errors.append(line + ' // ??? (rule matches)') - kn['used'] = True # mark rule used - matched = True - break - - if matched: - continue - - # no match, to fix - other_errors.append(line) - tofix_count += 1 - - print('') - print('=== CATEGORISED ERRORS ===') - print('') - - for i in known_errors: - print(i) - for i in diagnosed_errors: - print(i) - for i in unknown_errors: - print(i) - for i in other_errors: - print(i) - - # Check for unused rules (e.g. bugs fixed) - - print('') - for kn in known_issues: - if not kn.has_key('used'): - print('WARNING: unused rule: ' + json.dumps(kn)) - - # Used by testclient - - if len(unknown_errors) > 0 or len(other_errors) > 0: - print('TEST262 FAILED') - elif len(known_errors) > 0 or len(diagnosed_errors) > 0: - # Known and diagnosed errors don't indicate test failure - # as far as Github status is concerned. - print('TEST262 SUCCESS') - else: - print('TEST262 SUCCESS') - - # To fix count - - print('') - print('TO-FIX COUNT: ' + str(tofix_count)) - print(' = test case failures which need fixing (Duktape bugs, uninvestigated)') + with open(sys.argv[1], 'rb') as f: + known_issues = yaml.load(f.read()) + + skipstrings = [ + 'passed in strict mode', + 'passed in non-strict mode', + 'failed in strict mode as expected', + 'failed in non-strict mode as expected' + ] + + in_failed_tests = False + tofix_count = 0 # count of bugs that will be fixed (no uncertainty about proper behavior etc) + known_errors = [] + diagnosed_errors = [] + unknown_errors = [] + other_errors = [] + + for line in sys.stdin: + if len(line) > 1 and line[-1] == '\n': + line = line[:-1] + + # Skip success cases + + skip = False + for sk in skipstrings: + if sk in line: + skip = True + if skip: + continue + + # Augment error list with "known bugs" + + print(line) # print error list as is, then refined version later + + if 'failed tests' in line.lower(): + in_failed_tests = True + continue + + if in_failed_tests and line.strip() == '': + in_failed_tests = False + continue + + if in_failed_tests: + # " intl402/ch12/12.2/12.2.3_c in non-strict mode" + tmp = line.strip().split(' ') + test = tmp[0] + + matched = False + for kn in known_issues: + if kn.get('test', None) != test: + continue + if kn.has_key('diagnosed'): + tofix_count += 1 + diagnosed_errors.append(line + ' // diagnosed: ' + kn['diagnosed']) + elif kn.has_key('knownissue'): + # don't bump tofix_count, as testcase expected result is not certain + known_errors.append(line + ' // KNOWN: ' + kn['knownissue']) + else: + tofix_count += 1 + unknown_errors.append(line + ' // ??? (rule matches)') + kn['used'] = True # mark rule used + matched = True + break + + if matched: + continue + + # no match, to fix + other_errors.append(line) + tofix_count += 1 + + print('') + print('=== CATEGORISED ERRORS ===') + print('') + + for i in known_errors: + print(i) + for i in diagnosed_errors: + print(i) + for i in unknown_errors: + print(i) + for i in other_errors: + print(i) + + # Check for unused rules (e.g. 
bugs fixed) + + print('') + for kn in known_issues: + if not kn.has_key('used'): + print('WARNING: unused rule: ' + json.dumps(kn)) + + # Used by testclient + + if len(unknown_errors) > 0 or len(other_errors) > 0: + print('TEST262 FAILED') + elif len(known_errors) > 0 or len(diagnosed_errors) > 0: + # Known and diagnosed errors don't indicate test failure + # as far as Github status is concerned. + print('TEST262 SUCCESS') + else: + print('TEST262 SUCCESS') + + # To fix count + + print('') + print('TO-FIX COUNT: ' + str(tofix_count)) + print(' = test case failures which need fixing (Duktape bugs, uninvestigated)') if __name__ == '__main__': - main() + main() diff --git a/util/find_func_calls.py b/util/find_func_calls.py index b6817c97..eb36de24 100644 --- a/util/find_func_calls.py +++ b/util/find_func_calls.py @@ -24,57 +24,57 @@ re_func_call = re.compile(r'([A-Za-z_][A-Za-z0-9_]+)\(') re_string = re.compile(r'"(\\"|[^"])*"') def stripLineContinuations(x): - res = re.sub(re_linecont, ' ', x) - #print(res) - return res + res = re.sub(re_linecont, ' ', x) + #print(res) + return res def stripComments(x): - res = re.sub(re_comment, '/*omit*/', x) - #print(res) - return res + res = re.sub(re_comment, '/*omit*/', x) + #print(res) + return res def stripStrings(x): - res = re.sub(re_string, '"..."', x) - #print(res) - return res + res = re.sub(re_string, '"..."', x) + #print(res) + return res def findFuncCalls(d, fn): - res = [] - for line in d.split('\n'): - if len(line) >= 1 and line[0] == '#': - # Preprocessor lines contain function call like - # syntax but are not function calls. - continue + res = [] + for line in d.split('\n'): + if len(line) >= 1 and line[0] == '#': + # Preprocessor lines contain function call like + # syntax but are not function calls. + continue - for m in re_func_call.finditer(line): - res.append({ - 'name': m.group(1), - 'filename': fn - }) - return res + for m in re_func_call.finditer(line): + res.append({ + 'name': m.group(1), + 'filename': fn + }) + return res def main(): - # Duktape code does not have a space between a function name and - # an open parenthesis. If the regexp includes an optional space, - # it will provide a lot of false matches. + # Duktape code does not have a space between a function name and + # an open parenthesis. If the regexp includes an optional space, + # it will provide a lot of false matches. - for fn in sys.argv[1:]: - f = open(fn, 'rb') - d = f.read() - f.close() + for fn in sys.argv[1:]: + f = open(fn, 'rb') + d = f.read() + f.close() - # Strip line continuations, comments, and strings so that - # we minimize false matches. + # Strip line continuations, comments, and strings so that + # we minimize false matches. - d = stripLineContinuations(d) - d = stripComments(d) - d = stripStrings(d) + d = stripLineContinuations(d) + d = stripComments(d) + d = stripStrings(d) - # Find function calls (close enough). + # Find function calls (close enough). 
- for i in findFuncCalls(d, fn): - #print '%s' % i['name'] - print '%-25s%s' % (i['name'], i['filename']) + for i in findFuncCalls(d, fn): + #print '%s' % i['name'] + print '%-25s%s' % (i['name'], i['filename']) if __name__ == '__main__': - main() + main() diff --git a/util/find_non_ascii.py b/util/find_non_ascii.py index 9e89ac93..a2b4c3e0 100644 --- a/util/find_non_ascii.py +++ b/util/find_non_ascii.py @@ -8,18 +8,18 @@ import os, sys def main(): - f = open(sys.argv[1], 'rb') - data = f.read() - f.close() + f = open(sys.argv[1], 'rb') + data = f.read() + f.close() - for linenum, linedata in enumerate(data.split('\n')): - non_ascii = False - for i in xrange(len(linedata)): - x = ord(linedata[i]) - if x >= 0x80: - print '%s: non-ascii data on line %d, char index %d, value %d (0x%02x)' % \ - (sys.argv[1], linenum + 1, i + 1, x, x) - non_ascii = True + for linenum, linedata in enumerate(data.split('\n')): + non_ascii = False + for i in xrange(len(linedata)): + x = ord(linedata[i]) + if x >= 0x80: + print '%s: non-ascii data on line %d, char index %d, value %d (0x%02x)' % \ + (sys.argv[1], linenum + 1, i + 1, x, x) + non_ascii = True if __name__ == '__main__': - main() + main() diff --git a/util/fix_emscripten.py b/util/fix_emscripten.py index 2f3a2cd3..3523c647 100644 --- a/util/fix_emscripten.py +++ b/util/fix_emscripten.py @@ -12,45 +12,45 @@ import sys fix_count = 0 replacements = { - # RegExp fixes for non-compliant regexps (typically literal brace - # without a backslash escape). These fixes are no longer needed - # with Duktape 1.5.0 which adds support for parsing non-standard - # regexp curly braces. - #r"""if (/?/.test(type)) return true""": - # r"""if (/?/.test(type)) return true""", - #r"""var sourceRegex = /^function\s\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""": - # r"""var sourceRegex = /^function\s\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""", - #r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""": - # r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""", - #r"""/^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/""": - # r"""/^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/""", - - # GH-11: Attempt to parse a function's toString() output with a RegExp. - # The RegExp assumes more of toString() output format than what is - # guaranteed by the specification, and won't parse Duktape 1.4.0 (and - # before) function toString() output ("function empty() {/* source code*/)}"). - # No longer needed in Duktape 1.5.0 which changed the .toString() format. - #r"""var parsed = jsfunc.toString().match(sourceRegex).slice(1);""": - # r"""var parsed = (jsfunc.toString().match(sourceRegex) || []).slice(1);""", - #r"""jsfunc.toString().match(sourceRegex).slice(1);""": - # r"""(jsfunc.toString().match(sourceRegex) || []).slice(1);""", + # RegExp fixes for non-compliant regexps (typically literal brace + # without a backslash escape). These fixes are no longer needed + # with Duktape 1.5.0 which adds support for parsing non-standard + # regexp curly braces. 
+ #r"""if (/?/.test(type)) return true""": + # r"""if (/?/.test(type)) return true""", + #r"""var sourceRegex = /^function\s\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""": + # r"""var sourceRegex = /^function\s\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""", + #r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""": + # r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""", + #r"""/^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/""": + # r"""/^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/""", + + # GH-11: Attempt to parse a function's toString() output with a RegExp. + # The RegExp assumes more of toString() output format than what is + # guaranteed by the specification, and won't parse Duktape 1.4.0 (and + # before) function toString() output ("function empty() {/* source code*/)}"). + # No longer needed in Duktape 1.5.0 which changed the .toString() format. + #r"""var parsed = jsfunc.toString().match(sourceRegex).slice(1);""": + # r"""var parsed = (jsfunc.toString().match(sourceRegex) || []).slice(1);""", + #r"""jsfunc.toString().match(sourceRegex).slice(1);""": + # r"""(jsfunc.toString().match(sourceRegex) || []).slice(1);""", } repl_keys = replacements.keys() repl_keys.sort() for line in sys.stdin: - if len(line) > 1 and line[-1] == '\n': - line = line[:-1] + if len(line) > 1 and line[-1] == '\n': + line = line[:-1] - for k in repl_keys: - line_fix = line.replace(k, replacements[k]) - if line_fix != line: - fix_count += 1 - line = line_fix + for k in repl_keys: + line_fix = line.replace(k, replacements[k]) + if line_fix != line: + fix_count += 1 + line = line_fix - print(line) + print(line) if fix_count > 0: - sys.stderr.write('Emscripten fixes needed (fix_emscripten.py): fix_count=%d\n' % fix_count) - sys.stderr.flush() + sys.stderr.write('Emscripten fixes needed (fix_emscripten.py): fix_count=%d\n' % fix_count) + sys.stderr.flush() diff --git a/util/format_perftest.py b/util/format_perftest.py index 2fc20cf7..9c2befcc 100644 --- a/util/format_perftest.py +++ b/util/format_perftest.py @@ -7,51 +7,51 @@ import sys import re def main(): - # test-try-catch-throw.js : duk.O2.alt0 40.70 duk.O2.alt0f 40.74 duk.O2.alt1 40.10 duk.O2.alt1a 39.91 duk.O2.alt2 40.10 duk.O2.alt3 39.77 duk.O2.master 40.01 duk.O2.130 38.08 + # test-try-catch-throw.js : duk.O2.alt0 40.70 duk.O2.alt0f 40.74 duk.O2.alt1 40.10 duk.O2.alt1a 39.91 duk.O2.alt2 40.10 duk.O2.alt3 39.77 duk.O2.master 40.01 duk.O2.130 38.08 - re_line = re.compile(r'^(\S+)\s*:\s*(.*?)$') - re_part = re.compile(r'\S+') - first = True + re_line = re.compile(r'^(\S+)\s*:\s*(.*?)$') + re_part = re.compile(r'\S+') + first = True - with open(sys.argv[1], 'rb') as f_in, open(sys.argv[2], 'wb') as f_out: - f_out.write('\n') - f_out.write('\n') - f_out.write('\n') - f_out.write("""\ + with open(sys.argv[1], 'rb') as f_in, open(sys.argv[2], 'wb') as f_out: + f_out.write('\n') + f_out.write('\n') + f_out.write('\n') + f_out.write("""\ """) - f_out.write('\n') - f_out.write('\n') - f_out.write('\n') - for line in f_in: - line = line.strip() - m = re_line.match(line) - if m is None: - continue - - testname = m.group(1) - parts = re_part.findall(m.group(2)) - - if first: - first = False - f_out.write('') - f_out.write('') - for idx in xrange(0, len(parts), 2): - f_out.write('') - f_out.write('\n') - - f_out.write('') - f_out.write('') - for idx in xrange(1, len(parts), 2): - 
f_out.write('') - f_out.write('\n') - - f_out.write('
' + parts[idx] + '
' + testname + '' + parts[idx] + '
\n') - f_out.write('\n') - f_out.write('\n') + f_out.write('\n') + f_out.write('\n') + f_out.write('\n') + for line in f_in: + line = line.strip() + m = re_line.match(line) + if m is None: + continue + + testname = m.group(1) + parts = re_part.findall(m.group(2)) + + if first: + first = False + f_out.write('') + f_out.write('') + for idx in xrange(0, len(parts), 2): + f_out.write('') + f_out.write('\n') + + f_out.write('') + f_out.write('') + for idx in xrange(1, len(parts), 2): + f_out.write('') + f_out.write('\n') + + f_out.write('
' + parts[idx] + '
' + testname + '' + parts[idx] + '
\n') + f_out.write('\n') + f_out.write('\n') if __name__ == '__main__': - main() + main() diff --git a/src/gendoubleconsts.py b/util/gendoubleconsts.py similarity index 65% rename from src/gendoubleconsts.py rename to util/gendoubleconsts.py index 2564efe6..ac04e1d9 100644 --- a/src/gendoubleconsts.py +++ b/util/gendoubleconsts.py @@ -8,30 +8,30 @@ import struct import mpmath def create_double_constants_mpmath(): - # Just a helper to use manually - # http://mpmath.googlecode.com/svn/trunk/doc/build/basics.html - - mpmath.mp.prec = 1000 # 1000 bits - - def printhex(name, x): - # to hex string, ready for create_double() - hex = struct.pack('>d', float(str(x))).encode('hex') - flt = struct.unpack('>d', hex.decode('hex'))[0] - print '%-11s -> %s (= %.20f)' % (name, hex, flt) - - printhex('DBL_E', mpmath.mpf(mpmath.e)) - printhex('DBL_LN10', mpmath.log(10)) - printhex('DBL_LN2', mpmath.log(2)) - printhex('DBL_LOG2E', mpmath.log(mpmath.e) / mpmath.log(2)) - printhex('DBL_LOG10E', mpmath.log(mpmath.e) / mpmath.log(10)) - printhex('DBL_PI', mpmath.mpf(mpmath.pi)) - printhex('DBL_SQRT1_2', mpmath.mpf(1) / mpmath.sqrt(2)) - printhex('DBL_SQRT2', mpmath.sqrt(2)) + # Just a helper to use manually + # http://mpmath.googlecode.com/svn/trunk/doc/build/basics.html + + mpmath.mp.prec = 1000 # 1000 bits + + def printhex(name, x): + # to hex string, ready for create_double() + hex = struct.pack('>d', float(str(x))).encode('hex') + flt = struct.unpack('>d', hex.decode('hex'))[0] + print '%-11s -> %s (= %.20f)' % (name, hex, flt) + + printhex('DBL_E', mpmath.mpf(mpmath.e)) + printhex('DBL_LN10', mpmath.log(10)) + printhex('DBL_LN2', mpmath.log(2)) + printhex('DBL_LOG2E', mpmath.log(mpmath.e) / mpmath.log(2)) + printhex('DBL_LOG10E', mpmath.log(mpmath.e) / mpmath.log(10)) + printhex('DBL_PI', mpmath.mpf(mpmath.pi)) + printhex('DBL_SQRT1_2', mpmath.mpf(1) / mpmath.sqrt(2)) + printhex('DBL_SQRT2', mpmath.sqrt(2)) create_double_constants_mpmath() def create_double(x): - return struct.unpack('>d', x.decode('hex'))[0] + return struct.unpack('>d', x.decode('hex'))[0] DBL_NAN = create_double('7ff8000000000000') # a NaN matching our "normalized NAN" definition (see duk_tval.h) DBL_POSITIVE_INFINITY = create_double('7ff0000000000000') # positive infinity (unique) diff --git a/util/genequivyear.py b/util/genequivyear.py new file mode 100644 index 00000000..3f41a5da --- /dev/null +++ b/util/genequivyear.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python2 +# +# Generate equivalent year table needed by duk_bi_date.c. Based on: +# +# http://code.google.com/p/v8/source/browse/trunk/src/date.h#146 +# + +import datetime +import pytz + +def isleapyear(year): + if (year % 4) != 0: + return False + if (year % 100) != 0: + return True + if (year % 400) != 0: + return False + return True + +def eqyear(weekday, isleap): + # weekday: 0=Sunday, 1=Monday, ... + + if isleap: + recent_year = 1956 + else: + recent_year = 1967 + recent_year += (weekday * 12) % 28 + year = 2008 + (recent_year + 3 * 28 - 2008) % 28 + + # some assertions + # + # Note that Ecmascript internal weekday (0=Sunday) matches neither + # Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday). + # Python isoweekday() % 7 matches the Ecmascript weekday. 
+ # https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday + + dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC) # Jan 1 00:00:00.000 UTC + #print(weekday, isleap, year, dt.isoweekday(), isleapyear(year)) + #print(repr(dt)) + #print(dt.isoformat()) + + if isleap != isleapyear(year): + raise Exception('internal error: equivalent year does not have same leap-year-ness') + pass + + if weekday != dt.isoweekday() % 7: + raise Exception('internal error: equivalent year does not begin with the same weekday') + pass + + return year + +def main(): + for i in xrange(14): + print(eqyear(i % 7, i >= 7)) + +if __name__ == '__main__': + main() diff --git a/util/genexesizereport.py b/util/genexesizereport.py new file mode 100644 index 00000000..e0f27370 --- /dev/null +++ b/util/genexesizereport.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python2 +# +# Generate a size report from a Duktape library / executable. +# Write out useful information about function sizes in a variety +# of forms. +# + +import os +import sys +import re +import subprocess + +#000000000040d200 : +# 40d200: 55 push %rbp +# 40d201: 89 f5 mov %esi,%ebp + +re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$') +re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$') + +def objdump(filename): + proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE) + curr_func = None + func_start = None + func_end = None + ret = {} + + def storeFunc(): + if curr_func is None or func_start is None or func_end is None: + return + ret[curr_func] = { + 'name': curr_func, + 'start': func_start, + 'end': func_end, # exclusive + 'length': func_end - func_start + } + + for line in proc.stdout: + line = line.strip() + + m = re_funcstart.match(line) + if m is not None: + if curr_func is not None: + storeFunc() + curr_func = m.group(1) + func_start = None + func_end = None + + m = re_codeline.match(line) + if m is not None: + func_addr = long(m.group(1), 16) + func_bytes = m.group(2) + func_nbytes = len(func_bytes.split(' ')) + func_instr = m.group(3) + if func_start is None: + func_start = func_addr + func_end = func_addr + func_nbytes + + storeFunc() + + return ret + +def filterFuncs(funcs): + todo = [] # avoid mutation while iterating + + def accept(fun): + n = fun['name'] + + if n in [ '.comment', + '.dynstr', + '.dynsym', + '.eh_frame_hdr', + '.interp', + '.rela.dyn', + '.rela.plt', + '_DYNAMIC', + '_GLOBAL_OFFSET_TABLE_', + '_IO_stdin_used', + '__CTOR_LIST__', + '__DTOR_LIST__', + '_fini', + '_init', + '_start', + '' ]: + return False + + for pfx in [ '.debug', '.gnu', '.note', + '__FRAME_', '__' ]: + if n.startswith(pfx): + return False + + return True + + for k in funcs.keys(): + if not accept(funcs[k]): + todo.append(k) + + for k in todo: + del funcs[k] + +def main(): + funcs = objdump(sys.argv[1]) + filterFuncs(funcs) + + funcs_keys = funcs.keys() + funcs_keys.sort() + combined_size_all = 0 + combined_size_duk = 0 + for k in funcs_keys: + fun = funcs[k] + combined_size_all += fun['length'] + if fun['name'].startswith('duk_'): + combined_size_duk += fun['length'] + + f = sys.stdout + f.write('') + f.write('') + f.write('Size dump for %s' % sys.argv[1]) + f.write("""\ + +""") + f.write('') + f.write('') + + f.write('

<h1>Summary</h1>')
+    f.write('<table>')
+    f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys))
+    f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all)
+    f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk)
+    f.write('</table>')
+
+    f.write('<h1>Sorted by function name</h1>')
+    f.write('<table>')
+    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
+    funcs_keys = funcs.keys()
+    funcs_keys.sort()
+    for k in funcs_keys:
+        fun = funcs[k]
+        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
+    f.write('</table>')
+
+    f.write('<h1>Sorted by size</h1>')
+    f.write('<table>')
+    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
+    funcs_keys = funcs.keys()
+    def cmpSize(a,b):
+        return cmp(funcs[a]['length'], funcs[b]['length'])
+    funcs_keys.sort(cmp=cmpSize)
+    for k in funcs_keys:
+        fun = funcs[k]
+        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
+    f.write('</table>
') + + f.write('') + f.write('') + +if __name__ == '__main__': + main() diff --git a/util/genhashsizes.py b/util/genhashsizes.py new file mode 100644 index 00000000..2809a85e --- /dev/null +++ b/util/genhashsizes.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python2 +# +# Find a sequence of duk_hobject hash sizes which have a desired 'ratio' +# and are primes. Prime hash sizes ensure that all probe sequence values +# (less than hash size) are relatively prime to hash size, i.e. cover the +# entire hash. Prime data is packed into about 1 byte/prime using a +# prediction-correction model. +# +# Also generates a set of probe steps which are relatively prime to every +# hash size. + +import sys +import math + +def is_prime(n): + if n == 0: + return False + if n == 1 or n == 2: + return True + + n_limit = int(math.ceil(float(n) ** 0.5)) + 1 + n_limit += 100 # paranoia + if n_limit >= n: + n_limit = n - 1 + for i in xrange(2,n_limit + 1): + if (n % i) == 0: + return False + return True + +def next_prime(n): + while True: + n += 1 + if is_prime(n): + return n + +def generate_sizes(min_size, max_size, step_ratio): + "Generate a set of hash sizes following a nice ratio." + + sizes = [] + ratios = [] + curr = next_prime(min_size) + next = curr + sizes.append(curr) + + step_ratio = float(step_ratio) / 1024 + + while True: + if next > max_size: + break + ratio = float(next) / float(curr) + if ratio < step_ratio: + next = next_prime(next) + continue + sys.stdout.write('.'); sys.stdout.flush() + sizes.append(next) + ratios.append(ratio) + curr = next + next = next_prime(int(next * step_ratio)) + + sys.stdout.write('\n'); sys.stdout.flush() + return sizes, ratios + +def generate_corrections(sizes, step_ratio): + "Generate a set of correction from a ratio-based predictor." + + # Generate a correction list for size list, assuming steps follow a certain + # ratio; this allows us to pack size list into one byte per size + + res = [] + + res.append(sizes[0]) # first entry is first size + + for i in xrange(1, len(sizes)): + prev = sizes[i - 1] + pred = int(prev * step_ratio) >> 10 + diff = int(sizes[i] - pred) + res.append(diff) + + if diff < 0 or diff > 127: + raise Exception('correction does not fit into 8 bits') + + res.append(-1) # negative denotes last end of list + return res + +def generate_probes(count, sizes): + res = [] + + # Generate probe values which are guaranteed to be relatively prime to + # all generated hash size primes. These don't have to be primes, but + # we currently use smallest non-conflicting primes here. 
+ + i = 2 + while len(res) < count: + if is_prime(i) and (i not in sizes): + if i > 255: + raise Exception('probe step does not fit into 8 bits') + res.append(i) + i += 1 + continue + i += 1 + + return res + +# NB: these must match duk_hobject defines and code +step_ratio = 1177 # approximately (1.15 * (1 << 10)) +min_size = 16 +max_size = 2**32 - 1 + +sizes, ratios = generate_sizes(min_size, max_size, step_ratio) +corrections = generate_corrections(sizes, step_ratio) +probes = generate_probes(32, sizes) +print len(sizes) +print 'SIZES: ' + repr(sizes) +print 'RATIOS: ' + repr(ratios) +print 'CORRECTIONS: ' + repr(corrections) +print 'PROBES: ' + repr(probes) + +# highest 32-bit prime +i = 2**32 +while True: + i -= 1 + if is_prime(i): + print 'highest 32-bit prime is: %d (0x%08x)' % (i, i) + break diff --git a/src/gennumdigits.py b/util/gennumdigits.py similarity index 62% rename from src/gennumdigits.py rename to util/gennumdigits.py index acd7af6a..b2b7284e 100644 --- a/src/gennumdigits.py +++ b/util/gennumdigits.py @@ -24,21 +24,21 @@ digits_table = [] limits_table = [] for radix in xrange(2, 36+1): - bits_per_digit = math.log(radix, 2) + bits_per_digit = math.log(radix, 2) - if radix == 10: - prec_digits = 20 - else: - target_bits = math.ceil(math.log(10, 2) * 20) + 2 # +2 is extra, just in case - prec_digits = int(math.ceil(target_bits / bits_per_digit)) - digits_table.append(prec_digits) + if radix == 10: + prec_digits = 20 + else: + target_bits = math.ceil(math.log(10, 2) * 20) + 2 # +2 is extra, just in case + prec_digits = int(math.ceil(target_bits / bits_per_digit)) + digits_table.append(prec_digits) - # these are conservative (details are off by one etc); +/- 2 is the extra - overflow_limit = int(math.ceil(1024.0 / bits_per_digit)) + 2 - prec_digits - underflow_limit = int(math.floor((-1024.0 - 52.0) / bits_per_digit)) - 2 - prec_digits + # these are conservative (details are off by one etc); +/- 2 is the extra + overflow_limit = int(math.ceil(1024.0 / bits_per_digit)) + 2 - prec_digits + underflow_limit = int(math.floor((-1024.0 - 52.0) / bits_per_digit)) - 2 - prec_digits - limits_table.append(overflow_limit) - limits_table.append(underflow_limit) + limits_table.append(overflow_limit) + limits_table.append(underflow_limit) print repr(digits_table) print repr(limits_table) diff --git a/util/genobjsizereport.py b/util/genobjsizereport.py new file mode 100644 index 00000000..4543700b --- /dev/null +++ b/util/genobjsizereport.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python2 +# +# Size report of (stripped) object and source files. 
+# + +import os +import sys + +def getsize(fname): + return os.stat(fname).st_size + +def getlines(fname): + f = None + try: + f = open(fname, 'rb') + lines = f.read().split('\n') + return len(lines) + finally: + if f is not None: + f.close() + f = None + +def process(srcfile, objfile): + srcsize = getsize(srcfile) + srclines = getlines(srcfile) + srcbpl = float(srcsize) / float(srclines) + objsize = getsize(objfile) + objbpl = float(objsize) / float(srclines) + + return objsize, objbpl, srcsize, srclines, srcbpl + +def main(): + tot_srcsize = 0 + tot_srclines = 0 + tot_objsize = 0 + + tmp = [] + for i in sys.argv[1:]: + objfile = i + if i.endswith('.strip'): + objname = i[:-6] + else: + objname = i + base, ext = os.path.splitext(objname) + srcfile = base + '.c' + + objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile) + srcbase = os.path.basename(srcfile) + objbase = os.path.basename(objname) # foo.o.strip -> present as foo.o + tot_srcsize += srcsize + tot_srclines += srclines + tot_objsize += objsize + tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl)) + + def mycmp(a,b): + return cmp(a[5], b[5]) + + tmp.sort(cmp=mycmp, reverse=True) # sort by object size + fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f' + for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp: + print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl)) + + print('========================================================================') + print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines), + '', tot_objsize, float(tot_objsize) / float(tot_srclines))) + +if __name__ == '__main__': + # Usage: + # + # $ strip *.o + # $ python genobjsizereport.py *.o + + main() diff --git a/util/json2yaml.py b/util/json2yaml.py deleted file mode 100644 index c285fbbd..00000000 --- a/util/json2yaml.py +++ /dev/null @@ -1,5 +0,0 @@ -import os, sys, json, yaml - -if __name__ == '__main__': - # Use safe_dump() instead of dump() to avoid tags like "!!python/unicode" - print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False)) diff --git a/util/make_ascii.py b/util/make_ascii.py index 25e15918..2a6c5468 100644 --- a/util/make_ascii.py +++ b/util/make_ascii.py @@ -7,7 +7,7 @@ import os, sys inp = sys.stdin.read().decode('utf-8') for c in inp: - if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): - sys.stdout.write(c) - else: - sys.stdout.write('\\u%04x' % ord(c)) + if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): + sys.stdout.write(c) + else: + sys.stdout.write('\\u%04x' % ord(c)) diff --git a/util/make_dist.py b/util/make_dist.py index b23d28f0..20de5ae9 100644 --- a/util/make_dist.py +++ b/util/make_dist.py @@ -31,168 +31,170 @@ import tarfile # Helpers def exec_get_stdout(cmd, input=None, default=None, print_stdout=False): - try: - proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - ret = proc.communicate(input=input) - if print_stdout: - sys.stdout.write(ret[0]) - sys.stdout.flush() - if proc.returncode != 0: - sys.stdout.write(ret[1]) # print stderr on error - sys.stdout.flush() - if default is not None: - print('WARNING: command %r failed, return default' % cmd) - return default - raise Exception('command failed, return code %d: %r' % (proc.returncode, cmd)) - return ret[0] - except: - if default is not None: - print('WARNING: command %r failed, return default' % cmd) - return default - raise + try: + proc = 
subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = proc.communicate(input=input) + if print_stdout: + sys.stdout.write(ret[0]) + sys.stdout.flush() + if proc.returncode != 0: + sys.stdout.write(ret[1]) # print stderr on error + sys.stdout.flush() + if default is not None: + print('WARNING: command %r failed, return default' % cmd) + return default + raise Exception('command failed, return code %d: %r' % (proc.returncode, cmd)) + return ret[0] + except: + if default is not None: + print('WARNING: command %r failed, return default' % cmd) + return default + raise def exec_print_stdout(cmd, input=None): - ret = exec_get_stdout(cmd, input=input, print_stdout=True) + ret = exec_get_stdout(cmd, input=input, print_stdout=True) def mkdir(path): - os.mkdir(path) + os.mkdir(path) def copy_file(src, dst): - with open(src, 'rb') as f_in: - with open(dst, 'wb') as f_out: - f_out.write(f_in.read()) + with open(src, 'rb') as f_in: + with open(dst, 'wb') as f_out: + f_out.write(f_in.read()) def copy_files(filelist, srcdir, dstdir): - for i in filelist: - copy_file(os.path.join(srcdir, i), os.path.join(dstdir, i)) + for i in filelist: + copy_file(os.path.join(srcdir, i), os.path.join(dstdir, i)) def copy_and_replace(src, dst, rules): - # Read and write separately to allow in-place replacement - keys = sorted(rules.keys()) - res = [] - with open(src, 'rb') as f_in: - for line in f_in: - for k in keys: - line = line.replace(k, rules[k]) - res.append(line) - with open(dst, 'wb') as f_out: - f_out.write(''.join(res)) + # Read and write separately to allow in-place replacement + keys = sorted(rules.keys()) + res = [] + with open(src, 'rb') as f_in: + for line in f_in: + for k in keys: + line = line.replace(k, rules[k]) + res.append(line) + with open(dst, 'wb') as f_out: + f_out.write(''.join(res)) def copy_and_cquote(src, dst): - with open(src, 'rb') as f_in: - with open(dst, 'wb') as f_out: - f_out.write('/*\n') - for line in f_in: - line = line.decode('utf-8') - f_out.write(' * ') - for c in line: - if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): - f_out.write(c.encode('ascii')) - else: - f_out.write('\\u%04x' % ord(c)) - f_out.write(' */\n') + with open(src, 'rb') as f_in: + with open(dst, 'wb') as f_out: + f_out.write('/*\n') + for line in f_in: + line = line.decode('utf-8') + f_out.write(' * ') + for c in line: + if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): + f_out.write(c.encode('ascii')) + else: + f_out.write('\\u%04x' % ord(c)) + f_out.write(' */\n') def read_file(src, strip_last_nl=False): - with open(src, 'rb') as f: - data = f.read() - if len(data) > 0 and data[-1] == '\n': - data = data[:-1] - return data + with open(src, 'rb') as f: + data = f.read() + if len(data) > 0 and data[-1] == '\n': + data = data[:-1] + return data def delete_matching_files(dirpath, cb): - for fn in os.listdir(dirpath): - if os.path.isfile(os.path.join(dirpath, fn)) and cb(fn): - #print('Deleting %r' % os.path.join(dirpath, fn)) - os.unlink(os.path.join(dirpath, fn)) + for fn in os.listdir(dirpath): + if os.path.isfile(os.path.join(dirpath, fn)) and cb(fn): + #print('Deleting %r' % os.path.join(dirpath, fn)) + os.unlink(os.path.join(dirpath, fn)) def create_targz(dstfile, filelist): - # https://docs.python.org/2/library/tarfile.html#examples - - def _add(tf, fn): # recursive add - #print('Adding to tar: ' + fn) - if os.path.isdir(fn): - for i in sorted(os.listdir(fn)): - _add(tf, os.path.join(fn, i)) - elif os.path.isfile(fn): - tf.add(fn) - else: - 
raise Exception('invalid file: %r' % fn) - - with tarfile.open(dstfile, 'w:gz') as tf: - for fn in filelist: - _add(tf, fn) + # https://docs.python.org/2/library/tarfile.html#examples + + def _add(tf, fn): # recursive add + #print('Adding to tar: ' + fn) + if os.path.isdir(fn): + for i in sorted(os.listdir(fn)): + _add(tf, os.path.join(fn, i)) + elif os.path.isfile(fn): + tf.add(fn) + else: + raise Exception('invalid file: %r' % fn) + + with tarfile.open(dstfile, 'w:gz') as tf: + for fn in filelist: + _add(tf, fn) def glob_files(pattern): - return glob.glob(pattern) + return glob.glob(pattern) def cstring(x): - return '"' + x + '"' # good enough for now + return '"' + x + '"' # good enough for now # DUK_VERSION is grepped from duk_api_public.h.in: it is needed for the # public API and we want to avoid defining it in two places. def get_duk_version(): - r = re.compile(r'^#define\s+DUK_VERSION\s+(.*?)L?\s*$') - with open(os.path.join('src', 'duk_api_public.h.in'), 'rb') as f: - for line in f: - m = r.match(line) - if m is not None: - duk_version = int(m.group(1)) - duk_major = duk_version / 10000 - duk_minor = (duk_version % 10000) / 100 - duk_patch = duk_version % 100 - duk_version_formatted = '%d.%d.%d' % (duk_major, duk_minor, duk_patch) - return duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted - - raise Exception('cannot figure out duktape version') + r = re.compile(r'^#define\s+DUK_VERSION\s+(.*?)L?\s*$') + with open(os.path.join('src', 'duk_api_public.h.in'), 'rb') as f: + for line in f: + m = r.match(line) + if m is not None: + duk_version = int(m.group(1)) + duk_major = duk_version / 10000 + duk_minor = (duk_version % 10000) / 100 + duk_patch = duk_version % 100 + duk_version_formatted = '%d.%d.%d' % (duk_major, duk_minor, duk_patch) + return duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted + + raise Exception('cannot figure out duktape version') def create_dist_directories(dist): - if os.path.isdir(dist): - shutil.rmtree(dist) - mkdir(dist) - mkdir(os.path.join(dist, 'src-separate')) - mkdir(os.path.join(dist, 'src')) - mkdir(os.path.join(dist, 'src-noline')) - mkdir(os.path.join(dist, 'config')) - mkdir(os.path.join(dist, 'extras')) - mkdir(os.path.join(dist, 'extras', 'duk-v1-compat')) - mkdir(os.path.join(dist, 'extras', 'print-alert')) - mkdir(os.path.join(dist, 'extras', 'console')) - mkdir(os.path.join(dist, 'extras', 'logging')) - mkdir(os.path.join(dist, 'extras', 'minimal-printf')) - mkdir(os.path.join(dist, 'extras', 'module-duktape')) - mkdir(os.path.join(dist, 'extras', 'module-node')) - mkdir(os.path.join(dist, 'extras', 'alloc-pool')) - mkdir(os.path.join(dist, 'polyfills')) - #mkdir(os.path.join(dist, 'doc')) # Empty, so omit - mkdir(os.path.join(dist, 'licenses')) - mkdir(os.path.join(dist, 'debugger')) - mkdir(os.path.join(dist, 'debugger', 'static')) - mkdir(os.path.join(dist, 'examples')) - mkdir(os.path.join(dist, 'examples', 'hello')) - mkdir(os.path.join(dist, 'examples', 'eval')) - mkdir(os.path.join(dist, 'examples', 'cmdline')) - mkdir(os.path.join(dist, 'examples', 'eventloop')) - mkdir(os.path.join(dist, 'examples', 'guide')) - mkdir(os.path.join(dist, 'examples', 'coffee')) - mkdir(os.path.join(dist, 'examples', 'jxpretty')) - mkdir(os.path.join(dist, 'examples', 'sandbox')) - mkdir(os.path.join(dist, 'examples', 'alloc-logging')) - mkdir(os.path.join(dist, 'examples', 'alloc-torture')) - mkdir(os.path.join(dist, 'examples', 'alloc-hybrid')) - mkdir(os.path.join(dist, 'examples', 'debug-trans-socket')) - 
mkdir(os.path.join(dist, 'examples', 'debug-trans-dvalue')) - mkdir(os.path.join(dist, 'examples', 'codepage-conv')) - mkdir(os.path.join(dist, 'examples', 'dummy-date-provider')) - mkdir(os.path.join(dist, 'examples', 'cpp-exceptions')) - -# Path check + if os.path.isdir(dist): + shutil.rmtree(dist) + mkdir(dist) + mkdir(os.path.join(dist, 'src-input')) + #mkdir(os.path.join(dist, 'src-separate')) # created by prepare_sources.py + #mkdir(os.path.join(dist, 'src')) + #mkdir(os.path.join(dist, 'src-noline')) + mkdir(os.path.join(dist, 'tools')) + mkdir(os.path.join(dist, 'config')) + mkdir(os.path.join(dist, 'extras')) + mkdir(os.path.join(dist, 'extras', 'duk-v1-compat')) + mkdir(os.path.join(dist, 'extras', 'print-alert')) + mkdir(os.path.join(dist, 'extras', 'console')) + mkdir(os.path.join(dist, 'extras', 'logging')) + mkdir(os.path.join(dist, 'extras', 'minimal-printf')) + mkdir(os.path.join(dist, 'extras', 'module-duktape')) + mkdir(os.path.join(dist, 'extras', 'module-node')) + mkdir(os.path.join(dist, 'extras', 'alloc-pool')) + mkdir(os.path.join(dist, 'polyfills')) + #mkdir(os.path.join(dist, 'doc')) # Empty, so omit + mkdir(os.path.join(dist, 'licenses')) + mkdir(os.path.join(dist, 'debugger')) + mkdir(os.path.join(dist, 'debugger', 'static')) + mkdir(os.path.join(dist, 'examples')) + mkdir(os.path.join(dist, 'examples', 'hello')) + mkdir(os.path.join(dist, 'examples', 'eval')) + mkdir(os.path.join(dist, 'examples', 'cmdline')) + mkdir(os.path.join(dist, 'examples', 'eventloop')) + mkdir(os.path.join(dist, 'examples', 'guide')) + mkdir(os.path.join(dist, 'examples', 'coffee')) + mkdir(os.path.join(dist, 'examples', 'jxpretty')) + mkdir(os.path.join(dist, 'examples', 'sandbox')) + mkdir(os.path.join(dist, 'examples', 'alloc-logging')) + mkdir(os.path.join(dist, 'examples', 'alloc-torture')) + mkdir(os.path.join(dist, 'examples', 'alloc-hybrid')) + mkdir(os.path.join(dist, 'examples', 'debug-trans-socket')) + mkdir(os.path.join(dist, 'examples', 'debug-trans-dvalue')) + mkdir(os.path.join(dist, 'examples', 'codepage-conv')) + mkdir(os.path.join(dist, 'examples', 'dummy-date-provider')) + mkdir(os.path.join(dist, 'examples', 'cpp-exceptions')) + +# Path check (spot check a few files to ensure we're in Duktape repo root) if not (os.path.isfile(os.path.join('src', 'duk_api_public.h.in')) and \ - os.path.isfile(os.path.join('config', 'genconfig.py'))): - sys.stderr.write('\n') - sys.stderr.write('*** Working directory must be Duktape repo checkout root!\n') - sys.stderr.write('\n') - raise Exception('Incorrect working directory') + os.path.isfile(os.path.join('config', 'platforms.yaml'))): + sys.stderr.write('\n') + sys.stderr.write('*** Working directory must be Duktape repo checkout root!\n') + sys.stderr.write('\n') + raise Exception('Incorrect working directory') # Option parsing @@ -209,37 +211,37 @@ parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', actio # Python module check and friendly errors def check_python_modules(): - # make_dist.py doesn't need yaml but other dist utils will; check for it and - # warn if it is missing. - failed = False - - def _warning(module, aptPackage, pipPackage): - sys.stderr.write('\n') - sys.stderr.write('*** NOTE: Could not "import %s" needed for dist. 
Install it using e.g.:\n' % module) - sys.stderr.write('\n') - sys.stderr.write(' # Linux\n') - sys.stderr.write(' $ sudo apt-get install %s\n' % aptPackage) - sys.stderr.write('\n') - sys.stderr.write(' # Windows\n') - sys.stderr.write(' > pip install %s\n' % pipPackage) - - try: - import yaml - except ImportError: - _warning('yaml', 'python-yaml', 'PyYAML') - failed = True - - try: - if opts.create_spdx: - import rdflib - except: - # Tolerate missing rdflib, just warn about it. - _warning('rdflib', 'python-rdflib', 'rdflib') - #failed = True - - if failed: - sys.stderr.write('\n') - raise Exception('Missing some required Python modules') + # make_dist.py doesn't need yaml but other dist utils will; check for it and + # warn if it is missing. + failed = False + + def _warning(module, aptPackage, pipPackage): + sys.stderr.write('\n') + sys.stderr.write('*** NOTE: Could not "import %s" needed for dist. Install it using e.g.:\n' % module) + sys.stderr.write('\n') + sys.stderr.write(' # Linux\n') + sys.stderr.write(' $ sudo apt-get install %s\n' % aptPackage) + sys.stderr.write('\n') + sys.stderr.write(' # Windows\n') + sys.stderr.write(' > pip install %s\n' % pipPackage) + + try: + import yaml + except ImportError: + _warning('yaml', 'python-yaml', 'PyYAML') + failed = True + + try: + if opts.create_spdx: + import rdflib + except: + # Tolerate missing rdflib, just warn about it. + _warning('rdflib', 'python-rdflib', 'rdflib') + #failed = True + + if failed: + sys.stderr.write('\n') + raise Exception('Missing some required Python modules') check_python_modules() @@ -247,30 +249,24 @@ check_python_modules() entry_pwd = os.getcwd() dist = os.path.join(entry_pwd, 'dist') -distsrcsep = os.path.join(dist, 'src-separate') -distsrccom = os.path.join(dist, 'src') -distsrcnol = os.path.join(dist, 'src-noline') # src-noline/duktape.c is same as src/duktape.c - # but without line directives - # https://github.com/svaarala/duktape/pull/363 duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted = get_duk_version() if opts.git_commit is not None: - git_commit = opts.git_commit + git_commit = opts.git_commit else: - git_commit = exec_get_stdout([ 'git', 'rev-parse', 'HEAD' ], default='external').strip() -git_commit_cstring = cstring(git_commit) - + git_commit = exec_get_stdout([ 'git', 'rev-parse', 'HEAD' ], default='external').strip() if opts.git_describe is not None: - git_describe = opts.git_describe + git_describe = opts.git_describe else: - git_describe = exec_get_stdout([ 'git', 'describe', '--always', '--dirty' ], default='external').strip() -git_describe_cstring = cstring(git_describe) - + git_describe = exec_get_stdout([ 'git', 'describe', '--always', '--dirty' ], default='external').strip() if opts.git_branch is not None: - git_branch = opts.git_branch + git_branch = opts.git_branch else: - git_branch = exec_get_stdout([ 'git', 'rev-parse', '--abbrev-ref', 'HEAD' ], default='external').strip() + git_branch = exec_get_stdout([ 'git', 'rev-parse', '--abbrev-ref', 'HEAD' ], default='external').strip() + +git_commit_cstring = cstring(git_commit) +git_describe_cstring = cstring(git_describe) git_branch_cstring = cstring(git_branch) print('Dist for Duktape version %s, commit %s, describe %s, branch %s' % \ @@ -285,471 +281,355 @@ create_dist_directories(dist) # Copy most files directly os.chdir(entry_pwd) -copy_files([ - 'duk_alloc_default.c', - 'duk_api_internal.h', - 'duk_api_stack.c', - 'duk_api_heap.c', - 'duk_api_buffer.c', - 'duk_api_call.c', - 'duk_api_codec.c', - 
'duk_api_compile.c', - 'duk_api_bytecode.c', - 'duk_api_memory.c', - 'duk_api_object.c', - 'duk_api_string.c', - 'duk_api_time.c', - 'duk_api_debug.c', - 'duk_bi_array.c', - 'duk_bi_boolean.c', - 'duk_bi_buffer.c', - 'duk_bi_date.c', - 'duk_bi_date_unix.c', - 'duk_bi_date_windows.c', - 'duk_bi_duktape.c', - 'duk_bi_error.c', - 'duk_bi_function.c', - 'duk_bi_global.c', - 'duk_bi_json.c', - 'duk_bi_math.c', - 'duk_bi_number.c', - 'duk_bi_object.c', - 'duk_bi_pointer.c', - 'duk_bi_protos.h', - 'duk_bi_regexp.c', - 'duk_bi_string.c', - 'duk_bi_proxy.c', - 'duk_bi_thread.c', - 'duk_bi_thrower.c', - 'duk_debug_fixedbuffer.c', - 'duk_debug.h', - 'duk_debug_macros.c', - 'duk_debug_vsnprintf.c', - 'duk_error_augment.c', - 'duk_error.h', - 'duk_error_longjmp.c', - 'duk_error_macros.c', - 'duk_error_misc.c', - 'duk_error_throw.c', - 'duk_forwdecl.h', - 'duk_harray.h', - 'duk_hbuffer_alloc.c', - 'duk_hbuffer.h', - 'duk_hbuffer_ops.c', - 'duk_hcompfunc.h', - 'duk_heap_alloc.c', - 'duk_heap.h', - 'duk_heap_hashstring.c', - 'duk_heaphdr.h', - 'duk_heap_markandsweep.c', - 'duk_heap_memory.c', - 'duk_heap_misc.c', - 'duk_heap_refcount.c', - 'duk_heap_stringcache.c', - 'duk_heap_stringtable.c', - 'duk_hnatfunc.h', - 'duk_hobject_alloc.c', - 'duk_hobject_class.c', - 'duk_hobject_enum.c', - 'duk_hobject_finalizer.c', - 'duk_hobject.h', - 'duk_hobject_misc.c', - 'duk_hobject_pc2line.c', - 'duk_hobject_props.c', - 'duk_hstring.h', - 'duk_hstring_misc.c', - 'duk_hthread_alloc.c', - 'duk_hthread_builtins.c', - 'duk_hthread.h', - 'duk_hthread_misc.c', - 'duk_hthread_stacks.c', - 'duk_hbufobj.h', - 'duk_hbufobj_misc.c', - 'duk_debugger.c', - 'duk_debugger.h', - 'duk_internal.h', - 'duk_jmpbuf.h', - 'duk_exception.h', - 'duk_js_bytecode.h', - 'duk_js_call.c', - 'duk_js_compiler.c', - 'duk_js_compiler.h', - 'duk_js_executor.c', - 'duk_js.h', - 'duk_json.h', - 'duk_js_ops.c', - 'duk_js_var.c', - 'duk_lexer.c', - 'duk_lexer.h', - 'duk_numconv.c', - 'duk_numconv.h', - 'duk_regexp_compiler.c', - 'duk_regexp_executor.c', - 'duk_regexp.h', - 'duk_tval.c', - 'duk_tval.h', - 'duk_unicode.h', - 'duk_unicode_support.c', - 'duk_unicode_tables.c', - 'duk_util_bitdecoder.c', - 'duk_util_bitencoder.c', - 'duk_util.h', - 'duk_util_hashbytes.c', - 'duk_util_hashprime.c', - 'duk_util_misc.c', - 'duk_util_tinyrandom.c', - 'duk_util_bufwriter.c', - 'duk_selftest.c', - 'duk_selftest.h', - 'duk_strings.h', - 'duk_replacements.c', - 'duk_replacements.h' -], 'src', distsrcsep) + +for fn in glob.glob(os.path.join('src', '*')): + copy_file(fn, os.path.join(dist, 'src-input', os.path.basename(fn))) os.chdir(os.path.join(entry_pwd, 'config')) create_targz(os.path.join(dist, 'config', 'genconfig_metadata.tar.gz'), [ - 'tags.yaml', - 'platforms.yaml', - 'architectures.yaml', - 'compilers.yaml', - 'platforms', - 'architectures', - 'compilers', - 'feature-options', - 'config-options', - 'helper-snippets', - 'header-snippets', - 'other-defines', - 'examples' + 'tags.yaml', + 'platforms.yaml', + 'architectures.yaml', + 'compilers.yaml', + 'platforms', + 'architectures', + 'compilers', + 'feature-options', + 'config-options', + 'helper-snippets', + 'header-snippets', + 'other-defines', + 'examples' ]) os.chdir(entry_pwd) copy_files([ - 'README.rst', - 'genconfig.py' + 'prepare_sources.py', + 'combine_src.py', + 'create_spdx_license.py', + 'duk_meta_to_strarray.py', + 'dukutil.py', + 'dump_bytecode.py', + 'extract_caseconv.py', + 'extract_chars.py', + 'extract_unique_options.py', + 'genbuildparams.py', + 'genbuiltins.py', + 'genconfig.py', + 
'json2yaml.py', + 'merge_debug_meta.py', + 'prepare_unicode_data.py', + 'resolve_combined_lineno.py', + 'scan_strings.py', + 'scan_used_stridx_bidx.py', + 'yaml2json.py', +], 'tools', os.path.join(dist, 'tools')) + +# XXX: Copy genconfig.py also to config/genconfig.py for now. +copy_file(os.path.join(dist, 'tools', 'genconfig.py'), os.path.join(dist, 'config', 'genconfig.py')) + +copy_files([ + 'README.rst' ], 'config', os.path.join(dist, 'config')) copy_files([ - 'README.rst', - 'Makefile', - 'package.json', - 'duk_debug.js', - 'duk_debug_proxy.js', - 'duk_classnames.yaml', - 'duk_debugcommands.yaml', - 'duk_debugerrors.yaml', - 'duk_opcodes.yaml', - 'merge_debug_meta.py' + 'README.rst', + 'Makefile', + 'package.json', + 'duk_debug.js', + 'duk_debug_proxy.js', + 'duk_classnames.yaml', + 'duk_debugcommands.yaml', + 'duk_debugerrors.yaml', + 'duk_opcodes.yaml' ], 'debugger', os.path.join(dist, 'debugger')) copy_files([ - 'index.html', - 'style.css', - 'webui.js' + 'index.html', + 'style.css', + 'webui.js' ], os.path.join('debugger', 'static'), os.path.join(dist, 'debugger', 'static')) copy_files([ - 'console-minimal.js', - 'object-prototype-definegetter.js', - 'object-prototype-definesetter.js', - 'object-assign.js', - 'performance-now.js', - 'duktape-isfastint.js', - 'duktape-error-setter-writable.js', - 'duktape-error-setter-nonwritable.js', - 'duktape-buffer.js' + 'console-minimal.js', + 'object-prototype-definegetter.js', + 'object-prototype-definesetter.js', + 'object-assign.js', + 'performance-now.js', + 'duktape-isfastint.js', + 'duktape-error-setter-writable.js', + 'duktape-error-setter-nonwritable.js', + 'duktape-buffer.js' ], 'polyfills', os.path.join(dist, 'polyfills')) copy_files([ - 'README.rst' + 'README.rst' ], 'examples', os.path.join(dist, 'examples')) copy_files([ - 'README.rst', - 'duk_cmdline.c', - 'duk_cmdline_ajduk.c' + 'README.rst', + 'duk_cmdline.c', + 'duk_cmdline_ajduk.c' ], os.path.join('examples', 'cmdline'), os.path.join(dist, 'examples', 'cmdline')) copy_files([ - 'README.rst', - 'c_eventloop.c', - 'c_eventloop.js', - 'ecma_eventloop.js', - 'main.c', - 'poll.c', - 'ncurses.c', - 'socket.c', - 'fileio.c', - 'curses-timers.js', - 'basic-test.js', - 'server-socket-test.js', - 'client-socket-test.js' + 'README.rst', + 'c_eventloop.c', + 'c_eventloop.js', + 'ecma_eventloop.js', + 'main.c', + 'poll.c', + 'ncurses.c', + 'socket.c', + 'fileio.c', + 'curses-timers.js', + 'basic-test.js', + 'server-socket-test.js', + 'client-socket-test.js' ], os.path.join('examples', 'eventloop'), os.path.join(dist, 'examples', 'eventloop')) copy_files([ - 'README.rst', - 'hello.c' + 'README.rst', + 'hello.c' ], os.path.join('examples', 'hello'), os.path.join(dist, 'examples', 'hello')) copy_files([ - 'README.rst', - 'eval.c' + 'README.rst', + 'eval.c' ], os.path.join('examples', 'eval'), os.path.join(dist, 'examples', 'eval')) copy_files([ - 'README.rst', - 'fib.js', - 'process.js', - 'processlines.c', - 'prime.js', - 'primecheck.c', - 'uppercase.c' + 'README.rst', + 'fib.js', + 'process.js', + 'processlines.c', + 'prime.js', + 'primecheck.c', + 'uppercase.c' ], os.path.join('examples', 'guide'), os.path.join(dist, 'examples', 'guide')) copy_files([ - 'README.rst', - 'globals.coffee', - 'hello.coffee', - 'mandel.coffee' + 'README.rst', + 'globals.coffee', + 'hello.coffee', + 'mandel.coffee' ], os.path.join('examples', 'coffee'), os.path.join(dist, 'examples', 'coffee')) copy_files([ - 'README.rst', - 'jxpretty.c' + 'README.rst', + 'jxpretty.c' ], os.path.join('examples', 'jxpretty'), 
os.path.join(dist, 'examples', 'jxpretty')) copy_files([ - 'README.rst', - 'sandbox.c' + 'README.rst', + 'sandbox.c' ], os.path.join('examples', 'sandbox'), os.path.join(dist, 'examples', 'sandbox')) copy_files([ - 'README.rst', - 'duk_alloc_logging.c', - 'duk_alloc_logging.h', - 'log2gnuplot.py' + 'README.rst', + 'duk_alloc_logging.c', + 'duk_alloc_logging.h', + 'log2gnuplot.py' ], os.path.join('examples', 'alloc-logging'), os.path.join(dist, 'examples', 'alloc-logging')) copy_files([ - 'README.rst', - 'duk_alloc_torture.c', - 'duk_alloc_torture.h' + 'README.rst', + 'duk_alloc_torture.c', + 'duk_alloc_torture.h' ], os.path.join('examples', 'alloc-torture'), os.path.join(dist, 'examples', 'alloc-torture')) copy_files([ - 'README.rst', - 'duk_alloc_hybrid.c', - 'duk_alloc_hybrid.h' + 'README.rst', + 'duk_alloc_hybrid.c', + 'duk_alloc_hybrid.h' ], os.path.join('examples', 'alloc-hybrid'), os.path.join(dist, 'examples', 'alloc-hybrid')) copy_files([ - 'README.rst', - 'duk_trans_socket_unix.c', - 'duk_trans_socket_windows.c', - 'duk_trans_socket.h' + 'README.rst', + 'duk_trans_socket_unix.c', + 'duk_trans_socket_windows.c', + 'duk_trans_socket.h' ], os.path.join('examples', 'debug-trans-socket'), os.path.join(dist, 'examples', 'debug-trans-socket')) copy_files([ - 'README.rst', - 'duk_trans_dvalue.c', - 'duk_trans_dvalue.h', - 'test.c', - 'Makefile' + 'README.rst', + 'duk_trans_dvalue.c', + 'duk_trans_dvalue.h', + 'test.c', + 'Makefile' ], os.path.join('examples', 'debug-trans-dvalue'), os.path.join(dist, 'examples', 'debug-trans-dvalue')) copy_files([ - 'README.rst', - 'duk_codepage_conv.c', - 'duk_codepage_conv.h', - 'test.c' + 'README.rst', + 'duk_codepage_conv.c', + 'duk_codepage_conv.h', + 'test.c' ], os.path.join('examples', 'codepage-conv'), os.path.join(dist, 'examples', 'codepage-conv')) copy_files([ - 'README.rst', - 'dummy_date_provider.c' + 'README.rst', + 'dummy_date_provider.c' ], os.path.join('examples', 'dummy-date-provider'), os.path.join(dist, 'examples', 'dummy-date-provider')) copy_files([ - 'README.rst', - 'cpp_exceptions.cpp' + 'README.rst', + 'cpp_exceptions.cpp' ], os.path.join('examples', 'cpp-exceptions'), os.path.join(dist, 'examples', 'cpp-exceptions')) copy_files([ - 'README.rst' + 'README.rst' ], 'extras', os.path.join(dist, 'extras')) copy_files([ - 'README.rst', - 'duk_logging.c', - 'duk_logging.h', - 'test.c', - 'Makefile' + 'README.rst', + 'duk_logging.c', + 'duk_logging.h', + 'test.c', + 'Makefile' ], os.path.join('extras', 'logging'), os.path.join(dist, 'extras', 'logging')) copy_files([ - 'README.rst', - 'duk_v1_compat.c', - 'duk_v1_compat.h', - 'test.c', - 'Makefile', - 'test_eval1.js', - 'test_eval2.js', - 'test_compile1.js', - 'test_compile2.js' + 'README.rst', + 'duk_v1_compat.c', + 'duk_v1_compat.h', + 'test.c', + 'Makefile', + 'test_eval1.js', + 'test_eval2.js', + 'test_compile1.js', + 'test_compile2.js' ], os.path.join('extras', 'duk-v1-compat'), os.path.join(dist, 'extras', 'duk-v1-compat')) copy_files([ - 'README.rst', - 'duk_print_alert.c', - 'duk_print_alert.h', - 'test.c', - 'Makefile' + 'README.rst', + 'duk_print_alert.c', + 'duk_print_alert.h', + 'test.c', + 'Makefile' ], os.path.join('extras', 'print-alert'), os.path.join(dist, 'extras', 'print-alert')) copy_files([ - 'README.rst', - 'duk_console.c', - 'duk_console.h', - 'test.c', - 'Makefile' + 'README.rst', + 'duk_console.c', + 'duk_console.h', + 'test.c', + 'Makefile' ], os.path.join('extras', 'console'), os.path.join(dist, 'extras', 'console')) copy_files([ - 'README.rst', - 
'duk_minimal_printf.c', - 'duk_minimal_printf.h', - 'Makefile', - 'test.c' + 'README.rst', + 'duk_minimal_printf.c', + 'duk_minimal_printf.h', + 'Makefile', + 'test.c' ], os.path.join('extras', 'minimal-printf'), os.path.join(dist, 'extras', 'minimal-printf')) copy_files([ - 'README.rst', - 'duk_module_duktape.c', - 'duk_module_duktape.h', - 'Makefile', - 'test.c' + 'README.rst', + 'duk_module_duktape.c', + 'duk_module_duktape.h', + 'Makefile', + 'test.c' ], os.path.join('extras', 'module-duktape'), os.path.join(dist, 'extras', 'module-duktape')) copy_files([ - 'README.rst', - 'duk_module_node.c', - 'duk_module_node.h', - 'Makefile', - 'test.c' + 'README.rst', + 'duk_module_node.c', + 'duk_module_node.h', + 'Makefile', + 'test.c' ], os.path.join('extras', 'module-node'), os.path.join(dist, 'extras', 'module-node')) copy_files([ - 'README.rst', - 'duk_alloc_pool.c', - 'duk_alloc_pool.h', - 'ptrcomp.yaml', - 'ptrcomp_fixup.h', - 'Makefile', - 'test.c' + 'README.rst', + 'duk_alloc_pool.c', + 'duk_alloc_pool.h', + 'ptrcomp.yaml', + 'ptrcomp_fixup.h', + 'Makefile', + 'test.c' ], os.path.join('extras', 'alloc-pool'), os.path.join(dist, 'extras', 'alloc-pool')) copy_files([ - 'Makefile.cmdline', - 'Makefile.dukdebug', - 'Makefile.eventloop', - 'Makefile.hello', - 'Makefile.eval', - 'Makefile.coffee', - 'Makefile.jxpretty', - 'Makefile.sandbox', - 'Makefile.codepage', - 'mandel.js' + 'Makefile.cmdline', + 'Makefile.dukdebug', + 'Makefile.eventloop', + 'Makefile.hello', + 'Makefile.eval', + 'Makefile.coffee', + 'Makefile.jxpretty', + 'Makefile.sandbox', + 'Makefile.codepage', + 'mandel.js' ], 'dist-files', dist) copy_and_replace(os.path.join('dist-files', 'Makefile.sharedlibrary'), os.path.join(dist, 'Makefile.sharedlibrary'), { - '@DUK_VERSION@': str(duk_version), - '@SONAME_VERSION@': str(int(duk_version / 100)) # 10500 -> 105 + '@DUK_VERSION@': str(duk_version), + '@SONAME_VERSION@': str(int(duk_version / 100)) # 10500 -> 105 }) copy_and_replace(os.path.join('dist-files', 'README.rst'), os.path.join(dist, 'README.rst'), { - '@DUK_VERSION_FORMATTED@': duk_version_formatted, - '@GIT_COMMIT@': git_commit, - '@GIT_DESCRIBE@': git_describe, - '@GIT_BRANCH@': git_branch + '@DUK_VERSION_FORMATTED@': duk_version_formatted, + '@GIT_COMMIT@': git_commit, + '@GIT_DESCRIBE@': git_describe, + '@GIT_BRANCH@': git_branch }) copy_files([ - 'LICENSE.txt', # not strict RST so keep .txt suffix - 'AUTHORS.rst' + 'LICENSE.txt', # not strict RST so keep .txt suffix + 'AUTHORS.rst' ], '.', os.path.join(dist)) # RELEASES.rst is only updated in master. It's not included in the dist to # make maintenance fixes easier to make. copy_files([ - 'murmurhash2.txt', - 'lua.txt', - 'commonjs.txt' + 'murmurhash2.txt', + 'lua.txt', + 'commonjs.txt' ], 'licenses', os.path.join(dist, 'licenses')) -# Build temp versions of LICENSE.txt and AUTHORS.rst for embedding into -# autogenerated C/H files. - -copy_and_cquote('LICENSE.txt', os.path.join(dist, 'LICENSE.txt.tmp')) -copy_and_cquote('AUTHORS.rst', os.path.join(dist, 'AUTHORS.rst.tmp')) - -print('Create duk_config.h headers') - # Merge debugger metadata. 
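# Illustrative sketch, not part of the patch: the step below merges the
# debugger metadata YAML files into a single duk_debug_meta.json. Roughly,
# that amounts to loading each YAML input and emitting them under one JSON
# document; this is only inferred from the command line below (the real
# tools/merge_debug_meta.py may lay out its output differently) and it
# assumes PyYAML is available.
import json
import yaml

def _demo_merge_debug_meta(out_fn, class_names_fn, commands_fn, errors_fn, opcodes_fn):
    merged = {}
    for key, fn in [ ('class_names', class_names_fn),
                     ('debug_commands', commands_fn),
                     ('debug_errors', errors_fn),
                     ('opcodes', opcodes_fn) ]:
        with open(fn, 'rb') as f:
            merged[key] = yaml.safe_load(f)
    with open(out_fn, 'wb') as f:
        f.write(json.dumps(merged, indent=4))
# The actual merge in make_dist.py is delegated to the script invoked below.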
merged = exec_print_stdout([ - sys.executable, os.path.join('debugger', 'merge_debug_meta.py'), - '--output', os.path.join(dist, 'debugger', 'duk_debug_meta.json'), - '--class-names', os.path.join('debugger', 'duk_classnames.yaml'), - '--debug-commands', os.path.join('debugger', 'duk_debugcommands.yaml'), - '--debug-errors', os.path.join('debugger', 'duk_debugerrors.yaml'), - '--opcodes', os.path.join('debugger', 'duk_opcodes.yaml') -]) - -# Build default duk_config.h from snippets using genconfig. -exec_print_stdout([ - sys.executable, os.path.join('config', 'genconfig.py'), '--metadata', 'config', - '--output', os.path.join(dist, 'duk_config.h.tmp'), - '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, - '--omit-removed-config-options', '--omit-unused-config-options', - '--emit-config-sanity-check', - '--support-feature-options', - 'duk-config-header' + sys.executable, os.path.join('tools', 'merge_debug_meta.py'), + '--output', os.path.join(dist, 'debugger', 'duk_debug_meta.json'), + '--class-names', os.path.join('debugger', 'duk_classnames.yaml'), + '--debug-commands', os.path.join('debugger', 'duk_debugcommands.yaml'), + '--debug-errors', os.path.join('debugger', 'duk_debugerrors.yaml'), + '--opcodes', os.path.join('debugger', 'duk_opcodes.yaml') ]) -copy_file(os.path.join(dist, 'duk_config.h.tmp'), os.path.join(distsrccom, 'duk_config.h')) -copy_file(os.path.join(dist, 'duk_config.h.tmp'), os.path.join(distsrcnol, 'duk_config.h')) -copy_file(os.path.join(dist, 'duk_config.h.tmp'), os.path.join(distsrcsep, 'duk_config.h')) -#copy_file(os.path.join(dist, 'duk_config.h.tmp'), os.path.join(dist, 'config', 'duk_config.h-autodetect')) +print('Create duk_config.h headers') # Build duk_config.h without feature option support. 
exec_print_stdout([ - sys.executable, os.path.join('config', 'genconfig.py'), '--metadata', 'config', - '--output', os.path.join(dist, 'config', 'duk_config.h-modular-static'), - '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, - '--omit-removed-config-options', '--omit-unused-config-options', - '--emit-legacy-feature-check', '--emit-config-sanity-check', - 'duk-config-header' + sys.executable, os.path.join('tools', 'genconfig.py'), '--metadata', 'config', + '--output', os.path.join(dist, 'config', 'duk_config.h-modular-static'), + '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, + '--omit-removed-config-options', '--omit-unused-config-options', + '--emit-legacy-feature-check', '--emit-config-sanity-check', + 'duk-config-header' ]) exec_print_stdout([ - sys.executable, os.path.join('config', 'genconfig.py'), '--metadata', 'config', - '--output', os.path.join(dist, 'config', 'duk_config.h-modular-dll'), - '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, - '--omit-removed-config-options', '--omit-unused-config-options', - '--emit-legacy-feature-check', '--emit-config-sanity-check', - '--dll', - 'duk-config-header' + sys.executable, os.path.join('tools', 'genconfig.py'), '--metadata', 'config', + '--output', os.path.join(dist, 'config', 'duk_config.h-modular-dll'), + '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, + '--omit-removed-config-options', '--omit-unused-config-options', + '--emit-legacy-feature-check', '--emit-config-sanity-check', + '--dll', + 'duk-config-header' ]) # Generate a few barebones config examples def genconfig_barebones(platform, architecture, compiler): - exec_print_stdout([ - sys.executable, os.path.join('config', 'genconfig.py'), '--metadata', 'config', - '--output', os.path.join(dist, 'config', 'duk_config.h-%s-%s-%s' % (platform, architecture, compiler)), - '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, - '--platform', platform, '--architecture', architecture, '--compiler', compiler, - '--omit-removed-config-options', '--omit-unused-config-options', - '--emit-legacy-feature-check', '--emit-config-sanity-check', - 'duk-config-header' - ]) + exec_print_stdout([ + sys.executable, os.path.join('tools', 'genconfig.py'), '--metadata', 'config', + '--output', os.path.join(dist, 'config', 'duk_config.h-%s-%s-%s' % (platform, architecture, compiler)), + '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, + '--platform', platform, '--architecture', architecture, '--compiler', compiler, + '--omit-removed-config-options', '--omit-unused-config-options', + '--emit-legacy-feature-check', '--emit-config-sanity-check', + 'duk-config-header' + ]) #genconfig_barebones('linux', 'x86', 'gcc') #genconfig_barebones('linux', 'x64', 'gcc') @@ -762,384 +642,46 @@ def genconfig_barebones(platform, architecture, compiler): #genconfig_barebones('apple', 'x86', 'clang') #genconfig_barebones('apple', 'x64', 'clang') -# Build duktape.h from parts, with some git-related replacements. -# The only difference between single and separate file duktape.h -# is the internal DUK_SINGLE_FILE define. 
-# -# Newline after 'i \': -# http://stackoverflow.com/questions/25631989/sed-insert-line-command-osx -copy_and_replace(os.path.join('src', 'duktape.h.in'), os.path.join(distsrccom, 'duktape.h'), { - '@DUK_SINGLE_FILE@': '#define DUK_SINGLE_FILE', - '@LICENSE_TXT@': read_file(os.path.join(dist, 'LICENSE.txt.tmp'), strip_last_nl=True), - '@AUTHORS_RST@': read_file(os.path.join(dist, 'AUTHORS.rst.tmp'), strip_last_nl=True), - '@DUK_API_PUBLIC_H@': read_file(os.path.join('src', 'duk_api_public.h.in'), strip_last_nl=True), - '@DUK_DBLUNION_H@': read_file(os.path.join('src', 'duk_dblunion.h.in'), strip_last_nl=True), - '@DUK_VERSION_FORMATTED@': duk_version_formatted, - '@GIT_COMMIT@': git_commit, - '@GIT_COMMIT_CSTRING@': git_commit_cstring, - '@GIT_DESCRIBE@': git_describe, - '@GIT_DESCRIBE_CSTRING@': git_describe_cstring, - '@GIT_BRANCH@': git_branch, - '@GIT_BRANCH_CSTRING@': git_branch_cstring -}) -# keep the line so line numbers match between the two variant headers -copy_and_replace(os.path.join(distsrccom, 'duktape.h'), os.path.join(distsrcsep, 'duktape.h'), { - '#define DUK_SINGLE_FILE': '#undef DUK_SINGLE_FILE' -}) -copy_file(os.path.join(distsrccom, 'duktape.h'), os.path.join(distsrcnol, 'duktape.h')) - -# Autogenerated strings and built-in files -# -# There are currently no profile specific variants of strings/builtins, but -# this will probably change when functions are added/removed based on profile. - -exec_print_stdout([ - sys.executable, - os.path.join('src', 'genbuildparams.py'), - '--version=' + str(duk_version), - '--git-commit=' + git_commit, - '--git-describe=' + git_describe, - '--git-branch=' + git_branch, - '--out-json=' + os.path.join(distsrcsep, 'buildparams.json.tmp'), - '--out-header=' + os.path.join(distsrcsep, 'duk_buildparams.h.tmp') -]) - -res = exec_get_stdout([ - sys.executable, - os.path.join('src', 'scan_used_stridx_bidx.py') -] + glob_files(os.path.join('src', '*.c')) \ - + glob_files(os.path.join('src', '*.h')) \ - + glob_files(os.path.join('src', '*.h.in')) -) -with open(os.path.join(dist, 'duk_used_stridx_bidx_defs.json.tmp'), 'wb') as f: - f.write(res) - -gb_opts = [] -gb_opts.append('--ram-support') # enable by default +# Build prepared sources (src/, src-noline/, src-separate/) with default +# config. This is done using tools and metadata in the dist directory. +print('Config-and-prepare sources for default configuration') +cmd = [ + sys.executable, os.path.join(dist, 'tools', 'prepare_sources.py'), + '--source-directory', os.path.join(dist, 'src-input'), + '--output-directory', dist, + '--config-metadata', os.path.join(dist, 'config', 'genconfig_metadata.tar.gz'), + '--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch, + '--omit-removed-config-options', '--omit-unused-config-options', + '--emit-config-sanity-check', '--support-feature-options' +] if opts.rom_support: - # ROM string/object support is not enabled by default because - # it increases the generated duktape.c considerably. 
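# Illustrative sketch, not part of the patch: the prepare_sources.py command
# assembled above can in principle also be run standalone against an existing
# dist directory to re-prepare sources with a custom configuration. Paths and
# git values below are placeholders; the flags are the same ones make_dist.py
# passes (plus the optional --rom-support, --rom-auto-lightfunc and repeated
# --user-builtin-metadata arguments appended further below when the
# corresponding options are set).
import os
import subprocess
import sys

def _demo_prepare_sources(dist, outdir):
    subprocess.check_call([
        sys.executable, os.path.join(dist, 'tools', 'prepare_sources.py'),
        '--source-directory', os.path.join(dist, 'src-input'),
        '--output-directory', outdir,
        '--config-metadata', os.path.join(dist, 'config', 'genconfig_metadata.tar.gz'),
        '--git-commit', 'unknown', '--git-describe', 'unknown', '--git-branch', 'unknown',
        '--omit-removed-config-options', '--omit-unused-config-options',
        '--emit-config-sanity-check', '--support-feature-options'
    ])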
- print('Enabling --rom-support for genbuiltins.py') - gb_opts.append('--rom-support') + cmd.append('--rom-support') if opts.rom_auto_lightfunc: - print('Enabling --rom-auto-lightfunc for genbuiltins.py') - gb_opts.append('--rom-auto-lightfunc') -for fn in opts.user_builtin_metadata: - print('Forwarding --user-builtin-metadata %s' % fn) - gb_opts.append('--user-builtin-metadata') - gb_opts.append(fn) -exec_print_stdout([ - sys.executable, - os.path.join('src', 'genbuiltins.py'), - '--buildinfo=' + os.path.join(distsrcsep, 'buildparams.json.tmp'), - '--used-stridx-metadata=' + os.path.join(dist, 'duk_used_stridx_bidx_defs.json.tmp'), - '--strings-metadata=' + os.path.join('src', 'strings.yaml'), - '--objects-metadata=' + os.path.join('src', 'builtins.yaml'), - '--out-header=' + os.path.join(distsrcsep, 'duk_builtins.h'), - '--out-source=' + os.path.join(distsrcsep, 'duk_builtins.c'), - '--out-metadata-json=' + os.path.join(dist, 'duk_build_meta.json') -] + gb_opts) - -# Autogenerated Unicode files -# -# Note: not all of the generated headers are used. For instance, the -# match table for "WhiteSpace-Z" is not used, because a custom piece -# of code handles that particular match. -# -# UnicodeData.txt contains ranges expressed like this: -# -# 4E00;;Lo;0;L;;;;;N;;;;; -# 9FCB;;Lo;0;L;;;;;N;;;;; -# -# These are currently decoded into individual characters as a prestep. -# -# For IDPART: -# UnicodeCombiningMark -> categories Mn, Mc -# UnicodeDigit -> categories Nd -# UnicodeConnectorPunctuation -> categories Pc - -# Whitespace (unused now) -WHITESPACE_INCL='Zs' # USP = Any other Unicode space separator -WHITESPACE_EXCL='NONE' - -# Unicode letter (unused now) -LETTER_INCL='Lu,Ll,Lt,Lm,Lo' -LETTER_EXCL='NONE' -LETTER_NOA_INCL='Lu,Ll,Lt,Lm,Lo' -LETTER_NOA_EXCL='ASCII' -LETTER_NOABMP_INCL=LETTER_NOA_INCL -LETTER_NOABMP_EXCL='ASCII,NONBMP' - -# Identifier start -# E5 Section 7.6 -IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' -IDSTART_EXCL='NONE' -IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' -IDSTART_NOA_EXCL='ASCII' -IDSTART_NOABMP_INCL=IDSTART_NOA_INCL -IDSTART_NOABMP_EXCL='ASCII,NONBMP' - -# Identifier start - Letter: allows matching of (rarely needed) 'Letter' -# production space efficiently with the help of IdentifierStart. The -# 'Letter' production is only needed in case conversion of Greek final -# sigma. 
-IDSTART_MINUS_LETTER_INCL=IDSTART_NOA_INCL -IDSTART_MINUS_LETTER_EXCL='Lu,Ll,Lt,Lm,Lo' -IDSTART_MINUS_LETTER_NOA_INCL=IDSTART_NOA_INCL -IDSTART_MINUS_LETTER_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII' -IDSTART_MINUS_LETTER_NOABMP_INCL=IDSTART_NOA_INCL -IDSTART_MINUS_LETTER_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII,NONBMP' - -# Identifier start - Identifier part -# E5 Section 7.6: IdentifierPart, but remove IdentifierStart (already above) -IDPART_MINUS_IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' -IDPART_MINUS_IDSTART_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' -IDPART_MINUS_IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' -IDPART_MINUS_IDSTART_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII' -IDPART_MINUS_IDSTART_NOABMP_INCL=IDPART_MINUS_IDSTART_NOA_INCL -IDPART_MINUS_IDSTART_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII,NONBMP' - -print('Expand UnicodeData.txt ranges') - -exec_print_stdout([ - sys.executable, - os.path.join('src', 'prepare_unicode_data.py'), - os.path.join('src', 'UnicodeData.txt'), - os.path.join(distsrcsep, 'UnicodeData-expanded.tmp') -]) - -def extract_chars(incl, excl, suffix): - #print('- extract_chars: %s %s %s' % (incl, excl, suffix)) - res = exec_get_stdout([ - sys.executable, - os.path.join('src', 'extract_chars.py'), - '--unicode-data=' + os.path.join(distsrcsep, 'UnicodeData-expanded.tmp'), - '--include-categories=' + incl, - '--exclude-categories=' + excl, - '--out-source=' + os.path.join(distsrcsep, 'duk_unicode_%s.c.tmp' % suffix), - '--out-header=' + os.path.join(distsrcsep, 'duk_unicode_%s.h.tmp' % suffix), - '--table-name=' + 'duk_unicode_%s' % suffix - ]) - with open(os.path.join(distsrcsep, suffix + '.txt'), 'wb') as f: - f.write(res) - -def extract_caseconv(): - #print('- extract_caseconv case conversion') - res = exec_get_stdout([ - sys.executable, - os.path.join('src', 'extract_caseconv.py'), - '--command=caseconv_bitpacked', - '--unicode-data=' + os.path.join(distsrcsep, 'UnicodeData-expanded.tmp'), - '--special-casing=' + os.path.join('src', 'SpecialCasing.txt'), - '--out-source=' + os.path.join(distsrcsep, 'duk_unicode_caseconv.c.tmp'), - '--out-header=' + os.path.join(distsrcsep, 'duk_unicode_caseconv.h.tmp'), - '--table-name-lc=duk_unicode_caseconv_lc', - '--table-name-uc=duk_unicode_caseconv_uc' - ]) - with open(os.path.join(distsrcsep, 'caseconv.txt'), 'wb') as f: - f.write(res) - - #print('- extract_caseconv canon lookup') - res = exec_get_stdout([ - sys.executable, - os.path.join('src', 'extract_caseconv.py'), - '--command=re_canon_lookup', - '--unicode-data=' + os.path.join(distsrcsep, 'UnicodeData-expanded.tmp'), - '--special-casing=' + os.path.join('src', 'SpecialCasing.txt'), - '--out-source=' + os.path.join(distsrcsep, 'duk_unicode_re_canon_lookup.c.tmp'), - '--out-header=' + os.path.join(distsrcsep, 'duk_unicode_re_canon_lookup.h.tmp'), - '--table-name-re-canon-lookup=duk_unicode_re_canon_lookup' - ]) - with open(os.path.join(distsrcsep, 'caseconv_re_canon_lookup.txt'), 'wb') as f: - f.write(res) - -print('Create Unicode tables for codepoint classes') -extract_chars(WHITESPACE_INCL, WHITESPACE_EXCL, 'ws') -extract_chars(LETTER_INCL, LETTER_EXCL, 'let') -extract_chars(LETTER_NOA_INCL, LETTER_NOA_EXCL, 'let_noa') -extract_chars(LETTER_NOABMP_INCL, LETTER_NOABMP_EXCL, 'let_noabmp') -extract_chars(IDSTART_INCL, IDSTART_EXCL, 'ids') -extract_chars(IDSTART_NOA_INCL, IDSTART_NOA_EXCL, 'ids_noa') -extract_chars(IDSTART_NOABMP_INCL, IDSTART_NOABMP_EXCL, 'ids_noabmp') -extract_chars(IDSTART_MINUS_LETTER_INCL, IDSTART_MINUS_LETTER_EXCL, 
'ids_m_let') -extract_chars(IDSTART_MINUS_LETTER_NOA_INCL, IDSTART_MINUS_LETTER_NOA_EXCL, 'ids_m_let_noa') -extract_chars(IDSTART_MINUS_LETTER_NOABMP_INCL, IDSTART_MINUS_LETTER_NOABMP_EXCL, 'ids_m_let_noabmp') -extract_chars(IDPART_MINUS_IDSTART_INCL, IDPART_MINUS_IDSTART_EXCL, 'idp_m_ids') -extract_chars(IDPART_MINUS_IDSTART_NOA_INCL, IDPART_MINUS_IDSTART_NOA_EXCL, 'idp_m_ids_noa') -extract_chars(IDPART_MINUS_IDSTART_NOABMP_INCL, IDPART_MINUS_IDSTART_NOABMP_EXCL, 'idp_m_ids_noabmp') - -print('Create Unicode tables for case conversion') -extract_caseconv() - -print('Combine sources and clean up') - -# Inject autogenerated files into source and header files so that they are -# usable (for all profiles and define cases) directly. -# -# The injection points use a standard C preprocessor #include syntax -# (earlier these were actual includes). - -copy_and_replace(os.path.join(distsrcsep, 'duk_unicode.h'), os.path.join(distsrcsep, 'duk_unicode.h'), { - '#include "duk_unicode_ids_noa.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_noa.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_noabmp.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_noabmp.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_m_let_noa.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_m_let_noa.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_m_let_noabmp.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_m_let_noabmp.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_idp_m_ids_noa.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_idp_m_ids_noa.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_idp_m_ids_noabmp.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_idp_m_ids_noabmp.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_caseconv.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_caseconv.h.tmp'), strip_last_nl=True), - '#include "duk_unicode_re_canon_lookup.h"': read_file(os.path.join(distsrcsep, 'duk_unicode_re_canon_lookup.h.tmp'), strip_last_nl=True) -}) - -copy_and_replace(os.path.join(distsrcsep, 'duk_unicode_tables.c'), os.path.join(distsrcsep, 'duk_unicode_tables.c'), { - '#include "duk_unicode_ids_noa.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_noa.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_noabmp.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_noabmp.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_m_let_noa.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_m_let_noa.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_ids_m_let_noabmp.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_ids_m_let_noabmp.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_idp_m_ids_noa.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_idp_m_ids_noa.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_idp_m_ids_noabmp.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_idp_m_ids_noabmp.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_caseconv.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_caseconv.c.tmp'), strip_last_nl=True), - '#include "duk_unicode_re_canon_lookup.c"': read_file(os.path.join(distsrcsep, 'duk_unicode_re_canon_lookup.c.tmp'), strip_last_nl=True) -}) - -# Clean up some temporary files - -delete_matching_files(distsrcsep, lambda x: x[-4:] == '.tmp') -delete_matching_files(distsrcsep, lambda x: x in [ - 'ws.txt', - 'let.txt', 'let_noa.txt', 'let_noabmp.txt', - 'ids.txt', 'ids_noa.txt', 'ids_noabmp.txt', - 'ids_m_let.txt', 'ids_m_let_noa.txt', 
'ids_m_let_noabmp.txt', - 'idp_m_ids.txt', 'idp_m_ids_noa.txt', 'idp_m_ids_noabmp.txt' -]) -delete_matching_files(distsrcsep, lambda x: x[0:8] == 'caseconv' and x[-4:] == '.txt') - -# Create a combined source file, duktape.c, into a separate combined source -# directory. This allows user to just include "duktape.c", "duktape.h", and -# "duk_config.h" into a project and maximizes inlining and size optimization -# opportunities even with older compilers. Because some projects include -# these files into their repository, the result should be deterministic and -# diffable. Also, it must retain __FILE__/__LINE__ behavior through -# preprocessor directives. Whitespace and comments can be stripped as long -# as the other requirements are met. For some users it's preferable *not* -# to use #line directives in the combined source, so a separate variant is -# created for that, see: https://github.com/svaarala/duktape/pull/363. - -def create_source_prologue(license_file, authors_file): - res = [] - - # Because duktape.c/duktape.h/duk_config.h are often distributed or - # included in project sources as is, add a license reminder and - # Duktape version information to the duktape.c header (duktape.h - # already contains them). - - duk_major = duk_version / 10000 - duk_minor = duk_version / 100 % 100 - duk_patch = duk_version % 100 - res.append('/*') - res.append(' * Single source autogenerated distributable for Duktape %d.%d.%d.' % (duk_major, duk_minor, duk_patch)) - res.append(' *') - res.append(' * Git commit %s (%s).' % (git_commit, git_describe)) - res.append(' * Git branch %s.' % git_branch) - res.append(' *') - res.append(' * See Duktape AUTHORS.rst and LICENSE.txt for copyright and') - res.append(' * licensing information.') - res.append(' */') - res.append('') - - # Add LICENSE.txt and AUTHORS.rst to combined source so that they're automatically - # included and are up-to-date. - - res.append('/* LICENSE.txt */') - with open(license_file, 'rb') as f: - for line in f: - res.append(line.strip()) - res.append('') - res.append('/* AUTHORS.rst */') - with open(authors_file, 'rb') as f: - for line in f: - res.append(line.strip()) - - return '\n'.join(res) + '\n' - -def select_combined_sources(): - # These files must appear before the alphabetically sorted - # ones so that static variables get defined before they're - # used. We can't forward declare them because that would - # cause C++ issues (see GH-63). When changing, verify by - # compiling with g++. 
- handpick = [ - 'duk_replacements.c', - 'duk_debug_macros.c', - 'duk_builtins.c', - 'duk_error_macros.c', - 'duk_unicode_support.c', - 'duk_util_misc.c', - 'duk_util_hashprime.c', - 'duk_hobject_class.c' - ] - - files = [] - for fn in handpick: - files.append(fn) - - for fn in sorted(os.listdir(distsrcsep)): - f_ext = os.path.splitext(fn)[1] - if f_ext not in [ '.c' ]: - continue - if fn in files: - continue - files.append(fn) - - res = map(lambda x: os.path.join(distsrcsep, x), files) - #print(repr(files)) - #print(repr(res)) - return res - -with open(os.path.join(dist, 'prologue.tmp'), 'wb') as f: - f.write(create_source_prologue(os.path.join(dist, 'LICENSE.txt.tmp'), os.path.join(dist, 'AUTHORS.rst.tmp'))) - -exec_print_stdout([ - sys.executable, - os.path.join('util', 'combine_src.py'), - '--include-path', distsrcsep, - '--include-exclude', 'duk_config.h', # don't inline - '--include-exclude', 'duktape.h', # don't inline - '--prologue', os.path.join(dist, 'prologue.tmp'), - '--output-source', os.path.join(distsrccom, 'duktape.c'), - '--output-metadata', os.path.join(distsrccom, 'metadata.json'), - '--line-directives' -] + select_combined_sources()) - -exec_print_stdout([ - sys.executable, - os.path.join('util', 'combine_src.py'), - '--include-path', distsrcsep, - '--include-exclude', 'duk_config.h', # don't inline - '--include-exclude', 'duktape.h', # don't inline - '--prologue', os.path.join(dist, 'prologue.tmp'), - '--output-source', os.path.join(distsrcnol, 'duktape.c'), - '--output-metadata', os.path.join(distsrcnol, 'metadata.json') -] + select_combined_sources()) + cmd.append('--rom-auto-lightfunc') +for i in opts.user_builtin_metadata: + cmd.append('--user-builtin-metadata') + cmd.append(i) +exec_print_stdout(cmd) # Clean up remaining temp files delete_matching_files(dist, lambda x: x[-4:] == '.tmp') # Create SPDX license once all other files are in place (and cleaned) if opts.create_spdx: - print('Create SPDX license') - try: - exec_get_stdout([ - sys.executable, - os.path.join('util', 'create_spdx_license.py'), - os.path.join(dist, 'license.spdx') - ]) - except: - print('') - print('***') - print('*** WARNING: Failed to create SPDX license, this should not happen for an official release!') - print('***') - print('') + print('Create SPDX license') + try: + exec_get_stdout([ + sys.executable, + os.path.join('tools', 'create_spdx_license.py'), + os.path.join(dist, 'license.spdx') + ]) + except: + print('') + print('***') + print('*** WARNING: Failed to create SPDX license, this should not happen for an official release!') + print('***') + print('') else: - print('Skip SPDX license creation') + print('Skip SPDX license creation') print('Dist finished successfully') diff --git a/util/matrix_compile.py b/util/matrix_compile.py index ed8ea503..ce8210a0 100644 --- a/util/matrix_compile.py +++ b/util/matrix_compile.py @@ -25,421 +25,420 @@ import StringIO # no need for cStringIO # Select one: Select([ 1, 2, 3 ]) -> [ 1 ], [ 2 ], [ 3 ] class Select: - val = None + val = None - def __init__(self, val): - self.val = val + def __init__(self, val): + self.val = val # Combine: Combine([ 1, 2 ], 'foo') -> [ 1 'foo' ], [ 2 'foo' ] class Combine: - val = None + val = None - def __init__(self, val): - self.val = val + def __init__(self, val): + self.val = val # Subset: Subset([ 'foo', 'bar' ]) -> Combine([ [ '', 'foo' ], [ '', 'bar' ] ]) # -> [ '' '' ], [ 'foo' '' ], [ '' 'bar' ], [ 'foo' 'bar' ] class Subset: - val = None + val = None - def __init__(self, val): - self.val = val + def 
__init__(self, val): + self.val = val # Sequence: Sequence([ 'foo', 'bar', 'quux' ]) -> [ 'foo', 'bar', 'quux' ] # Plain list is also interpreted as a Sequence. class Sequence: - val = None + val = None - def __init__(self, val): - self.val = val + def __init__(self, val): + self.val = val # Prepare a combination lookup structure. def prepcomb(val): - if isinstance(val, (str, unicode)): - return { 'size': 1, 'value': val, 'type': 'terminal' } - if isinstance(val, Sequence): - return { 'size': 1, 'value': val.val, 'type': 'sequence' } - if isinstance(val, list): - # interpret as Sequence - return { 'size': 1, 'value': val, 'type': 'sequence' } - if isinstance(val, Select): - nodes = [] - size = 0 - for i in val.val: - node = prepcomb(i) - nodes.append(node) - size += node['size'] - return { 'size': size, 'value': nodes, 'type': 'select' } - if isinstance(val, Combine): - nodes = [] - size = 1 - for i in val.val: - node = prepcomb(i) - nodes.append(node) - size *= node['size'] - return { 'size': size, 'value': nodes, 'type': 'combine' } - if isinstance(val, Subset): - nodes = [] - size = 1 - for i in val.val: - node = prepcomb(i) - nodes.append(node) - size *= (node['size'] + 1) # value or not present - return { 'size': size, 'value': nodes, 'type': 'subset' } - raise Exception('invalid argument') + if isinstance(val, (str, unicode)): + return { 'size': 1, 'value': val, 'type': 'terminal' } + if isinstance(val, Sequence): + return { 'size': 1, 'value': val.val, 'type': 'sequence' } + if isinstance(val, list): + # interpret as Sequence + return { 'size': 1, 'value': val, 'type': 'sequence' } + if isinstance(val, Select): + nodes = [] + size = 0 + for i in val.val: + node = prepcomb(i) + nodes.append(node) + size += node['size'] + return { 'size': size, 'value': nodes, 'type': 'select' } + if isinstance(val, Combine): + nodes = [] + size = 1 + for i in val.val: + node = prepcomb(i) + nodes.append(node) + size *= node['size'] + return { 'size': size, 'value': nodes, 'type': 'combine' } + if isinstance(val, Subset): + nodes = [] + size = 1 + for i in val.val: + node = prepcomb(i) + nodes.append(node) + size *= (node['size'] + 1) # value or not present + return { 'size': size, 'value': nodes, 'type': 'subset' } + raise Exception('invalid argument') # Return number of combinations for input lists. def countcombinations(prepped): - return prepped['size'] + return prepped['size'] # Return a combination for index, for index in [0,countcombinations(lists)[. # This allows random selection of combinations using a PRNG. 
def getcomb(prepped, index): - if prepped['type'] == 'terminal': - return [ prepped['value'] ], index - if prepped['type'] == 'sequence': - return prepped['value'], index - if prepped['type'] == 'select': - idx = index % prepped['size'] - index = index / prepped['size'] - - for i in prepped['value']: - if idx >= i['size']: - idx -= i['size'] - continue - ret, ign_index = getcomb(i, idx) - return ret, index - - raise Exception('should not be here') - if prepped['type'] == 'combine': - ret = [] - for i in prepped['value']: - idx = index % i['size'] - index = index / i['size'] - tmp, tmp_index = getcomb(i, idx) - ret.append(tmp) - return ret, index - if prepped['type'] == 'subset': - ret = [] - for i in prepped['value']: - idx = index % (i['size'] + 1) - index = index / (i['size'] + 1) - if idx == 0: - # no value - ret.append('') - else: - tmp, tmp_index = getcomb(i, idx - 1) - ret.append(tmp) - return ret, index - raise Exception('invalid prepped value') + if prepped['type'] == 'terminal': + return [ prepped['value'] ], index + if prepped['type'] == 'sequence': + return prepped['value'], index + if prepped['type'] == 'select': + idx = index % prepped['size'] + index = index / prepped['size'] + + for i in prepped['value']: + if idx >= i['size']: + idx -= i['size'] + continue + ret, ign_index = getcomb(i, idx) + return ret, index + + raise Exception('should not be here') + if prepped['type'] == 'combine': + ret = [] + for i in prepped['value']: + idx = index % i['size'] + index = index / i['size'] + tmp, tmp_index = getcomb(i, idx) + ret.append(tmp) + return ret, index + if prepped['type'] == 'subset': + ret = [] + for i in prepped['value']: + idx = index % (i['size'] + 1) + index = index / (i['size'] + 1) + if idx == 0: + # no value + ret.append('') + else: + tmp, tmp_index = getcomb(i, idx - 1) + ret.append(tmp) + return ret, index + raise Exception('invalid prepped value') def flatten(v): - if isinstance(v, (str, unicode)): - return [ v ] - if isinstance(v, list): - ret = [] - for i in v: - ret += flatten(i) - return ret - raise Exception('invalid value: %s' % repr(v)) + if isinstance(v, (str, unicode)): + return [ v ] + if isinstance(v, list): + ret = [] + for i in v: + ret += flatten(i) + return ret + raise Exception('invalid value: %s' % repr(v)) def getcombination(val, index): - res, res_index = getcomb(val, index) - if res_index != 0: - sys.stderr.write('WARNING: index not consumed entirely, invalid index? (input index %d, output index %d)\n' % (index, res_index)) + res, res_index = getcomb(val, index) + if res_index != 0: + sys.stderr.write('WARNING: index not consumed entirely, invalid index? (input index %d, output index %d)\n' % (index, res_index)) - return res + return res # Generate all combinations. def getcombinations(val): - res = [] - for i in xrange(countcombinations(val)): - res.append(getcombination(val, i)) - return res + res = [] + for i in xrange(countcombinations(val)): + res.append(getcombination(val, i)) + return res # # Test matrix # def create_matrix(fn_duk): - # A lot of compiler versions are used, must install at least: - # - # gcc-4.6 - # gcc-4.7 - # gcc-4.8 - # gcc-4.6-multilib - # g++-4.6-multilib - # gcc-4.7-multilib - # g++-4.7-multilib - # gcc-4.8-multilib - # g++-4.8-multilib - # gcc-multilib - # g++-multilib - # llvm-gcc-4.6 - # llvm-gcc-4.7 - # llvm-3.4 - # clang - # - # The set of compilers tested is distribution specific and not ery - # stable, so you may need to edit the compilers manually. 
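# Illustrative example, not part of the patch: a tiny demonstration of the
# combination machinery above (Select/Combine/Subset, prepcomb(),
# countcombinations(), getcombination(), flatten()), showing how every index
# in [0, countcombinations(...)) decodes to one unique combination; this is
# what lets the main loop sample the matrix with a PRNG over indices.
def _demo_combinations():
    demo = Combine([
        Select([ 'gcc', 'clang' ]),   # compiler: 2 alternatives
        Select([ '-O0', '-O2' ]),     # optimization level: 2 alternatives
        Subset([ '-g' ])              # '-g' present or absent: 2 alternatives
    ])
    prepped = prepcomb(demo)
    total = countcombinations(prepped)   # 2 * 2 * 2 = 8
    for idx in xrange(total):
        # Empty strings mark Subset members that are absent in this combination.
        comb = [ x for x in flatten(getcombination(prepped, idx)) if x != '' ]
        print('%d: %s' % (idx, ' '.join(comb)))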
- - gcc_cmd_dialect_options = Select([ - # Some dialects and architectures are only available for newer g++ versions - Combine([ - # -m32 with older llvm causes self test failure (double union) - Select([ 'llvm-gcc' ]), - Select([ '-m64' ]), - Select([ - '', - '-std=c89', - '-std=c99', - [ '-std=c99', '-pedantic' ] - ]) - ]), - Combine([ - Select([ 'gcc', 'gcc-4.6' ]), - Select([ '-m64', '-m32' ]), - Select([ - '', - '-std=c89', - '-std=c99', - [ '-std=c99', '-pedantic' ] - ]) - ]), - Combine([ - Select([ 'gcc-4.7', 'gcc-4.8' ]), - Select([ '-m64', '-m32', '-mx32' ]), - Select([ - '', - '-std=c89', - '-std=c99', - [ '-std=c99', '-pedantic' ] - ]) - ]), - ]) - gxx_cmd_dialect_options = Select([ - # Some dialects and architectures are only available for newer g++ versions - Combine([ - Select([ 'llvm-g++' ]), - Select([ '-m64' ]), - Select([ - '', - '-std=c++98', - [ '-std=c++11', '-pedantic' ] - ]) - ]), - Combine([ - Select([ 'g++', 'g++-4.6' ]), - Select([ '-m64', '-m32' ]), - Select([ - '', - '-std=c++98', - ]) - ]), - Combine([ - Select([ 'g++-4.7', 'g++-4.8' ]), - Select([ '-m64', '-m32', '-mx32' ]), - Select([ - '', - '-std=c++98', - [ '-std=c++11', '-pedantic' ] - ]) - ]), - Combine([ - Select([ 'g++', 'g++-4.8' ]), - Select([ '-m64', '-m32', '-mx32' ]), - Select([ - '-std=c++1y', - '-std=gnu++1y' - ]) - ]), - ]) - gcc_gxx_debug_options = Select([ - '', - [ '-g', '-ggdb' ] - ]) - gcc_gxx_warning_options = Select([ - '', - #'-Wall', - [ '-Wall', '-Wextra' ] - #XXX: -Wfloat-equal - # [ '-Wall', '-Wextra', '-Werror' ] - ]) - gcc_gxx_optimization_options = Select([ - '-O0', - '-O1', - '-O2', - - # -O3 and -O4 produces spurious warnings on gcc 4.8.1, e.g. "error: assuming signed overflow does not occur when assuming that (X - c) > X is always false [-Werror=strict-overflow]" - # Not sure what causes these, but perhaps GCC converts signed comparisons into subtractions and then runs into: https://gcc.gnu.org/wiki/FAQ#signed_overflow - - [ '-O3', '-fno-strict-overflow' ], - #'-O3' - - [ '-O4', '-fno-strict-overflow' ], - #'-O4' - - '-Os' - ]) - clang_cmd_dialect_options = Select([ - Combine([ - 'clang', - Select([ '-m64', '-m32' ]), - Select([ - '', - '-std=c89', - '-std=c99', - [ '-std=c99', '-pedantic' ] - ]) - ]) - ]) - clang_debug_options = Select([ - '', - [ '-g', '-ggdb' ] - ]) - clang_warning_options = Select([ - '', - [ '-Wall', '-Wextra' ], - [ '-Wall', '-Wextra', '-Wcast-align' ] - #XXX: -Wfloat-equal - #[ '-Wall', '-Wextra', '-Werror' ] - ]) - clang_optimization_options = Select([ - '-O0', - '-O1', - '-O2', - '-O3', - #'-O4', - '-Os' - ]) - - # Feature options in suitable chunks that can be subsetted arbitrarily. 
- - duktape_options = Subset([ - Select([ '-DDUK_OPT_NO_REFERENCE_COUNTING', - '-DDUK_OPT_NO_MARK_AND_SWEEP', - '-DDUK_OPT_GC_TORTURE' ]), - '-DDUK_OPT_SHUFFLE_TORTURE', - '-DDUK_OPT_NO_VOLUNTARY_GC', - '-DDUK_OPT_NO_PACKED_TVAL', - Select([ '', '-DDUK_OPT_FORCE_ALIGN=4', '-DDUK_OPT_FORCE_ALIGN=8' ]), - '-DDUK_OPT_NO_TRACEBACKS', - '-DDUK_OPT_NO_VERBOSE_ERRORS', - '-DDUK_OPT_PARANOID_ERRORS', - '-DDUK_OPT_NO_MS_RESIZE_STRINGTABLE', - '-DDUK_OPT_NO_STRICT_DECL', - '-DDUK_OPT_NO_REGEXP_SUPPORT', - '-DDUK_OPT_NO_ES6_REGEXP_SYNTAX', - '-DDUK_OPT_NO_OCTAL_SUPPORT', - '-DDUK_OPT_NO_SOURCE_NONBMP', - '-DDUK_OPT_STRICT_UTF8_SOURCE', - #'-DDUK_OPT_NO_FILE_IO', # FIXME: no print() - '-DDUK_OPT_NO_SECTION_B', - '-DDUK_OPT_NO_JX', - '-DDUK_OPT_NO_JC', - '-DDUK_OPT_NO_NONSTD_ACCESSOR_KEY_ARGUMENT', - '-DDUK_OPT_NO_NONSTD_FUNC_STMT', - '-DDUK_OPT_NONSTD_FUNC_CALLER_PROPERTY', - '-DDUK_OPT_NONSTD_FUNC_SOURCE_PROPERTY', - '-DDUK_OPT_NO_NONSTD_ARRAY_SPLICE_DELCOUNT', - '-DDUK_OPT_NO_NONSTD_ARRAY_CONCAT_TRAILER', - '-DDUK_OPT_NO_NONSTD_ARRAY_MAP_TRAILER', - '-DDUK_OPT_NO_NONSTD_JSON_ESC_U2028_U2029', - '-DDUK_OPT_NO_BYTECODE_DUMP_SUPPORT', - '-DDUK_OPT_NO_ES6_OBJECT_PROTO_PROPERTY', - '-DDUK_OPT_NO_ES6_OBJECT_SETPROTOTYPEOF', - '-DDUK_OPT_NO_ES6_PROXY', - '-DDUK_OPT_NO_ZERO_BUFFER_DATA', - '-DDUK_OPT_LIGHTFUNC_BUILTINS', - '-DDUK_OPT_ASSERTIONS', - [ '-DDUK_OPT_DEBUG', '-DDUK_OPT_DEBUG_WRITE(level,file,line,func,msg)=do {fprintf(stderr, "%ld %s %ld %s %s\\n", (long) (level), (file), (long) (line), (func), (msg));} while(0)', '-DDUK_OPT_DPRINT', '-DDUK_OPT_DDDPRINT' ], - '-DDUK_OPT_SELF_TESTS', - [ '-DDUK_OPT_STRTAB_CHAIN', '-DDUK_OPT_STRTAB_CHAIN_SIZE=64' ], - - # DUK_OPT_DEBUGGER_SUPPORT depends on having pc2line and - # interrupt counter, so avoid invalid combinations. - Select([ - Subset([ '-DDUK_OPT_NO_PC2LINE', '-DDUK_OPT_INTERRUPT_COUNTER' ]), - [ '-DDUK_OPT_DEBUGGER_SUPPORT', '-DDUK_OPT_INTERRUPT_COUNTER' ] - ]), - '-DDUK_OPT_DEBUGGER_FWD_LOGGING', - '-DDUK_OPT_DEBUGGER_DUMPHEAP', - '-DDUK_OPT_DEBUGGER_INSPECT', - '-DDUK_OPT_NO_DEBUGGER_THROW_NOTIFY', - '-DDUK_OPT_DEBUGGER_PAUSE_UNCAUGHT', - '-DDUK_OPT_JSON_STRINGIFY_FASTPATH' - - # XXX: 16-bit options - ]) - - # FIXME: DUK_USE_LEXER_SLIDING_WINDOW - - # The final command is compiler specific because e.g. include path - # and link option syntax could (in principle) differ between compilers. 
- - gcc_cmd_matrix = Combine([ - gcc_cmd_dialect_options, - gcc_gxx_debug_options, - gcc_gxx_warning_options, - gcc_gxx_optimization_options, - duktape_options, - [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] - ]) - - gxx_cmd_matrix = Combine([ - gxx_cmd_dialect_options, - gcc_gxx_debug_options, - gcc_gxx_warning_options, - gcc_gxx_optimization_options, - duktape_options, - [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] - ]) - - clang_cmd_matrix = Combine([ - clang_cmd_dialect_options, - clang_debug_options, - clang_warning_options, - clang_optimization_options, - duktape_options, - [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] - ]) - - matrix = Select([ gcc_cmd_matrix, gxx_cmd_matrix, clang_cmd_matrix ]) - return matrix + # A lot of compiler versions are used, must install at least: + # + # gcc-4.6 + # gcc-4.7 + # gcc-4.8 + # gcc-4.6-multilib + # g++-4.6-multilib + # gcc-4.7-multilib + # g++-4.7-multilib + # gcc-4.8-multilib + # g++-4.8-multilib + # gcc-multilib + # g++-multilib + # llvm-gcc-4.6 + # llvm-gcc-4.7 + # llvm-3.4 + # clang + # + # The set of compilers tested is distribution specific and not ery + # stable, so you may need to edit the compilers manually. + + gcc_cmd_dialect_options = Select([ + # Some dialects and architectures are only available for newer g++ versions + Combine([ + # -m32 with older llvm causes self test failure (double union) + Select([ 'llvm-gcc' ]), + Select([ '-m64' ]), + Select([ + '', + '-std=c89', + '-std=c99', + [ '-std=c99', '-pedantic' ] + ]) + ]), + Combine([ + Select([ 'gcc', 'gcc-4.6' ]), + Select([ '-m64', '-m32' ]), + Select([ + '', + '-std=c89', + '-std=c99', + [ '-std=c99', '-pedantic' ] + ]) + ]), + Combine([ + Select([ 'gcc-4.7', 'gcc-4.8' ]), + Select([ '-m64', '-m32', '-mx32' ]), + Select([ + '', + '-std=c89', + '-std=c99', + [ '-std=c99', '-pedantic' ] + ]) + ]), + ]) + gxx_cmd_dialect_options = Select([ + # Some dialects and architectures are only available for newer g++ versions + Combine([ + Select([ 'llvm-g++' ]), + Select([ '-m64' ]), + Select([ + '', + '-std=c++98', + [ '-std=c++11', '-pedantic' ] + ]) + ]), + Combine([ + Select([ 'g++', 'g++-4.6' ]), + Select([ '-m64', '-m32' ]), + Select([ + '', + '-std=c++98', + ]) + ]), + Combine([ + Select([ 'g++-4.7', 'g++-4.8' ]), + Select([ '-m64', '-m32', '-mx32' ]), + Select([ + '', + '-std=c++98', + [ '-std=c++11', '-pedantic' ] + ]) + ]), + Combine([ + Select([ 'g++', 'g++-4.8' ]), + Select([ '-m64', '-m32', '-mx32' ]), + Select([ + '-std=c++1y', + '-std=gnu++1y' + ]) + ]), + ]) + gcc_gxx_debug_options = Select([ + '', + [ '-g', '-ggdb' ] + ]) + gcc_gxx_warning_options = Select([ + '', + #'-Wall', + [ '-Wall', '-Wextra' ] + #XXX: -Wfloat-equal + # [ '-Wall', '-Wextra', '-Werror' ] + ]) + gcc_gxx_optimization_options = Select([ + '-O0', + '-O1', + '-O2', + + # -O3 and -O4 produces spurious warnings on gcc 4.8.1, e.g. 
"error: assuming signed overflow does not occur when assuming that (X - c) > X is always false [-Werror=strict-overflow]" + # Not sure what causes these, but perhaps GCC converts signed comparisons into subtractions and then runs into: https://gcc.gnu.org/wiki/FAQ#signed_overflow + + [ '-O3', '-fno-strict-overflow' ], + #'-O3' + + [ '-O4', '-fno-strict-overflow' ], + #'-O4' + + '-Os' + ]) + clang_cmd_dialect_options = Select([ + Combine([ + 'clang', + Select([ '-m64', '-m32' ]), + Select([ + '', + '-std=c89', + '-std=c99', + [ '-std=c99', '-pedantic' ] + ]) + ]) + ]) + clang_debug_options = Select([ + '', + [ '-g', '-ggdb' ] + ]) + clang_warning_options = Select([ + '', + [ '-Wall', '-Wextra' ], + [ '-Wall', '-Wextra', '-Wcast-align' ] + #XXX: -Wfloat-equal + #[ '-Wall', '-Wextra', '-Werror' ] + ]) + clang_optimization_options = Select([ + '-O0', + '-O1', + '-O2', + '-O3', + #'-O4', + '-Os' + ]) + + # Feature options in suitable chunks that can be subsetted arbitrarily. + + duktape_options = Subset([ + Select([ '-DDUK_OPT_NO_REFERENCE_COUNTING', + '-DDUK_OPT_NO_MARK_AND_SWEEP', + '-DDUK_OPT_GC_TORTURE' ]), + '-DDUK_OPT_SHUFFLE_TORTURE', + '-DDUK_OPT_NO_VOLUNTARY_GC', + '-DDUK_OPT_NO_PACKED_TVAL', + Select([ '', '-DDUK_OPT_FORCE_ALIGN=4', '-DDUK_OPT_FORCE_ALIGN=8' ]), + '-DDUK_OPT_NO_TRACEBACKS', + '-DDUK_OPT_NO_VERBOSE_ERRORS', + '-DDUK_OPT_PARANOID_ERRORS', + '-DDUK_OPT_NO_MS_RESIZE_STRINGTABLE', + '-DDUK_OPT_NO_STRICT_DECL', + '-DDUK_OPT_NO_REGEXP_SUPPORT', + '-DDUK_OPT_NO_ES6_REGEXP_SYNTAX', + '-DDUK_OPT_NO_OCTAL_SUPPORT', + '-DDUK_OPT_NO_SOURCE_NONBMP', + '-DDUK_OPT_STRICT_UTF8_SOURCE', + '-DDUK_OPT_NO_SECTION_B', + '-DDUK_OPT_NO_JX', + '-DDUK_OPT_NO_JC', + '-DDUK_OPT_NO_NONSTD_ACCESSOR_KEY_ARGUMENT', + '-DDUK_OPT_NO_NONSTD_FUNC_STMT', + '-DDUK_OPT_NONSTD_FUNC_CALLER_PROPERTY', + '-DDUK_OPT_NONSTD_FUNC_SOURCE_PROPERTY', + '-DDUK_OPT_NO_NONSTD_ARRAY_SPLICE_DELCOUNT', + '-DDUK_OPT_NO_NONSTD_ARRAY_CONCAT_TRAILER', + '-DDUK_OPT_NO_NONSTD_ARRAY_MAP_TRAILER', + '-DDUK_OPT_NO_NONSTD_JSON_ESC_U2028_U2029', + '-DDUK_OPT_NO_BYTECODE_DUMP_SUPPORT', + '-DDUK_OPT_NO_ES6_OBJECT_PROTO_PROPERTY', + '-DDUK_OPT_NO_ES6_OBJECT_SETPROTOTYPEOF', + '-DDUK_OPT_NO_ES6_PROXY', + '-DDUK_OPT_NO_ZERO_BUFFER_DATA', + '-DDUK_OPT_LIGHTFUNC_BUILTINS', + '-DDUK_OPT_ASSERTIONS', + [ '-DDUK_OPT_DEBUG', '-DDUK_OPT_DEBUG_WRITE(level,file,line,func,msg)=do {fprintf(stderr, "%ld %s %ld %s %s\\n", (long) (level), (file), (long) (line), (func), (msg));} while(0)', '-DDUK_OPT_DPRINT', '-DDUK_OPT_DDDPRINT' ], + '-DDUK_OPT_SELF_TESTS', + [ '-DDUK_OPT_STRTAB_CHAIN', '-DDUK_OPT_STRTAB_CHAIN_SIZE=64' ], + + # DUK_OPT_DEBUGGER_SUPPORT depends on having pc2line and + # interrupt counter, so avoid invalid combinations. + Select([ + Subset([ '-DDUK_OPT_NO_PC2LINE', '-DDUK_OPT_INTERRUPT_COUNTER' ]), + [ '-DDUK_OPT_DEBUGGER_SUPPORT', '-DDUK_OPT_INTERRUPT_COUNTER' ] + ]), + '-DDUK_OPT_DEBUGGER_FWD_LOGGING', + '-DDUK_OPT_DEBUGGER_DUMPHEAP', + '-DDUK_OPT_DEBUGGER_INSPECT', + '-DDUK_OPT_NO_DEBUGGER_THROW_NOTIFY', + '-DDUK_OPT_DEBUGGER_PAUSE_UNCAUGHT', + '-DDUK_OPT_JSON_STRINGIFY_FASTPATH' + + # XXX: 16-bit options + ]) + + # XXX: DUK_USE_LEXER_SLIDING_WINDOW + + # The final command is compiler specific because e.g. include path + # and link option syntax could (in principle) differ between compilers. 
+ + gcc_cmd_matrix = Combine([ + gcc_cmd_dialect_options, + gcc_gxx_debug_options, + gcc_gxx_warning_options, + gcc_gxx_optimization_options, + duktape_options, + [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] + ]) + + gxx_cmd_matrix = Combine([ + gxx_cmd_dialect_options, + gcc_gxx_debug_options, + gcc_gxx_warning_options, + gcc_gxx_optimization_options, + duktape_options, + [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] + ]) + + clang_cmd_matrix = Combine([ + clang_cmd_dialect_options, + clang_debug_options, + clang_warning_options, + clang_optimization_options, + duktape_options, + [ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ] + ]) + + matrix = Select([ gcc_cmd_matrix, gxx_cmd_matrix, clang_cmd_matrix ]) + return matrix # # Main # def check_unlink(filename): - if os.path.exists(filename): - os.unlink(filename) + if os.path.exists(filename): + os.unlink(filename) def main(): - # XXX: add option for testcase(s) to run? - # XXX: add valgrind support, restrict to -m64 compilation? - # XXX: proper tempfile usage and cleanup - - time_str = str(long(time.time() * 1000.0)) - - parser = optparse.OptionParser() - parser.add_option('--count', dest='count', default='1000') - parser.add_option('--seed', dest='seed', default='default_seed_' + time_str) - parser.add_option('--out-results-json', dest='out_results_json', default='/tmp/matrix_results%s.json' % time_str) - parser.add_option('--out-failed', dest='out_failed', default='/tmp/matrix_failed%s.txt' % time_str) - parser.add_option('--verbose', dest='verbose', default=False, action='store_true') + # XXX: add option for testcase(s) to run? + # XXX: add valgrind support, restrict to -m64 compilation? + # XXX: proper tempfile usage and cleanup + + time_str = str(long(time.time() * 1000.0)) + + parser = optparse.OptionParser() + parser.add_option('--count', dest='count', default='1000') + parser.add_option('--seed', dest='seed', default='default_seed_' + time_str) + parser.add_option('--out-results-json', dest='out_results_json', default='/tmp/matrix_results%s.json' % time_str) + parser.add_option('--out-failed', dest='out_failed', default='/tmp/matrix_failed%s.txt' % time_str) + parser.add_option('--verbose', dest='verbose', default=False, action='store_true') (opts, args) = parser.parse_args() - fn_testjs = '/tmp/test%s.js' % time_str - fn_duk = '/tmp/duk%s' % time_str + fn_testjs = '/tmp/test%s.js' % time_str + fn_duk = '/tmp/duk%s' % time_str - # Avoid any optional features (like JSON or RegExps) in the test. - # Don't make the test very long, as it executes very slowly when - # DUK_OPT_DDDPRINT and DUK_OPT_ASSERTIONS are enabled. + # Avoid any optional features (like JSON or RegExps) in the test. + # Don't make the test very long, as it executes very slowly when + # DUK_OPT_DDDPRINT and DUK_OPT_ASSERTIONS are enabled. 
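# Note, not part of the patch: in the test program written out below, the
# value caught and printed from fibthrow(9) is 34, the ninth Fibonacci number
# (1, 1, 2, 3, 5, 8, 13, 21, 34); that is the final '34' line in the
# expected-output string defined a bit further down. The fibthrow body itself
# sits outside this hunk, but the recurrence is easy to cross-check:
def _fib(n):
    a, b = 1, 1
    for _ in xrange(n - 2):
        a, b = b, a + b
    return b
assert _fib(9) == 34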
- f = open(fn_testjs, 'wb') - f.write(''' + f = open(fn_testjs, 'wb') + f.write(''' // Fibonacci using try-catch, exercises setjmp/longjmp a lot function fibthrow(n) { var f1, f2; @@ -455,109 +454,109 @@ print(Math.PI); // tests constant endianness print(JSON.stringify({ foo: 'bar' })); try { fibthrow(9); } catch (e) { print(e); } ''') - f.close() - expect = 'Hello world\n3\n3.141592653589793\n{"foo":"bar"}\n34\n' - - print('Using seed: ' + repr(opts.seed)) - random.seed(opts.seed) - matrix = create_matrix(fn_duk) - prepped = prepcomb(matrix) -# print(json.dumps(prepped, indent=4)) -# print(json.dumps(getcombinations(prepped), indent=4)) - numcombinations = countcombinations(prepped) - - # The number of combinations is large so do (pseudo) random - # testing over the matrix. Ideally we'd avoid re-testing the - # same combination twice, but with the matrix space in billions - # this doesn't need to be checked. - - res = [] - failed = [] - for i in xrange(long(opts.count)): - fail = False - idx = random.randrange(0, numcombinations) - cmd = getcombination(prepped, idx) - #cmd = getcombination(prepped, idx) - compile_command = flatten(cmd) - compile_command = [ elem for elem in compile_command if elem != '' ] # remove empty strings - - print('%d/%d (combination %d, count %d)' % (i + 1, long(opts.count), idx, numcombinations)) - #print('%d/%d (combination %d, count %d) %s' % (i + 1, long(opts.count), idx, numcombinations, repr(compile_command))) - if opts.verbose: - print(' '.join(compile_command)) - - check_unlink(fn_duk) - #print(repr(compile_command)) - compile_p = subprocess.Popen(compile_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - compile_stdout, compile_stderr = compile_p.communicate() - compile_exitcode = compile_p.returncode - - if compile_exitcode != 0: - fail = True - else: - if not os.path.exists(fn_duk): - print('*** WARNING: compile success but no %s ***' % fn_duk) - - run_command = [ fn_duk, fn_testjs ] - if fail: - run_stdout = None - run_stderr = None - run_exitcode = 1 - else: - run_p = subprocess.Popen(run_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - run_stdout, run_stderr = run_p.communicate() - run_exitcode = run_p.returncode - - if run_exitcode != 0: - fail = True - if run_stdout != expect: - fail = True - - if fail: - print('------------------------------------------------------------------------------') - print('*** FAILED: %s' % repr(compile_command)) - print(' '.join(compile_command)) - failed.append(' '.join(compile_command)) - - print('COMPILE STDOUT:') - print(compile_stdout) - print('COMPILE STDERR:') - print(compile_stderr) - print('RUN STDOUT:') - print(run_stdout) - print('RUN STDERR:') - print(run_stderr) - print('------------------------------------------------------------------------------') - - res.append({ - 'compile_command': compile_command, - 'compile_stdout': compile_stdout, - 'compile_stderr': compile_stderr, - 'compile_exitcode': compile_exitcode, - 'run_command': run_command, - 'run_stdout': run_stdout, - # Don't include debug output, it's huge with DUK_OPT_DDDPRINT - #'run_stderr': run_stderr, - 'run_exitcode': run_exitcode, - 'run_expect': expect, - 'success': not fail - }) - - sys.stdout.flush() - sys.stderr.flush() - - f = open(opts.out_results_json, 'wb') - f.write(json.dumps(res, indent=4, sort_keys=True)) - f.close() - - f = open(opts.out_failed, 'wb') - f.write('\n'.join(failed) + '\n') - f.close() - - check_unlink(fn_duk) - check_unlink(fn_testjs) - - # XXX: summary of success/failure/warnings (= stderr got 
anything) + f.close() + expect = 'Hello world\n3\n3.141592653589793\n{"foo":"bar"}\n34\n' + + print('Using seed: ' + repr(opts.seed)) + random.seed(opts.seed) + matrix = create_matrix(fn_duk) + prepped = prepcomb(matrix) +# print(json.dumps(prepped, indent=4)) +# print(json.dumps(getcombinations(prepped), indent=4)) + numcombinations = countcombinations(prepped) + + # The number of combinations is large so do (pseudo) random + # testing over the matrix. Ideally we'd avoid re-testing the + # same combination twice, but with the matrix space in billions + # this doesn't need to be checked. + + res = [] + failed = [] + for i in xrange(long(opts.count)): + fail = False + idx = random.randrange(0, numcombinations) + cmd = getcombination(prepped, idx) + #cmd = getcombination(prepped, idx) + compile_command = flatten(cmd) + compile_command = [ elem for elem in compile_command if elem != '' ] # remove empty strings + + print('%d/%d (combination %d, count %d)' % (i + 1, long(opts.count), idx, numcombinations)) + #print('%d/%d (combination %d, count %d) %s' % (i + 1, long(opts.count), idx, numcombinations, repr(compile_command))) + if opts.verbose: + print(' '.join(compile_command)) + + check_unlink(fn_duk) + #print(repr(compile_command)) + compile_p = subprocess.Popen(compile_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + compile_stdout, compile_stderr = compile_p.communicate() + compile_exitcode = compile_p.returncode + + if compile_exitcode != 0: + fail = True + else: + if not os.path.exists(fn_duk): + print('*** WARNING: compile success but no %s ***' % fn_duk) + + run_command = [ fn_duk, fn_testjs ] + if fail: + run_stdout = None + run_stderr = None + run_exitcode = 1 + else: + run_p = subprocess.Popen(run_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + run_stdout, run_stderr = run_p.communicate() + run_exitcode = run_p.returncode + + if run_exitcode != 0: + fail = True + if run_stdout != expect: + fail = True + + if fail: + print('------------------------------------------------------------------------------') + print('*** FAILED: %s' % repr(compile_command)) + print(' '.join(compile_command)) + failed.append(' '.join(compile_command)) + + print('COMPILE STDOUT:') + print(compile_stdout) + print('COMPILE STDERR:') + print(compile_stderr) + print('RUN STDOUT:') + print(run_stdout) + print('RUN STDERR:') + print(run_stderr) + print('------------------------------------------------------------------------------') + + res.append({ + 'compile_command': compile_command, + 'compile_stdout': compile_stdout, + 'compile_stderr': compile_stderr, + 'compile_exitcode': compile_exitcode, + 'run_command': run_command, + 'run_stdout': run_stdout, + # Don't include debug output, it's huge with DUK_OPT_DDDPRINT + #'run_stderr': run_stderr, + 'run_exitcode': run_exitcode, + 'run_expect': expect, + 'success': not fail + }) + + sys.stdout.flush() + sys.stderr.flush() + + f = open(opts.out_results_json, 'wb') + f.write(json.dumps(res, indent=4, sort_keys=True)) + f.close() + + f = open(opts.out_failed, 'wb') + f.write('\n'.join(failed) + '\n') + f.close() + + check_unlink(fn_duk) + check_unlink(fn_testjs) + + # XXX: summary of success/failure/warnings (= stderr got anything) if __name__ == '__main__': - main() + main() diff --git a/util/prep_test.py b/util/prep_test.py index 851f54e1..f360bffb 100644 --- a/util/prep_test.py +++ b/util/prep_test.py @@ -26,168 +26,168 @@ import optparse re_include = re.compile(r'^/\*@include\s(.*?)\s*@\*/$') def readFile(fn): - f = open(fn, 'rb') - data = 
f.read() - f.close() - return data + f = open(fn, 'rb') + data = f.read() + f.close() + return data def writeFile(fn, data): - f = open(fn, 'wb') - f.write(data) - f.close() + f = open(fn, 'wb') + f.write(data) + f.close() def stripTrailingNewlines(data): - while data.endswith('\n'): - data = data[:-1] - return data + while data.endswith('\n'): + data = data[:-1] + return data class TestcasePreparer: - def __init__(self, - util_include_path=None, - minify_provider=None, - closure_jar_path=None, - uglifyjs_exe_path=None, - uglifyjs2_exe_path=None): - self.util_include_path = util_include_path - self.minify_provider = minify_provider - self.closure_jar_path = closure_jar_path - self.uglifyjs_exe_path = uglifyjs_exe_path - self.uglifyjs2_exe_path = uglifyjs2_exe_path - - def prepApiTest(self, fn, data): - # FIXME: implement API testcase prepping - return data - - def minifyClosure(self, fn): - fh, absFn = tempfile.mkstemp(suffix='prep_temp') - os.close(fh) - - rc = subprocess.call(['java', '-jar', self.closure_jar_path, '--js_output_file', absFn, fn ]) - if rc != 0: - raise Exception('closure minify failed') - - res = readFile(absFn) - os.unlink(absFn) - return res - - def minifyUglifyJS(self, fn): - fh, absFn = tempfile.mkstemp(suffix='prep_temp') - os.close(fh) - - rc = subprocess.call([self.uglifyjs_exe_path, '-o', absFn, fn]) - if rc != 0: - raise Exception('uglifyjs minify failed') - - res = readFile(absFn) - os.unlink(absFn) - return res - - def minifyUglifyJS2(self, fn): - fh, absFn = tempfile.mkstemp(suffix='prep_temp') - os.close(fh) - - rc = subprocess.call([self.uglifyjs2_exe_path, '-o', absFn, fn]) - if rc != 0: - raise Exception('uglifyjs2 minify failed') - - res = readFile(absFn) - os.unlink(absFn) - return res - - def minifyOneLine(self, fn): - # Closure is very slow to start so it's not ideal for test case use. - # The only thing we really need is to make Ecmascript a one-liner. - - if self.minify_provider == 'closure': - return self.minifyClosure(fn) - elif self.minify_provider == 'uglifyjs': - return self.minifyUglifyJS(fn) - elif self.minify_provider == 'uglifyjs2': - return self.minifyUglifyJS2(fn) - else: - raise Exception('no minifier') - - def prepEcmaPrologue(self, fn): - return stripTrailingNewlines(self.minifyOneLine(fn)) - - def prepEcmaInclude(self, fn): - absFn = os.path.join(self.util_include_path, fn) - return '/* INCLUDE: ' + fn + ' */ ' + stripTrailingNewlines(self.minifyOneLine(absFn)) - - def prepEcmaTest(self, fn_in, fn_prologue, data): - is_strict = False - - lines = [] - for line in data.split('\n'): - if line.startswith('/'): - m = re_include.match(line) - if m is not None: - lines.append(self.prepEcmaInclude(m.group(1))) - continue - elif line.startswith('"use strict"') or line.startswith("'use strict'"): - # This is very approximate, but correct for current tests. - is_strict = True - - lines.append(line) - - if fn_prologue is not None: - # Prepend prologue to first line; if the program is strict - # duplicate the 'use strict' declaration. 
- lines[0] = self.prepEcmaPrologue(fn_prologue) + ' /*...*/ ' + lines[0] - if is_strict: - lines[0] = "'use strict'; " + lines[0] - - return '\n'.join(lines) - - def prepareTestcase(self, fn_in, fn_out, fn_prologue): - data = readFile(fn_in) - - if fn_in.endswith('.c'): - res = self.prepApiTest(fn_in, fn_prologue, data) - elif fn_in.endswith('.js'): - res = self.prepEcmaTest(fn_in, fn_prologue, data) - else: - raise Exception('invalid file (not .c or .js)') - - writeFile(fn_out, res) + def __init__(self, + util_include_path=None, + minify_provider=None, + closure_jar_path=None, + uglifyjs_exe_path=None, + uglifyjs2_exe_path=None): + self.util_include_path = util_include_path + self.minify_provider = minify_provider + self.closure_jar_path = closure_jar_path + self.uglifyjs_exe_path = uglifyjs_exe_path + self.uglifyjs2_exe_path = uglifyjs2_exe_path + + def prepApiTest(self, fn, data): + # XXX: implement API testcase prepping + return data + + def minifyClosure(self, fn): + fh, absFn = tempfile.mkstemp(suffix='prep_temp') + os.close(fh) + + rc = subprocess.call(['java', '-jar', self.closure_jar_path, '--js_output_file', absFn, fn ]) + if rc != 0: + raise Exception('closure minify failed') + + res = readFile(absFn) + os.unlink(absFn) + return res + + def minifyUglifyJS(self, fn): + fh, absFn = tempfile.mkstemp(suffix='prep_temp') + os.close(fh) + + rc = subprocess.call([self.uglifyjs_exe_path, '-o', absFn, fn]) + if rc != 0: + raise Exception('uglifyjs minify failed') + + res = readFile(absFn) + os.unlink(absFn) + return res + + def minifyUglifyJS2(self, fn): + fh, absFn = tempfile.mkstemp(suffix='prep_temp') + os.close(fh) + + rc = subprocess.call([self.uglifyjs2_exe_path, '-o', absFn, fn]) + if rc != 0: + raise Exception('uglifyjs2 minify failed') + + res = readFile(absFn) + os.unlink(absFn) + return res + + def minifyOneLine(self, fn): + # Closure is very slow to start so it's not ideal for test case use. + # The only thing we really need is to make Ecmascript a one-liner. + + if self.minify_provider == 'closure': + return self.minifyClosure(fn) + elif self.minify_provider == 'uglifyjs': + return self.minifyUglifyJS(fn) + elif self.minify_provider == 'uglifyjs2': + return self.minifyUglifyJS2(fn) + else: + raise Exception('no minifier') + + def prepEcmaPrologue(self, fn): + return stripTrailingNewlines(self.minifyOneLine(fn)) + + def prepEcmaInclude(self, fn): + absFn = os.path.join(self.util_include_path, fn) + return '/* INCLUDE: ' + fn + ' */ ' + stripTrailingNewlines(self.minifyOneLine(absFn)) + + def prepEcmaTest(self, fn_in, fn_prologue, data): + is_strict = False + + lines = [] + for line in data.split('\n'): + if line.startswith('/'): + m = re_include.match(line) + if m is not None: + lines.append(self.prepEcmaInclude(m.group(1))) + continue + elif line.startswith('"use strict"') or line.startswith("'use strict'"): + # This is very approximate, but correct for current tests. + is_strict = True + + lines.append(line) + + if fn_prologue is not None: + # Prepend prologue to first line; if the program is strict + # duplicate the 'use strict' declaration. 
+ lines[0] = self.prepEcmaPrologue(fn_prologue) + ' /*...*/ ' + lines[0] + if is_strict: + lines[0] = "'use strict'; " + lines[0] + + return '\n'.join(lines) + + def prepareTestcase(self, fn_in, fn_out, fn_prologue): + data = readFile(fn_in) + + if fn_in.endswith('.c'): + res = self.prepApiTest(fn_in, fn_prologue, data) + elif fn_in.endswith('.js'): + res = self.prepEcmaTest(fn_in, fn_prologue, data) + else: + raise Exception('invalid file (not .c or .js)') + + writeFile(fn_out, res) def main(): - parser = optparse.OptionParser() - parser.add_option('--input', dest='input', default=None) - parser.add_option('--output', dest='output', default=None) - parser.add_option('--prologue', dest='prologue', default=None) - parser.add_option('--util-include-path', dest='util_include_path', default=None) - parser.add_option('--minify-closure', dest='minify_closure', default=None) # point to compiler.jar - parser.add_option('--minify-uglifyjs', dest='minify_uglifyjs', default=None) # point to uglifyjs exe - parser.add_option('--minify-uglifyjs2', dest='minify_uglifyjs2', default=None) # point to uglifyjs exe - (opts, args) = parser.parse_args() - - if opts.input is None or opts.output is None: - raise Exception('filename argument(s) missing (--input and/or --output)') - if opts.util_include_path is None: - raise Exception('missing util include path (--util-include-path)') - - fn_in = opts.input - fn_out = opts.output - fn_prologue = opts.prologue - - minify_provider = None - if opts.minify_closure is not None: - minify_provider = 'closure' - elif opts.minify_uglifyjs is not None: - minify_provider = 'uglifyjs' - elif opts.minify_uglifyjs2 is not None: - minify_provider = 'uglifyjs2' - else: - raise Exception('must provide a minifier (include files must be converted to one-liners)') - - preparer = TestcasePreparer(util_include_path=opts.util_include_path, - minify_provider=minify_provider, - closure_jar_path=opts.minify_closure, - uglifyjs_exe_path=opts.minify_uglifyjs, - uglifyjs2_exe_path=opts.minify_uglifyjs2) - - preparer.prepareTestcase(fn_in, fn_out, fn_prologue) + parser = optparse.OptionParser() + parser.add_option('--input', dest='input', default=None) + parser.add_option('--output', dest='output', default=None) + parser.add_option('--prologue', dest='prologue', default=None) + parser.add_option('--util-include-path', dest='util_include_path', default=None) + parser.add_option('--minify-closure', dest='minify_closure', default=None) # point to compiler.jar + parser.add_option('--minify-uglifyjs', dest='minify_uglifyjs', default=None) # point to uglifyjs exe + parser.add_option('--minify-uglifyjs2', dest='minify_uglifyjs2', default=None) # point to uglifyjs exe + (opts, args) = parser.parse_args() + + if opts.input is None or opts.output is None: + raise Exception('filename argument(s) missing (--input and/or --output)') + if opts.util_include_path is None: + raise Exception('missing util include path (--util-include-path)') + + fn_in = opts.input + fn_out = opts.output + fn_prologue = opts.prologue + + minify_provider = None + if opts.minify_closure is not None: + minify_provider = 'closure' + elif opts.minify_uglifyjs is not None: + minify_provider = 'uglifyjs' + elif opts.minify_uglifyjs2 is not None: + minify_provider = 'uglifyjs2' + else: + raise Exception('must provide a minifier (include files must be converted to one-liners)') + + preparer = TestcasePreparer(util_include_path=opts.util_include_path, + minify_provider=minify_provider, + closure_jar_path=opts.minify_closure, + 
uglifyjs_exe_path=opts.minify_uglifyjs, + uglifyjs2_exe_path=opts.minify_uglifyjs2) + + preparer.prepareTestcase(fn_in, fn_out, fn_prologue) if __name__ == '__main__': - main() + main() diff --git a/util/rdfdiff.py b/util/rdfdiff.py new file mode 100644 index 00000000..c2cce6a5 --- /dev/null +++ b/util/rdfdiff.py @@ -0,0 +1,33 @@ +#!usr/bin/env python2 +# +# RDF graph diff, useful for diffing SPDX license for release checklist. +# +# Based on: +# +# - https://www.w3.org/2001/sw/wiki/How_to_diff_RDF +# - https://github.com/RDFLib/rdflib/blob/master/rdflib/compare.py +# + +import os +import sys + +def main(): + from rdflib import Graph + from rdflib.compare import to_isomorphic, graph_diff + + with open(sys.argv[1]) as f: + d1 = f.read() + with open(sys.argv[2]) as f: + d2 = f.read() + + print('Loading graph 1 from ' + sys.argv[1]) + g1 = Graph().parse(format='n3', data=d1) + + print('Loading graph 2 from ' + sys.argv[2]) + g2 = Graph().parse(format='n3', data=d2) + + iso1 = to_isomorphic(g1) + iso2 = to_isomorphic(g2) + +if __name__ == '__main__': + main() diff --git a/util/resolve_combined_lineno.py b/util/resolve_combined_lineno.py deleted file mode 100644 index e9db7d64..00000000 --- a/util/resolve_combined_lineno.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python2 -# -# Resolve a line number in the combined source into an uncombined file/line -# using a dist/src/metadata.json file. -# -# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345 -# - -import os -import sys -import json - -def main(): - with open(sys.argv[1], 'rb') as f: - metadata = json.loads(f.read()) - lineno = int(sys.argv[2]) - - for e in reversed(metadata['line_map']): - if lineno >= e['combined_line']: - orig_lineno = e['original_line'] + (lineno - e['combined_line']) - print('%s:%d -> %s:%d' % ('duktape.c', lineno, - e['original_file'], orig_lineno)) - break - -if __name__ == '__main__': - main() diff --git a/util/scan_strings.py b/util/scan_strings.py deleted file mode 100644 index 001137b8..00000000 --- a/util/scan_strings.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python2 -# -# Scan potential external strings from Ecmascript and C files. -# -# Very simplistic example with a lot of limitations: -# -# - Doesn't handle multiple variables in a variable declaration -# -# - Only extracts strings from C files, these may correspond to -# Duktape/C bindings (but in many cases don't) -# - -import os -import sys -import re -import json - -strmap = {} - -# Ecmascript function declaration -re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE) - -# Ecmascript variable declaration -# XXX: doesn't handle multiple variables -re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE) - -# Ecmascript variable assignment -re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE) - -# Ecmascript dotted property reference (also matches numbers like -# '4.0', which are separately rejected below) -re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE) -re_digits = re.compile(r'^\d+$', re.UNICODE) - -# Ecmascript or C string literal -re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE) -re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE) - -def strDecode(x): - # Need to decode hex, unicode, and other escapes. Python syntax - # is close enough to C and Ecmascript so use eval for now. 
- - try: - return eval('u' + x) # interpret as unicode string - except: - sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n') - return None - -def scan(f, fn): - global strmap - - # Scan rules depend on file type - if fn[-2:] == '.c': - use_funcname = False - use_vardecl = False - use_varassign = False - use_propref = False - use_strlit_dquot = True - use_strlit_squot = False - else: - use_funcname = True - use_vardecl = True - use_varassign = True - use_propref = True - use_strlit_dquot = True - use_strlit_squot = True - - for line in f: - # Assume input data is UTF-8 - line = line.decode('utf-8') - - if use_funcname: - for m in re_funcname.finditer(line): - strmap[m.group(1)] = True - - if use_vardecl: - for m in re_vardecl.finditer(line): - strmap[m.group(1)] = True - - if use_varassign: - for m in re_varassign.finditer(line): - strmap[m.group(1)] = True - - if use_propref: - for m in re_propref.finditer(line): - parts = m.group(1).split('.') - if re_digits.match(parts[0]) is not None: - # Probably a number ('4.0' or such) - pass - else: - for part in parts: - strmap[part] = True - - if use_strlit_dquot: - for m in re_strlit_dquot.finditer(line): - s = strDecode(m.group(1)) - if s is not None: - strmap[s] = True - - if use_strlit_squot: - for m in re_strlit_squot.finditer(line): - s = strDecode(m.group(1)) - if s is not None: - strmap[s] = True - -def main(): - for fn in sys.argv[1:]: - f = open(fn, 'rb') - scan(f, fn) - f.close() - - strs = [] - strs_base64 = [] - doc = { - # Strings as Unicode strings - 'scanned_strings': strs, - - # Strings as base64-encoded UTF-8 data, which should be ready - # to be used in C code (Duktape internal string representation - # is UTF-8) - 'scanned_strings_base64': strs_base64 - } - k = strmap.keys() - k.sort() - for s in k: - strs.append(s) - t = s.encode('utf-8').encode('base64') - if len(t) > 0 and t[-1] == '\n': - t = t[0:-1] - strs_base64.append(t) - - print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True)) - -if __name__ == '__main__': - main() diff --git a/util/time_multi.py b/util/time_multi.py index 5e658170..c0bf84f8 100644 --- a/util/time_multi.py +++ b/util/time_multi.py @@ -10,93 +10,93 @@ import optparse import subprocess def main(): - parser = optparse.OptionParser() - parser.add_option('--count', type='int', dest='count', default=3) - parser.add_option('--mode', dest='mode', default='min') - parser.add_option('--sleep', type='float', dest='sleep', default=0.0) - parser.add_option('--sleep-factor', type='float', dest='sleep_factor', default=0.0) - parser.add_option('--rerun-limit', type='int', dest='rerun_limit', default=30) - parser.add_option('--verbose', action='store_true', dest='verbose', default=False) - (opts, args) = parser.parse_args() - - time_min = None - time_max = None - time_sum = 0.0 - time_list = [] - - if opts.verbose: - sys.stderr.write('Running:') - sys.stderr.flush() - - for i in xrange(opts.count): - time.sleep(opts.sleep) - - cmd = [ - 'time', - '-f', '%U', - '--quiet' - ] - cmd = cmd + args - #print(repr(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - retval = p.wait() - #print(i, retval, stdout, stderr) - - if retval == 139: - print 'segv' - sys.exit(1) - elif retval != 0: - print 'n/a' - sys.exit(1) - - time_this = float(stderr) - #print(i, time_this) - - if time_min is None: - time_min = time_this - else: - time_min = min(time_min, time_this) - if time_max is None: - time_max = time_this - else: - time_max = 
max(time_max, time_this) - time_sum += time_this - - if opts.verbose: - sys.stderr.write(' %f' % time_this) - sys.stderr.flush() - - time_list.append(time_this) - - # Sleep time dependent on test time is useful for thermal throttling. - time.sleep(opts.sleep_factor * time_this) - - # If run takes too long, there's no point in trying to get an accurate - # estimate. - if time_this >= opts.rerun_limit: - break - - if opts.verbose: - sys.stderr.write('\n') - sys.stderr.flush() - - time_avg = time_sum / float(len(time_list)) - - # /usr/bin/time has only two digits of resolution - if opts.mode == 'min': - print('%.02f' % time_min) - elif opts.mode == 'max': - print('%.02f' % time_max) - elif opts.mode == 'avg': - print('%.02f' % time_avg) - elif opts.mode == 'all': - print('min=%.02f, max=%.02f, avg=%0.2f, count=%d: %r' % \ - (time_min, time_max, time_avg, len(time_list), time_list)) - else: - print('invalid mode: %r' % opts.mode) - - sys.exit(0) + parser = optparse.OptionParser() + parser.add_option('--count', type='int', dest='count', default=3) + parser.add_option('--mode', dest='mode', default='min') + parser.add_option('--sleep', type='float', dest='sleep', default=0.0) + parser.add_option('--sleep-factor', type='float', dest='sleep_factor', default=0.0) + parser.add_option('--rerun-limit', type='int', dest='rerun_limit', default=30) + parser.add_option('--verbose', action='store_true', dest='verbose', default=False) + (opts, args) = parser.parse_args() + + time_min = None + time_max = None + time_sum = 0.0 + time_list = [] + + if opts.verbose: + sys.stderr.write('Running:') + sys.stderr.flush() + + for i in xrange(opts.count): + time.sleep(opts.sleep) + + cmd = [ + 'time', + '-f', '%U', + '--quiet' + ] + cmd = cmd + args + #print(repr(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + retval = p.wait() + #print(i, retval, stdout, stderr) + + if retval == 139: + print 'segv' + sys.exit(1) + elif retval != 0: + print 'n/a' + sys.exit(1) + + time_this = float(stderr) + #print(i, time_this) + + if time_min is None: + time_min = time_this + else: + time_min = min(time_min, time_this) + if time_max is None: + time_max = time_this + else: + time_max = max(time_max, time_this) + time_sum += time_this + + if opts.verbose: + sys.stderr.write(' %f' % time_this) + sys.stderr.flush() + + time_list.append(time_this) + + # Sleep time dependent on test time is useful for thermal throttling. + time.sleep(opts.sleep_factor * time_this) + + # If run takes too long, there's no point in trying to get an accurate + # estimate. + if time_this >= opts.rerun_limit: + break + + if opts.verbose: + sys.stderr.write('\n') + sys.stderr.flush() + + time_avg = time_sum / float(len(time_list)) + + # /usr/bin/time has only two digits of resolution + if opts.mode == 'min': + print('%.02f' % time_min) + elif opts.mode == 'max': + print('%.02f' % time_max) + elif opts.mode == 'avg': + print('%.02f' % time_avg) + elif opts.mode == 'all': + print('min=%.02f, max=%.02f, avg=%0.2f, count=%d: %r' % \ + (time_min, time_max, time_avg, len(time_list), time_list)) + else: + print('invalid mode: %r' % opts.mode) + + sys.exit(0) if __name__ == '__main__': - main() + main()
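
Note on the new util/rdfdiff.py added above: as committed it canonicalizes the two graphs with to_isomorphic() and imports graph_diff, but never reports a result. A minimal sketch of how the comparison could be completed with rdflib's graph_diff is below; it is not part of the patch, and the helper name report_diff() and the plain repr() output are illustrative assumptions only (rdflib must be installed).

# Sketch only, not part of the patch; assumes iso1/iso2 come from
# rdflib.compare.to_isomorphic() as in util/rdfdiff.py above.
from rdflib.compare import graph_diff

def report_diff(iso1, iso2):
    # graph_diff() partitions the triples into three graphs: those shared
    # by both inputs, those only in the first, and those only in the second.
    in_both, in_first, in_second = graph_diff(iso1, iso2)

    print('Triples only in graph 1:')
    for triple in in_first:
        print('  ' + repr(triple))

    print('Triples only in graph 2:')
    for triple in in_second:
        print('  ' + repr(triple))

For the SPDX release-checklist use case mentioned in the script header, calling report_diff(iso1, iso2) at the end of main() would print which license triples changed between two releases.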