
Split dist util, reorg tools, Python PEP8

pull/928/head
Sami Vaarala 8 years ago
commit 9e8f16c43e
Makefile | 3
config/extract_unique_options.py | 41
config/genconfig.py | 1536
debugger/merge_debug_meta.py | 32
debugger/util/heapjson_convert.py | 70
examples/alloc-logging/log2gnuplot.py | 40
examples/alloc-logging/pool_simulator.py | 1438
misc/bin2img.py | 91
misc/c_overflow_test.py | 34
misc/chaos.py | 1
src/dukutil.py | 266
src/extract_caseconv.py | 444
src/extract_chars.py | 353
src/genbuildparams.py | 44
src/genbuiltins.py | 2983
src/genequivyear.py | 57
src/genexesizereport.py | 164
src/genhashsizes.py | 124
src/genobjsizereport.py | 73
src/prepare_unicode_data.py | 37
src/scan_used_stridx_bidx.py | 56
tools/combine_src.py | 257
tools/create_spdx_license.py | 246
tools/duk_meta_to_strarray.py | 49
tools/dukutil.py | 259
tools/dump_bytecode.py | 130
tools/extract_caseconv.py | 444
tools/extract_chars.py | 382
tools/extract_unique_options.py | 41
tools/genbuildparams.py | 44
tools/genbuiltins.py | 2985
tools/genconfig.py | 1530
tools/json2yaml.py | 5
tools/merge_debug_meta.py | 32
tools/prepare_sources.py | 854
tools/prepare_unicode_data.py | 37
tools/resolve_combined_lineno.py | 26
tools/scan_strings.py | 135
tools/scan_used_stridx_bidx.py | 56
tools/yaml2json.py | 2
util/autofix_debuglog_calls.py | 62
util/check_code_policy.py | 722
util/combine_src.py | 257
util/create_spdx_license.py | 246
util/ditz_hack.py | 30
util/duk_meta_to_strarray.py | 49
util/dump_bytecode.py | 130
util/example_rombuild.sh | 43
util/example_user_builtins1.yaml | 6
util/fastint_reps.py | 102
util/filter_test262_log.py | 216
util/find_func_calls.py | 76
util/find_non_ascii.py | 24
util/fix_emscripten.py | 64
util/format_perftest.py | 78
util/gendoubleconsts.py | 40
util/genequivyear.py | 57
util/genexesizereport.py | 164
util/genhashsizes.py | 124
util/gennumdigits.py | 24
util/genobjsizereport.py | 73
util/json2yaml.py | 5
util/make_ascii.py | 8
util/make_dist.py | 1316
util/matrix_compile.py | 933
util/prep_test.py | 310
util/rdfdiff.py | 33
util/resolve_combined_lineno.py | 26
util/scan_strings.py | 135
util/time_multi.py | 176

Makefile | 3

@@ -1152,8 +1152,9 @@ codepolicycheck:
 		--check-non-ascii \
 		--check-trailing-whitespace \
 		--check-mixed-indent \
+		--check-tab-indent \
 		--dump-vim-commands \
-		src/*.py
+		src/*.py tools/*.py util/*.py debugger/*/*.py examples/*/*.py
 	@$(PYTHON) util/check_code_policy.py \
 		$(CODEPOLICYOPTS) \
 		--check-debug-log-calls \

config/extract_unique_options.py | 41

@@ -1,41 +0,0 @@
#!/usr/bin/env python2
#
# Extract unique DUK_USE_xxx flags from current code base:
#
#   $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in
#

import os, sys, re

# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched
# (only uppercase allowed)
re_use = re.compile(r'DUK_USE_[A-Z0-9_]+')
re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+')

def main():
    uses = {}
    opts = {}

    for fn in sys.argv[1:]:
        f = open(fn, 'rb')
        for line in f:
            for t in re.findall(re_use, line):
                if t[-1] != '_':  # skip e.g. 'DUK_USE_'
                    uses[t] = True
            for t in re.findall(re_opt, line):
                if t[-1] != '_':
                    opts[t] = True
        f.close()

    k = opts.keys()
    k.sort()
    for i in k:
        print(i)
    k = uses.keys()
    k.sort()
    for i in k:
        print(i)

if __name__ == '__main__':
    main()

config/genconfig.py | 1536

File diff suppressed because it is too large

debugger/merge_debug_meta.py | 32

@@ -1,32 +0,0 @@
#!/usr/bin/env python2
#
# Merge debugger YAML metadata files and output a merged JSON metadata file.
#

import os, sys, json, yaml
import optparse

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--output', dest='output', default=None, help='output JSON filename')
    parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names')
    parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands')
    parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes')
    parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes')
    (opts, args) = parser.parse_args()

    res = {}
    def merge(fn):
        with open(fn, 'rb') as f:
            doc = yaml.load(f)
        for k in doc.keys():
            res[k] = doc[k]

    merge(opts.class_names)
    merge(opts.debug_commands)
    merge(opts.debug_errors)
    merge(opts.opcodes)

    with open(opts.output, 'wb') as f:
        f.write(json.dumps(res, indent=4) + '\n')
    print('Wrote merged debugger metadata to ' + str(opts.output))

debugger/util/heapjson_convert.py | 70

@@ -13,40 +13,40 @@ add_proto = False
add_props = True

def main():
    f = open(sys.argv[1], 'rb')
    heapdump = json.loads(f.read())
    f.close()

    objs = {}
    for obj in heapdump['heapObjects']:
        objs[obj['ptr']['HEAPPTR']] = obj

    f = sys.stdout

    def is_obj(x):
        if not objs.has_key(x):
            return False
        return objs[x]['type'] == 2

    def emit(x, y):
        # XXX: only emit edges between objects (not strings or buffers)
        if is_obj(y):
            f.write('h%s,h%s\n' % (x, y))

    #f.write('digraph heap {\n')
    f.write('Source,Target\n')
    for obj in heapdump['heapObjects']:
        x = obj['ptr']['HEAPPTR']
        if add_proto and obj.has_key('proto'):
            #f.write('h%s -> h%s;\n' % (x, obj['proto']['HEAPPTR']))
            f.write('h%s,h%s\n' % (x, obj['proto']['HEAPPTR']))
        if add_props and obj.has_key('props'):
            for p in obj['props']:
                if p.has_key('key'):
                    emit(x, p['key']['HEAPPTR'])
                if p.has_key('value') and isinstance(p['value'], dict) and p['value'].has_key('HEAPPTR'):
                    emit(x, p['value']['HEAPPTR'])
    #f.write('}\n')

if __name__ == '__main__':
    main()

examples/alloc-logging/log2gnuplot.py | 40

@@ -12,30 +12,30 @@ import os
import sys

def main():
    allocated = 0

    for line in sys.stdin:
        line = line.strip()
        parts = line.split(' ')

        # A ptr/NULL/FAIL size
        # F ptr/NULL size
        # R ptr/NULL oldsize ptr/NULL/FAIL newsize
        # Note: ajduk doesn't log oldsize (uses -1 instead)

        if parts[0] == 'A':
            if parts[1] != 'NULL' and parts[1] != 'FAIL':
                allocated += long(parts[2])
        elif parts[0] == 'F':
            allocated -= long(parts[2])
        elif parts[0] == 'R':
            allocated -= long(parts[2])
            if parts[3] != 'NULL' and parts[3] != 'FAIL':
                allocated += long(parts[4])

        print(allocated)
    print(allocated)

if __name__ == '__main__':
    main()
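The comment block above documents the allocator log format; as a quick illustration (the addresses and sizes below are made up), the running total printed after each event would evolve like this:

    # A 0x1000 256              ->  256   (new allocation)
    # A 0x2000 128              ->  384
    # R 0x2000 128 0x3000 512   ->  768   (realloc: subtract oldsize, add newsize)
    # F 0x1000 256              ->  512   (free)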

examples/alloc-logging/pool_simulator.py | 1438

File diff suppressed because it is too large

misc/bin2img.py | 91

@@ -2,51 +2,50 @@ import sys
from PIL import Image

def main():
    f = open(sys.argv[1], 'rb')
    data = f.read()
    f.close()

    use_bits = True
    BYTESPERLINE = 128
    BITSPERLINE = BYTESPERLINE * 8

    if use_bits:
        width = BITSPERLINE
        height = (len(data) * 8 + BITSPERLINE - 1) / BITSPERLINE
    else:
        width = BYTESPERLINE
        height = (len(data) + BYTESPERLINE - 1) / BYTESPERLINE
    img = Image.new('RGBA', (width, height))

    for y in xrange(height):
        if use_bits:
            for x in xrange(width):
                idx = y * BYTESPERLINE + (x / 8)
                bitidx = x % 8  # 0 = topmost
                if idx >= len(data):
                    img.putpixel((x,y), (255, 255, 255, 255))
                else:
                    v = ord(data[idx])
                    v = (v >> (7 - bitidx)) & 0x01
                    if v > 0:
                        v = 0
                    else:
                        v = 255
                    img.putpixel((x,y), (v, v, v, 255))
        else:
            for x in xrange(width):
                idx = y * BYTESPERLINE + x
                if idx >= len(data):
                    img.putpixel((x,y), (255, 255, 255, 255))
                else:
                    v = ord(data[idx])
                    img.putpixel((x,y), (v, v, v, 255))

    img.save(sys.argv[2])

if __name__ == '__main__':
    main()

misc/c_overflow_test.py | 34

@@ -4,23 +4,23 @@ import math
limit = (1 << 32) - 1

for i in xrange(65536 + 10):
    if i == 0:
        continue

    temp = float(1 << 32) / float(i)
    approx1 = int(math.floor(temp) - 3)
    approx2 = int(math.floor(temp + 3))
    for j in xrange(approx1, approx2 + 1):
        if i*j >= (1 << 32):
            exact = True
        else:
            exact = False

        if i > limit / j:
            check = True
        else:
            check = False

        #print(i, j, exact, check)
        if exact != check:
            print('inexact', i, j)
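The identity being brute-forced here: for 32-bit unsigned operands, i * j overflows exactly when i > (2^32 - 1) / j under integer division. A concrete instance (values chosen only for illustration):

    # i = j = 70000:
    #   (2**32 - 1) // 70000 == 61356, and 70000 > 61356
    #   70000 * 70000 == 4900000000 >= 2**32, so both tests agree.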

misc/chaos.py | 1

@@ -10,4 +10,3 @@ data = sys.stdin.read()
data = data.strip()
data = data.decode('hex')
sys.stdout.write(data)

src/dukutil.py | 266

@@ -1,266 +0,0 @@
#!/usr/bin/env python2
#
# Python utilities shared by the build scripts.
#

import datetime
import json

class BitEncoder:
    "Bitstream encoder."

    _bits = None

    def __init__(self):
        self._bits = []

    def bits(self, x, nbits):
        if (x >> nbits) != 0:
            raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits))
        for i in xrange(nbits):
            t = (x >> (nbits - i - 1)) & 0x01
            self._bits.append(t)

    def string(self, x):
        nbits = len(x) * 8

        for i in xrange(nbits):
            byteidx = i / 8
            bitidx = i % 8
            if byteidx < 0 or byteidx >= len(x):
                self._bits.append(0)
            else:
                t = (ord(x[byteidx]) >> (7 - bitidx)) & 0x01
                self._bits.append(t)

    def getNumBits(self):
        "Get current number of encoded bits."
        return len(self._bits)

    def getNumBytes(self):
        "Get current number of encoded bytes, rounded up."
        nbits = len(self._bits)
        while (nbits % 8) != 0:
            nbits += 1
        return nbits / 8

    def getBytes(self):
        "Get current bitstream as a byte sequence, padded with zero bits."
        bytes = []

        for i in xrange(self.getNumBytes()):
            t = 0
            for j in xrange(8):
                off = i*8 + j
                if off >= len(self._bits):
                    t = (t << 1)
                else:
                    t = (t << 1) + self._bits[off]
            bytes.append(t)

        return bytes

    def getByteString(self):
        "Get current bitstream as a string."
        return ''.join([chr(i) for i in self.getBytes()])

class GenerateC:
    "Helper for generating C source and header files."

    _data = None
    wrap_col = 76

    def __init__(self):
        self._data = []

    def emitRaw(self, text):
        "Emit raw text (without automatic newline)."
        self._data.append(text)

    def emitLine(self, text):
        "Emit a raw line (with automatic newline)."
        self._data.append(text + '\n')

    def emitHeader(self, autogen_by):
        "Emit file header comments."

        # Note: a timestamp would be nice but it breaks incremental building
        self.emitLine('/*')
        self.emitLine(' * Automatically generated by %s, do not edit!' % autogen_by)
        self.emitLine(' */')
        self.emitLine('')

    def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True):
        "Emit an array as a C array."

        # lenient input
        if isinstance(data, unicode):
            data = data.encode('utf-8')
        if isinstance(data, str):
            tmp = []
            for i in xrange(len(data)):
                tmp.append(ord(data[i]))
            data = tmp

        size_spec = ''
        if size is not None:
            size_spec = '%d' % size
        visib_qual = ''
        if visibility is not None:
            visib_qual = visibility + ' '
        const_qual = ''
        if const:
            const_qual = 'const '
        self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec))

        line = ''
        for i in xrange(len(data)):
            if intvalues:
                suffix = ''
                if data[i] < -32768 or data[i] > 32767:
                    suffix = 'L'
                t = "%d%s," % (data[i], suffix)
            else:
                t = "(%s)'\\x%02x', " % (typename, data[i])
            if len(line) + len(t) >= self.wrap_col:
                self.emitLine(line)
                line = t
            else:
                line += t
        if line != '':
            self.emitLine(line)
        self.emitLine('};')

    def emitDefine(self, name, value, comment=None):
        "Emit a C define with an optional comment."

        # XXX: there is no escaping right now (for comment or value)
        if comment is not None:
            self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment))
        else:
            self.emitLine('#define %-60s %s' % (name, value))

    def getString(self):
        "Get the entire file as a string."
        return ''.join(self._data)

def json_encode(x):
    "JSON encode a value."
    try:
        return json.dumps(x)
    except AttributeError:
        pass

    # for older library versions
    return json.write(x)

def json_decode(x):
    "JSON decode a value."
    try:
        return json.loads(x)
    except AttributeError:
        pass

    # for older library versions
    return json.read(x)

# Compute a byte hash identical to duk_util_hashbytes().
DUK__MAGIC_M = 0x5bd1e995
DUK__MAGIC_R = 24
def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian):
    h = (str_seed ^ nbytes) & 0xffffffff

    while nbytes >= 4:
        # 4-byte fetch byte order:
        #  - native (endian dependent) if unaligned accesses allowed
        #  - little endian if unaligned accesses not allowed
        if big_endian:
            k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \
                (ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24)
        else:
            k = ord(x[off]) + (ord(x[off + 1]) << 8) + \
                (ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24)

        k = (k * DUK__MAGIC_M) & 0xffffffff
        k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff
        k = (k * DUK__MAGIC_M) & 0xffffffff
        h = (h * DUK__MAGIC_M) & 0xffffffff
        h = (h ^ k) & 0xffffffff

        off += 4
        nbytes -= 4

    if nbytes >= 3:
        h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff
    if nbytes >= 2:
        h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff
    if nbytes >= 1:
        h = (h ^ ord(x[off])) & 0xffffffff

    h = (h * DUK__MAGIC_M) & 0xffffffff
    h = (h ^ (h >> 13)) & 0xffffffff
    h = (h * DUK__MAGIC_M) & 0xffffffff
    h = (h ^ (h >> 15)) & 0xffffffff

    return h

# Compute a string hash identical to duk_heap_hashstring() when dense
# hashing is enabled.
DUK__STRHASH_SHORTSTRING = 4096
DUK__STRHASH_MEDIUMSTRING = 256 * 1024
DUK__STRHASH_BLOCKSIZE = 256
def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False):
    str_seed = (hash_seed ^ len(x)) & 0xffffffff

    if len(x) <= DUK__STRHASH_SHORTSTRING:
        res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian)
    else:
        if len(x) <= DUK__STRHASH_MEDIUMSTRING:
            skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE
        else:
            skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE

        res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian)
        off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256
        while off < len(x):
            left = len(x) - off
            now = left
            if now > DUK__STRHASH_BLOCKSIZE:
                now = DUK__STRHASH_BLOCKSIZE
            res = (res ^ duk_util_hashbytes(x, off, now, str_seed, big_endian)) & 0xffffffff
            off += skip

    if strhash16:
        res &= 0xffff

    return res

# Compute a string hash identical to duk_heap_hashstring() when sparse
# hashing is enabled.
DUK__STRHASH_SKIP_SHIFT = 5  # XXX: assumes default value
def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False):
    res = (hash_seed ^ len(x)) & 0xffffffff

    step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1
    off = len(x)
    while off >= step:
        assert(off >= 1)
        res = ((res * 33) + ord(x[off - 1])) & 0xffffffff
        off -= step

    if strhash16:
        res &= 0xffff

    return res

# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length().
def duk_unicode_unvalidated_utf8_length(x):
    assert(isinstance(x, str))
    clen = 0
    for c in x:
        t = ord(c)
        if t < 0x80 or t >= 0xc0:  # 0x80...0xbf are continuation chars, not counted
            clen += 1
    return clen
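These reference implementations mirror Duktape's C hash functions (the constants are MurmurHash2-style); a minimal usage sketch, with an arbitrary seed value rather than Duktape's actual one:

    # Illustrative only: arbitrary 32-bit seed, not Duktape's default.
    h = duk_heap_hashstring_dense('example string', 0xdeadbeef)
    print('hash: 0x%08x' % h)
    print(duk_unicode_unvalidated_utf8_length('\xc3\xa4'))  # 1: continuation byte not counted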

src/extract_caseconv.py | 444

@@ -1,444 +0,0 @@
#!/usr/bin/env python2
#
# Extract rules for Unicode case conversion, specifically the behavior
# required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19.  The
# bitstream encoded rules are used for the slow path at run time, so
# compactness is favored over speed.
#
# There is no support for context or locale sensitive rules, as they
# are handled directly in C code before consulting tables generated
# here.  Ecmascript requires case conversion both with and without
# locale/language specific rules (e.g. String.prototype.toLowerCase()
# and String.prototype.toLocaleLowerCase()), so they are best handled
# in C anyway.
#
# Case conversion rules for ASCII are also excluded as they are
# handled by the C fast path.  Rules for non-BMP characters (codepoints
# above U+FFFF) are omitted as they're not required for standard
# Ecmascript.
#

import os, sys, math
import optparse

import dukutil

class UnicodeData:
    "Read UnicodeData.txt into an internal representation."

    def __init__(self, filename):
        self.data = self.read_unicode_data(filename)
        print 'read %d unicode data entries' % len(self.data)

    def read_unicode_data(self, filename):
        res = []

        f = open(filename, 'rb')
        for line in f:
            if line.startswith('#'):
                continue
            line = line.strip()
            if line == '':
                continue
            parts = line.split(';')
            if len(parts) != 15:
                raise Exception('invalid unicode data line')
            res.append(parts)
        f.close()

        # Sort based on Unicode codepoint
        def mycmp(a, b):
            return cmp(long(a[0], 16), long(b[0], 16))

        res.sort(cmp=mycmp)
        return res

class SpecialCasing:
    "Read SpecialCasing.txt into an internal representation."

    def __init__(self, filename):
        self.data = self.read_special_casing_data(filename)
        print 'read %d special casing entries' % len(self.data)

    def read_special_casing_data(self, filename):
        res = []

        f = open(filename, 'rb')
        for line in f:
            try:
                idx = line.index('#')
                line = line[:idx]
            except ValueError:
                pass
            line = line.strip()
            if line == '':
                continue

            parts = line.split(';')
            parts = [i.strip() for i in parts]
            while len(parts) < 6:
                parts.append('')

            res.append(parts)
        f.close()

        return res

def parse_unicode_sequence(x):
    res = ''
    for i in x.split(' '):
        i = i.strip()
        if i == '':
            continue
        res += unichr(long(i, 16))
    return res

def get_base_conversion_maps(unicode_data):
    "Create case conversion tables without handling special casing yet."

    uc = {}  # codepoint (number) -> string
    lc = {}
    tc = {}  # titlecase

    for x in unicode_data.data:
        c1 = long(x[0], 16)

        # just 16-bit support needed
        if c1 >= 0x10000:
            continue

        if x[12] != '':
            # field 12: simple uppercase mapping
            c2 = parse_unicode_sequence(x[12])
            uc[c1] = c2
            tc[c1] = c2  # titlecase default == uppercase, overridden below if necessary
        if x[13] != '':
            # field 13: simple lowercase mapping
            c2 = parse_unicode_sequence(x[13])
            lc[c1] = c2
        if x[14] != '':
            # field 14: simple titlecase mapping
            c2 = parse_unicode_sequence(x[14])
            tc[c1] = c2

    return uc, lc, tc

def update_special_casings(uc, lc, tc, special_casing):
    "Update case conversion tables with special case conversion rules."

    for x in special_casing.data:
        c1 = long(x[0], 16)

        if x[4] != '':
            # conditions
            continue

        lower = parse_unicode_sequence(x[1])
        title = parse_unicode_sequence(x[2])
        upper = parse_unicode_sequence(x[3])

        if len(lower) > 1:
            lc[c1] = lower
        if len(upper) > 1:
            uc[c1] = upper
        if len(title) > 1:
            tc[c1] = title

        print 'special case: %d %d %d' % (len(lower), len(upper), len(title))

def remove_ascii_part(convmap):
    "Remove ASCII case conversion parts (handled by the C fast path)."

    for i in xrange(128):
        if convmap.has_key(i):
            del convmap[i]

def scan_range_with_skip(convmap, start_idx, skip):
    "Scan for a range of continuous case conversion with a certain 'skip'."

    conv_i = start_idx
    if not convmap.has_key(conv_i):
        return None, None, None
    elif len(convmap[conv_i]) > 1:
        return None, None, None
    else:
        conv_o = ord(convmap[conv_i])

    start_i = conv_i
    start_o = conv_o

    while True:
        new_i = conv_i + skip
        new_o = conv_o + skip

        if not convmap.has_key(new_i):
            break
        if len(convmap[new_i]) > 1:
            break
        if ord(convmap[new_i]) != new_o:
            break

        conv_i = new_i
        conv_o = new_o

    # [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char
    count = (conv_i - start_i) / skip + 1
    if count <= 1:
        return None, None, None

    # we have an acceptable range, remove them from the convmap here
    for i in xrange(start_i, conv_i + skip, skip):
        del convmap[i]

    return start_i, start_o, count

def find_first_range_with_skip(convmap, skip):
    "Find first range with a certain 'skip' value."

    for i in xrange(65536):
        start_i, start_o, count = scan_range_with_skip(convmap, i, skip)
        if start_i is None:
            continue
        return start_i, start_o, count

    return None, None, None

def generate_tables(convmap):
    "Generate bit-packed case conversion table for a given conversion map."

    # The bitstream encoding is based on manual inspection for whatever
    # regularity the Unicode case conversion rules have.
    #
    # Start with a full description of case conversions which does not
    # cover all codepoints; unmapped codepoints convert to themselves.
    # Scan for range-to-range mappings with a range of skips starting from 1.
    # Whenever a valid range is found, remove it from the map.  Finally,
    # output the remaining case conversions (1:1 and 1:n) on a per codepoint
    # basis.
    #
    # This is very slow because we always scan from scratch, but it's the
    # most reliable and simple way to scan.

    ranges = []   # range mappings (2 or more consecutive mappings with a certain skip)
    singles = []  # 1:1 character mappings
    complex = []  # 1:n character mappings

    # Ranges with skips
    for skip in xrange(1, 6+1):  # skips 1...6 are useful
        while True:
            start_i, start_o, count = find_first_range_with_skip(convmap, skip)
            if start_i is None:
                break
            print 'skip %d: %d %d %d' % (skip, start_i, start_o, count)
            ranges.append([start_i, start_o, count, skip])

    # 1:1 conversions
    k = convmap.keys()
    k.sort()
    for i in k:
        if len(convmap[i]) > 1:
            continue
        singles.append([i, ord(convmap[i])])  # codepoint, codepoint
        del convmap[i]

    # There are many mappings to 2-char sequences with the latter char being
    # U+0399.  These could be handled as a special case, but we don't do that
    # right now.
    #
    #   [8064L, u'\u1f08\u0399']
    #   [8065L, u'\u1f09\u0399']
    #   [8066L, u'\u1f0a\u0399']
    #   [8067L, u'\u1f0b\u0399']
    #   [8068L, u'\u1f0c\u0399']
    #   [8069L, u'\u1f0d\u0399']
    #   [8070L, u'\u1f0e\u0399']
    #   [8071L, u'\u1f0f\u0399']
    #   ...
    #
    # tmp = {}
    # k = convmap.keys()
    # k.sort()
    # for i in k:
    #     if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399':
    #         tmp[i] = convmap[i][0]
    #         del convmap[i]
    # print repr(tmp)
    #
    # skip = 1
    # while True:
    #     start_i, start_o, count = find_first_range_with_skip(tmp, skip)
    #     if start_i is None:
    #         break
    #     print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count)
    # print len(tmp.keys())
    # print repr(tmp)
    #
    # XXX: need to put 12 remaining mappings back to convmap...

    # 1:n conversions
    k = convmap.keys()
    k.sort()
    for i in k:
        complex.append([i, convmap[i]])  # codepoint, string
        del convmap[i]

    for t in singles:
        print repr(t)
    for t in complex:
        print repr(t)

    print 'range mappings: %d' % len(ranges)
    print 'single character mappings: %d' % len(singles)
    print 'complex mappings (1:n): %d' % len(complex)
    print 'remaining (should be zero): %d' % len(convmap.keys())

    # XXX: opportunities for diff encoding skip=3 ranges?
    prev = None
    for t in ranges:
        # range: [start_i, start_o, count, skip]
        if t[3] != 3:
            continue
        if prev is not None:
            print '%d %d' % (t[0] - prev[0], t[1] - prev[1])
        else:
            print 'start: %d %d' % (t[0], t[1])
        prev = t

    # bit packed encoding
    be = dukutil.BitEncoder()

    for curr_skip in xrange(1, 7):  # 1...6
        count = 0
        for r in ranges:
            start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
            if skip != curr_skip:
                continue
            count += 1
        be.bits(count, 6)
        print 'encode: skip=%d, count=%d' % (curr_skip, count)

        for r in ranges:
            start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
            if skip != curr_skip:
                continue
            be.bits(start_i, 16)
            be.bits(start_o, 16)
            be.bits(r_count, 7)
    be.bits(0x3f, 6)  # maximum count value = end of skips

    count = len(singles)
    be.bits(count, 6)
    for t in singles:
        cp_i, cp_o = t[0], t[1]
        be.bits(cp_i, 16)
        be.bits(cp_o, 16)

    count = len(complex)
    be.bits(count, 7)
    for t in complex:
        cp_i, str_o = t[0], t[1]
        be.bits(cp_i, 16)
        be.bits(len(str_o), 2)
        for i in xrange(len(str_o)):
            be.bits(ord(str_o[i]), 16)

    return be.getBytes(), be.getNumBits()

def generate_regexp_canonicalize_lookup(convmap):
    res = []

    highest_nonid = -1

    for cp in xrange(65536):
        res_cp = cp  # default to as is
        if convmap.has_key(cp):
            tmp = convmap[cp]
            if len(tmp) == 1:
                # Multiple codepoints from input, ignore
                res_cp = ord(tmp[0])
        if cp >= 0x80 and res_cp < 0x80:
            res_cp = cp  # non-ASCII mapped to ASCII, ignore

        if cp != res_cp:
            highest_nonid = cp

        res.append(res_cp)

    # At the moment this is 65370, which means there's very little
    # gain in assuming 1:1 mapping above a certain BMP codepoint.
    print('HIGHEST NON-ID MAPPING: %d' % highest_nonid)
    return res

def clonedict(x):
    "Shallow clone of input dict."
    res = {}
    for k in x.keys():
        res[k] = x[k]
    return res

def main():
    parser = optparse.OptionParser()
    parser.add_option('--command', dest='command', default='caseconv_bitpacked')
    parser.add_option('--unicode-data', dest='unicode_data')
    parser.add_option('--special-casing', dest='special_casing')
    parser.add_option('--out-source', dest='out_source')
    parser.add_option('--out-header', dest='out_header')
    parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc')
    parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc')
    parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup')
    (opts, args) = parser.parse_args()

    unicode_data = UnicodeData(opts.unicode_data)
    special_casing = SpecialCasing(opts.special_casing)

    uc, lc, tc = get_base_conversion_maps(unicode_data)
    update_special_casings(uc, lc, tc, special_casing)

    if opts.command == 'caseconv_bitpacked':
        # XXX: ASCII and non-BMP filtering could be an option but is now hardcoded

        # ascii is handled with 'fast path' so not needed here
        t = clonedict(uc)
        remove_ascii_part(t)
        uc_bytes, uc_nbits = generate_tables(t)

        t = clonedict(lc)
        remove_ascii_part(t)
        lc_bytes, lc_nbits = generate_tables(t)

        # Generate C source and header files
        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
        genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes)))
        genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes)))
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()
    elif opts.command == 're_canon_lookup':
        # direct canonicalization lookup for case insensitive regexps, includes ascii part
        t = clonedict(uc)
        re_canon_lookup = generate_regexp_canonicalize_lookup(t)

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True)
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup)))
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()
    else:
        raise Exception('invalid command: %r' % opts.command)

if __name__ == '__main__':
    main()
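The bitstream layout written by generate_tables() (per-skip range counts, then 1:1 singles, then 1:n mappings) implies a straightforward decoder; a hedged sketch of the range section only, where read(n) is a hypothetical MSB-first bit reader matching BitEncoder.bits():

    # Sketch only: decode the range section of the generate_tables() bitstream.
    def decode_ranges(read):
        ranges = []
        skip = 1
        while True:
            count = read(6)
            if count == 0x3f:           # maximum count value = end of skips
                break
            for _ in xrange(count):
                start_i = read(16)      # first input codepoint
                start_o = read(16)      # first output codepoint
                r_count = read(7)       # number of mappings in this range
                ranges.append((start_i, start_o, r_count, skip))
            skip += 1
        return ranges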

src/extract_chars.py | 353

@@ -1,353 +0,0 @@
#!/usr/bin/env python2
#
# Select a set of Unicode characters (based on included/excluded categories
# etc) and write out a compact bitstream for matching a character against
# the set at runtime.  This is for the slow path, where we're especially
# concerned with compactness.  A C source file with the table is written,
# together with a matching C header.
#
# Unicode categories (such as 'Z') can be used.  Two pseudo-categories
# are also available for exclusion only: ASCII and NONBMP.  The "ASCII"
# category excludes ASCII codepoints, which is useful because C code
# typically contains an ASCII fast path so ASCII characters don't need
# to be considered in the Unicode tables.  "NONBMP" excludes codepoints
# above U+FFFF, which is useful because such codepoints don't need to be
# supported in standard Ecmascript.
#

import os, sys, math
import optparse

import dukutil

def read_unicode_data(unidata, catsinc, catsexc, filterfunc):
    "Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc."
    res = []
    f = open(unidata, 'rb')
    for line in f:
        line = line.strip()
        parts = line.split(';')

        codepoint = parts[0]
        category = parts[2]

        if filterfunc is not None and not filterfunc(long(codepoint, 16)):
            continue

        excluded = False
        for cat in catsexc:
            if category.startswith(cat) or codepoint == cat:
                excluded = True
        if excluded:
            continue

        for cat in catsinc:
            if category.startswith(cat) or codepoint == cat:
                res.append(line)
    f.close()

    # Sort based on Unicode codepoint
    def mycmp(a, b):
        t1 = a.split(';')
        t2 = b.split(';')
        n1 = long(t1[0], 16)
        n2 = long(t2[0], 16)
        return cmp(n1, n2)

    res.sort(cmp=mycmp)
    return res

def scan_ranges(lines):
    "Scan continuous ranges from (filtered) UnicodeData.txt lines."
    ranges = []
    range_start = None
    prev = None

    for line in lines:
        t = line.split(';')
        n = long(t[0], 16)
        if range_start is None:
            range_start = n
        else:
            if n == prev + 1:
                # continue range
                pass
            else:
                ranges.append((range_start, prev))
                range_start = n
        prev = n

    if range_start is not None:
        ranges.append((range_start, prev))

    return ranges

def generate_png(lines, fname):
    "Generate an illustrative PNG of the character set."
    from PIL import Image

    m = {}
    for line in lines:
        t = line.split(';')
        n = long(t[0], 16)
        m[n] = 1

    codepoints = 0x10ffff + 1
    width = int(256)
    height = int(math.ceil(float(codepoints) / float(width)))
    im = Image.new('RGB', (width, height))
    black = (0,0,0)
    white = (255,255,255)
    for cp in xrange(codepoints):
        y = cp / width
        x = cp % width
        if m.has_key(long(cp)):
            im.putpixel((x,y), black)
        else:
            im.putpixel((x,y), white)
    im.save(fname)

def generate_match_table1(ranges):
    "Unused match table format."

    # This is an earlier match table format which is no longer used.
    # IdentifierStart-UnicodeLetter has 445 ranges and generates a
    # match table of 2289 bytes.

    data = []
    prev_re = None

    def genrange(rs, re):
        if (rs > re):
            raise Exception('assumption failed: rs=%d re=%d' % (rs, re))

        while True:
            now = re - rs + 1
            if now > 255:
                now = 255
                data.append(now)  # range now
                data.append(0)    # skip 0
                rs = rs + now
            else:
                data.append(now)  # range now
                break

    def genskip(ss, se):
        if (ss > se):
            raise Exception('assumption failed: ss=%d se=%s' % (ss, se))

        while True:
            now = se - ss + 1
            if now > 255:
                now = 255
                data.append(now)  # skip now
                data.append(0)    # range 0
                ss = ss + now
            else:
                data.append(now)  # skip now
                break

    for rs, re in ranges:
        if prev_re is not None:
            genskip(prev_re + 1, rs - 1)
        genrange(rs, re)
        prev_re = re

    num_entries = len(data)

    # header: start of first range
    #         num entries
    hdr = []
    hdr.append(ranges[0][0] >> 8)  # XXX: check that not 0x10000 or over
    hdr.append(ranges[0][1] & 0xff)
    hdr.append(num_entries >> 8)
    hdr.append(num_entries & 0xff)

    return hdr + data

def generate_match_table2(ranges):
    "Unused match table format."

    # Another attempt at a match table which is also unused.
    # Total tables for all current classes is now 1472 bytes.

    data = []

    def enc(x):
        while True:
            if x < 0x80:
                data.append(x)
                break
            data.append(0x80 + (x & 0x7f))
            x = x >> 7

    prev_re = 0

    for rs, re in ranges:
        r1 = rs - prev_re  # 1 or above (no unjoined ranges)
        r2 = re - rs       # 0 or above
        enc(r1)
        enc(r2)
        prev_re = re

    enc(0)  # end marker

    return data

def generate_match_table3(ranges):
    "Current match table format."

    # Yet another attempt, similar to generate_match_table2 except
    # in packing format.
    #
    # Total match size now (at time of writing): 1194 bytes.
    #
    # This is the current encoding format used in duk_lexer.c.

    be = dukutil.BitEncoder()

    freq = [0] * (0x10ffff + 1)  # informative

    def enc(x):
        freq[x] += 1

        if x <= 0x0e:
            # 4-bit encoding
            be.bits(x, 4)
            return
        x -= 0x0e + 1
        if x <= 0xfd:
            # 12-bit encoding
            be.bits(0x0f, 4)
            be.bits(x, 8)
            return
        x -= 0xfd + 1
        if x <= 0xfff:
            # 24-bit encoding
            be.bits(0x0f, 4)
            be.bits(0xfe, 8)
            be.bits(x, 12)
            return
        x -= 0xfff + 1
        if True:
            # 36-bit encoding
            be.bits(0x0f, 4)
            be.bits(0xff, 8)
            be.bits(x, 24)
            return

        raise Exception('cannot encode')

    prev_re = 0

    for rs, re in ranges:
        r1 = rs - prev_re  # 1 or above (no unjoined ranges)
        r2 = re - rs       # 0 or above
        enc(r1)
        enc(r2)
        prev_re = re

    enc(0)  # end marker

    data, nbits = be.getBytes(), be.getNumBits()
    return data, freq

def main():
    parser = optparse.OptionParser()
    parser.add_option('--unicode-data', dest='unicode_data')      # UnicodeData.txt
    parser.add_option('--special-casing', dest='special_casing')  # SpecialCasing.txt
    parser.add_option('--include-categories', dest='include_categories')
    parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE')
    parser.add_option('--out-source', dest='out_source')
    parser.add_option('--out-header', dest='out_header')
    parser.add_option('--out-png', dest='out_png')
    parser.add_option('--table-name', dest='table_name', default='match_table')
    (opts, args) = parser.parse_args()

    unidata = opts.unicode_data
    catsinc = []
    if opts.include_categories != '':
        catsinc = opts.include_categories.split(',')
    catsexc = []
    if opts.exclude_categories != 'NONE':
        catsexc = opts.exclude_categories.split(',')

    print 'CATSEXC: %s' % repr(catsexc)
    print 'CATSINC: %s' % repr(catsinc)

    # pseudocategories
    filter_ascii = ('ASCII' in catsexc)
    filter_nonbmp = ('NONBMP' in catsexc)

    # Read raw result
    def filter1(x):
        if filter_ascii and x <= 0x7f:
            # exclude ascii
            return False
        if filter_nonbmp and x >= 0x10000:
            # exclude non-bmp
            return False
        return True

    res = read_unicode_data(unidata, catsinc, catsexc, filter1)

    # Raw output
    print('RAW OUTPUT:')
    print('===========')
    print('\n'.join(res))

    # Scan ranges
    print('')
    print('RANGES:')
    print('=======')
    ranges = scan_ranges(res)
    for i in ranges:
        if i[0] == i[1]:
            print('0x%04x' % i[0])
        else:
            print('0x%04x ... 0x%04x' % (i[0], i[1]))
    print('')
    print('%d ranges total' % len(ranges))

    # Generate match table
    print('')
    print('MATCH TABLE:')
    print('============')
    #matchtable1 = generate_match_table1(ranges)
    #matchtable2 = generate_match_table2(ranges)
    matchtable3, freq = generate_match_table3(ranges)
    print 'match table: %s' % repr(matchtable3)
    print 'match table length: %d bytes' % len(matchtable3)
    print 'encoding freq:'
    for i in xrange(len(freq)):
        if freq[i] == 0:
            continue
        print '  %6d: %d' % (i, freq[i])

    print('')
    print('MATCH C TABLE -> file %s' % repr(opts.out_header))

    # Create C source and header files
    genc = dukutil.GenerateC()
    genc.emitHeader('extract_chars.py')
    genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True)
    if opts.out_source is not None:
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

    genc = dukutil.GenerateC()
    genc.emitHeader('extract_chars.py')
    genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3)))
    if opts.out_header is not None:
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()

    # Image (for illustrative purposes only)
    if opts.out_png is not None:
        generate_png(res, opts.out_png)

if __name__ == '__main__':
    main()
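The enc() helper in generate_match_table3() writes a self-delimiting variable-length number; its inverse, as a hedged sketch (read(n) is again a hypothetical MSB-first bit reader, offsets derived from the encoder above):

    # Sketch only: decode one number from the generate_match_table3() stream.
    def decode_number(read):
        x = read(4)
        if x < 0x0f:
            return x                            # 4-bit form: 0x00...0x0e
        y = read(8)
        if y < 0xfe:
            return 0x0f + y                     # 12-bit form
        if y == 0xfe:
            return 0x0f + 0xfe + read(12)       # 24-bit form
        return 0x0f + 0xfe + 0x1000 + read(24)  # 36-bit form (y == 0xff)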

src/genbuildparams.py | 44

@@ -1,44 +0,0 @@
#!/usr/bin/env python2
#
# Generate build parameter files based on build information.
# A C header is generated for C code, and a JSON file for
# build scripts etc which need to know the build config.
#

import os
import sys
import json
import optparse

import dukutil

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--version', dest='version')
    parser.add_option('--git-commit', dest='git_commit')
    parser.add_option('--git-describe', dest='git_describe')
    parser.add_option('--git-branch', dest='git_branch')
    parser.add_option('--out-json', dest='out_json')
    parser.add_option('--out-header', dest='out_header')
    (opts, args) = parser.parse_args()

    t = {
        'version': opts.version,
        'git_commit': opts.git_commit,
        'git_describe': opts.git_describe,
        'git_branch': opts.git_branch,
    }

    f = open(opts.out_json, 'wb')
    f.write(dukutil.json_encode(t).encode('ascii'))
    f.close()

    f = open(opts.out_header, 'wb')
    f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n')
    f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n')
    f.write('/* automatically generated by genbuildparams.py, do not edit */\n')
    f.write('\n')
    f.write('/* DUK_VERSION is defined in duktape.h */')
    f.write('\n')
    f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n')
    f.close()

src/genbuiltins.py | 2983

File diff suppressed because it is too large

src/genequivyear.py | 57

@@ -1,57 +0,0 @@
#!/usr/bin/env python2
#
# Generate equivalent year table needed by duk_bi_date.c.  Based on:
#
#   http://code.google.com/p/v8/source/browse/trunk/src/date.h#146
#
import datetime
import pytz

def isleapyear(year):
    if (year % 4) != 0:
        return False
    if (year % 100) != 0:
        return True
    if (year % 400) != 0:
        return False
    return True

def eqyear(weekday, isleap):
    # weekday: 0=Sunday, 1=Monday, ...

    if isleap:
        recent_year = 1956
    else:
        recent_year = 1967
    recent_year += (weekday * 12) % 28
    year = 2008 + (recent_year + 3 * 28 - 2008) % 28

    # some assertions
    #
    # Note that Ecmascript internal weekday (0=Sunday) matches neither
    # Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday).
    # Python isoweekday() % 7 matches the Ecmascript weekday.
    # https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday

    dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC)  # Jan 1 00:00:00.000 UTC
    #print(weekday, isleap, year, dt.isoweekday(), isleapyear(year))
    #print(repr(dt))
    #print(dt.isoformat())

    if isleap != isleapyear(year):
        raise Exception('internal error: equivalent year does not have same leap-year-ness')
    if weekday != dt.isoweekday() % 7:
        raise Exception('internal error: equivalent year does not begin with the same weekday')

    return year

def main():
    for i in xrange(14):
        print(eqyear(i % 7, i >= 7))

if __name__ == '__main__':
    main()

src/genexesizereport.py | 164

@@ -1,164 +0,0 @@
#!/usr/bin/env python2
#
# Generate a size report from a Duktape library / executable.
# Write out useful information about function sizes in a variety
# of forms.
#

import os
import sys
import re
import subprocess

#000000000040d200 <duk_to_hstring>:
#  40d200:       55                      push   %rbp
#  40d201:       89 f5                   mov    %esi,%ebp

re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$')
re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$')

def objdump(filename):
    proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE)

    curr_func = None
    func_start = None
    func_end = None
    ret = {}

    def storeFunc():
        if curr_func is None or func_start is None or func_end is None:
            return
        ret[curr_func] = {
            'name': curr_func,
            'start': func_start,
            'end': func_end,  # exclusive
            'length': func_end - func_start
        }

    for line in proc.stdout:
        line = line.strip()

        m = re_funcstart.match(line)
        if m is not None:
            if curr_func is not None:
                storeFunc()
            curr_func = m.group(1)
            func_start = None
            func_end = None

        m = re_codeline.match(line)
        if m is not None:
            func_addr = long(m.group(1), 16)
            func_bytes = m.group(2)
            func_nbytes = len(func_bytes.split(' '))
            func_instr = m.group(3)

            if func_start is None:
                func_start = func_addr
            func_end = func_addr + func_nbytes

    storeFunc()

    return ret

def filterFuncs(funcs):
    todo = []  # avoid mutation while iterating

    def accept(fun):
        n = fun['name']

        if n in [ '.comment',
                  '.dynstr',
                  '.dynsym',
                  '.eh_frame_hdr',
                  '.interp',
                  '.rela.dyn',
                  '.rela.plt',
                  '_DYNAMIC',
                  '_GLOBAL_OFFSET_TABLE_',
                  '_IO_stdin_used',
                  '__CTOR_LIST__',
                  '__DTOR_LIST__',
                  '_fini',
                  '_init',
                  '_start',
                  '' ]:
            return False

        for pfx in [ '.debug', '.gnu', '.note',
                     '__FRAME_', '__' ]:
            if n.startswith(pfx):
                return False

        return True

    for k in funcs.keys():
        if not accept(funcs[k]):
            todo.append(k)

    for k in todo:
        del funcs[k]

def main():
    funcs = objdump(sys.argv[1])
    filterFuncs(funcs)

    funcs_keys = funcs.keys()
    funcs_keys.sort()
    combined_size_all = 0
    combined_size_duk = 0
    for k in funcs_keys:
        fun = funcs[k]
        combined_size_all += fun['length']
        if fun['name'].startswith('duk_'):
            combined_size_duk += fun['length']

    f = sys.stdout
    f.write('<html>')
    f.write('<head>')
    f.write('<title>Size dump for %s</title>' % sys.argv[1])
    f.write("""\
<style type="text/css">
tr:nth-child(2n) {
    background: #eeeeee;
}
tr:nth-child(2n+1) {
    background: #dddddd;
}
</style>
""")
    f.write('</head>')
    f.write('<body>')

    f.write('<h1>Summary</h1>')
    f.write('<table>')
    f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys))
    f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all)
    f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk)
    f.write('</table>')

    f.write('<h1>Sorted by function name</h1>')
    f.write('<table>')
    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
    funcs_keys = funcs.keys()
    funcs_keys.sort()
    for k in funcs_keys:
        fun = funcs[k]
        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
    f.write('</table>')

    f.write('<h1>Sorted by size</h1>')
    f.write('<table>')
    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
    funcs_keys = funcs.keys()
    def cmpSize(a, b):
        return cmp(funcs[a]['length'], funcs[b]['length'])
    funcs_keys.sort(cmp=cmpSize)
    for k in funcs_keys:
        fun = funcs[k]
        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
    f.write('</table>')

    f.write('</body>')
    f.write('</html>')

if __name__ == '__main__':
    main()
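As a quick sanity check of the two regexes against the sample objdump lines quoted in the header comment (whitespace here is illustrative; objdump mixes tabs and spaces):

    m = re_funcstart.match('000000000040d200 <duk_to_hstring>:')
    print(m.group(1))  # 'duk_to_hstring'
    m = re_codeline.match('  40d201:  89 f5  mov    %esi,%ebp')
    print(m.group(1))  # '40d201'
    print(m.group(2))  # '89 f5' -> len(m.group(2).split(' ')) == 2 bytes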

src/genhashsizes.py | 124

@@ -1,124 +0,0 @@
#!/usr/bin/env python2
#
# Find a sequence of duk_hobject hash sizes which have a desired 'ratio'
# and are primes.  Prime hash sizes ensure that all probe sequence values
# (less than hash size) are relatively prime to hash size, i.e. cover the
# entire hash.  Prime data is packed into about 1 byte/prime using a
# prediction-correction model.
#
# Also generates a set of probe steps which are relatively prime to every
# hash size.

import sys
import math

def is_prime(n):
    if n == 0:
        return False
    if n == 1 or n == 2:
        return True

    n_limit = int(math.ceil(float(n) ** 0.5)) + 1
    n_limit += 100  # paranoia
    if n_limit >= n:
        n_limit = n - 1

    for i in xrange(2, n_limit + 1):
        if (n % i) == 0:
            return False

    return True

def next_prime(n):
    while True:
        n += 1
        if is_prime(n):
            return n

def generate_sizes(min_size, max_size, step_ratio):
    "Generate a set of hash sizes following a nice ratio."

    sizes = []
    ratios = []
    curr = next_prime(min_size)
    next = curr
    sizes.append(curr)

    step_ratio = float(step_ratio) / 1024

    while True:
        if next > max_size:
            break
        ratio = float(next) / float(curr)
        if ratio < step_ratio:
            next = next_prime(next)
            continue
        sys.stdout.write('.'); sys.stdout.flush()
        sizes.append(next)
        ratios.append(ratio)
        curr = next
        next = next_prime(int(next * step_ratio))

    sys.stdout.write('\n'); sys.stdout.flush()
    return sizes, ratios

def generate_corrections(sizes, step_ratio):
    "Generate a set of corrections from a ratio-based predictor."

    # Generate a correction list for size list, assuming steps follow a certain
    # ratio; this allows us to pack size list into one byte per size

    res = []

    res.append(sizes[0])  # first entry is first size

    for i in xrange(1, len(sizes)):
        prev = sizes[i - 1]
        pred = int(prev * step_ratio) >> 10
        diff = int(sizes[i] - pred)
        res.append(diff)

        if diff < 0 or diff > 127:
            raise Exception('correction does not fit into 8 bits')

    res.append(-1)  # negative denotes end of list
    return res

def generate_probes(count, sizes):
    res = []

    # Generate probe values which are guaranteed to be relatively prime to
    # all generated hash size primes.  These don't have to be primes, but
    # we currently use smallest non-conflicting primes here.

    i = 2
    while len(res) < count:
        if is_prime(i) and (i not in sizes):
            if i > 255:
                raise Exception('probe step does not fit into 8 bits')
            res.append(i)
            i += 1
            continue
        i += 1

    return res

# NB: these must match duk_hobject defines and code
step_ratio = 1177  # approximately (1.15 * (1 << 10))
min_size = 16
max_size = 2**32 - 1

sizes, ratios = generate_sizes(min_size, max_size, step_ratio)
corrections = generate_corrections(sizes, step_ratio)
probes = generate_probes(32, sizes)
print len(sizes)
print 'SIZES: ' + repr(sizes)
print 'RATIOS: ' + repr(ratios)
print 'CORRECTIONS: ' + repr(corrections)
print 'PROBES: ' + repr(probes)

# highest 32-bit prime
i = 2**32
while True:
    i -= 1
    if is_prime(i):
        print 'highest 32-bit prime is: %d (0x%08x)' % (i, i)
        break
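The prediction-correction packing is invertible: a reader predicts each size from the previous one using the fixed-point ratio and adds the stored one-byte correction. A minimal sketch of the inverse of generate_corrections():

    # Rebuild the size list from packed corrections; same fixed-point
    # step_ratio (1177 ~= 1.15 * 2**10) as used by the generator above.
    def unpack_sizes(corrections, step_ratio=1177):
        sizes = [corrections[0]]  # first entry is the first size verbatim
        for diff in corrections[1:]:
            if diff < 0:          # negative entry terminates the list
                break
            pred = (sizes[-1] * step_ratio) >> 10
            sizes.append(pred + diff)
        return sizes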

src/genobjsizereport.py | 73

@@ -1,73 +0,0 @@
#!/usr/bin/env python2
#
# Size report of (stripped) object and source files.
#

import os
import sys

def getsize(fname):
    return os.stat(fname).st_size

def getlines(fname):
    f = None
    try:
        f = open(fname, 'rb')
        lines = f.read().split('\n')
        return len(lines)
    finally:
        if f is not None:
            f.close()
            f = None

def process(srcfile, objfile):
    srcsize = getsize(srcfile)
    srclines = getlines(srcfile)
    srcbpl = float(srcsize) / float(srclines)
    objsize = getsize(objfile)
    objbpl = float(objsize) / float(srclines)

    return objsize, objbpl, srcsize, srclines, srcbpl

def main():
    tot_srcsize = 0
    tot_srclines = 0
    tot_objsize = 0

    tmp = []
    for i in sys.argv[1:]:
        objfile = i
        if i.endswith('.strip'):
            objname = i[:-6]
        else:
            objname = i
        base, ext = os.path.splitext(objname)
        srcfile = base + '.c'

        objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile)
        srcbase = os.path.basename(srcfile)
        objbase = os.path.basename(objname)  # foo.o.strip -> present as foo.o
        tot_srcsize += srcsize
        tot_srclines += srclines
        tot_objsize += objsize
        tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl))

    def mycmp(a, b):
        return cmp(a[5], b[5])

    tmp.sort(cmp=mycmp, reverse=True)  # sort by object size

    fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f'
    for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp:
        print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl))

    print('========================================================================')
    print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines),
                 '', tot_objsize, float(tot_objsize) / float(tot_srclines)))

if __name__ == '__main__':
    # Usage:
    #
    #   $ strip *.o
    #   $ python genobjsizereport.py *.o
    main()

src/prepare_unicode_data.py | 37

@@ -1,37 +0,0 @@
#!/usr/bin/env python2
#
# UnicodeData.txt may contain ranges in addition to individual characters.
# Unpack the ranges into individual characters for the other scripts to use.
#

import os
import sys

def main():
    f_in = open(sys.argv[1], 'rb')
    f_out = open(sys.argv[2], 'wb')
    while True:
        line = f_in.readline()
        if line == '' or line == '\n':
            break
        parts = line.split(';')  # keep newline
        if parts[1].endswith('First>'):
            line2 = f_in.readline()
            parts2 = line2.split(';')
            if not parts2[1].endswith('Last>'):
                raise Exception('cannot parse range')
            cp1 = long(parts[0], 16)
            cp2 = long(parts2[0], 16)
            for i in xrange(cp1, cp2 + 1):  # inclusive
                parts[0] = '%04X' % i
                f_out.write(';'.join(parts))
        else:
            f_out.write(line)
    f_in.close()
    f_out.flush()
    f_out.close()

if __name__ == '__main__':
    main()
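UnicodeData.txt encodes large blocks as a First/Last line pair, which the loop above expands into one line per codepoint. Illustrative before/after (fields abbreviated; note that only the codepoint field is rewritten, so the '<..., First>' name field is simply repeated):

    # Input:
    #   AC00;<Hangul Syllable, First>;Lo;0;L;...
    #   D7A3;<Hangul Syllable, Last>;Lo;0;L;...
    # Output:
    #   AC00;<Hangul Syllable, First>;Lo;0;L;...
    #   AC01;<Hangul Syllable, First>;Lo;0;L;...
    #   ...
    #   D7A3;<Hangul Syllable, First>;Lo;0;L;...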

src/scan_used_stridx_bidx.py | 56

@@ -1,56 +0,0 @@
#!/usr/bin/env python2
#
# Scan Duktape code base for references to built-in strings and built-in
# objects, i.e. for:
#
#   - Strings which will need DUK_STRIDX_xxx constants and a place in the
#     thr->strs[] array.
#
#   - Objects which will need DUK_BIDX_xxx constants and a place in the
#     thr->builtins[] array.
#

import os
import sys
import re
import json

re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE)
re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE)
re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE)
re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE)

def main():
    str_defs = {}
    obj_defs = {}

    for fn in sys.argv[1:]:
        with open(fn, 'rb') as f:
            d = f.read()
            for m in re.finditer(re_str_stridx, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_str_heap, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_str_hthread, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_obj_bidx, d):
                obj_defs[m.group(1)] = True

    str_used = []
    for k in sorted(str_defs.keys()):
        str_used.append('DUK_STRIDX_' + k)

    obj_used = []
    for k in sorted(obj_defs.keys()):
        obj_used.append('DUK_BIDX_' + k)

    doc = {
        'used_stridx_defines': str_used,
        'used_bidx_defines': obj_used,
        'count_used_stridx_defines': len(str_used),
        'count_used_bidx_defines': len(obj_used)
    }
    print(json.dumps(doc, indent=4))

if __name__ == '__main__':
    main()

tools/combine_src.py | 257

@ -0,0 +1,257 @@
#!/usr/bin/env python2
#
# Combine a set of a source files into a single C file.
#
# Overview of the process:
#
# * Parse user supplied C files. Add automatic #undefs at the end
# of each C file to avoid defined bleeding from one file to another.
#
# * Combine the C files in specified order. If sources have ordering
# dependencies (depends on application), order may matter.
#
# * Process #include statements in the combined source, categorizing
# them either as "internal" (found in specified include path) or
# "external". Internal includes, unless explicitly excluded, are
# inlined into the result while extenal includes are left as is.
# Duplicate #include statements are replaced with a comment.
#
# At every step, source and header lines are represented with explicit
# line objects which keep track of original filename and line. The
# output contains #line directives, if necessary, to ensure error
# throwing and other diagnostic info will work in a useful manner when
# deployed. It's also possible to generate a combined source with no
# #line directives.
#
# Making the process deterministic is important, so that if users have
# diffs that they apply to the combined source, such diffs would apply
# for as long as possible.
#
# Limitations and notes:
#
# * While there are automatic #undef's for #define's introduced in each
# C file, it's not possible to "undefine" structs, unions, etc. If
# there are structs/unions/typedefs with conflicting names, these
# have to be resolved in the source files first.
#
# * Because duplicate #include statements are suppressed, currently
# assumes #include statements are not conditional.
#
# * A system header might be #include'd in multiple source files with
# different feature defines (like _BSD_SOURCE). Because the #include
# file will only appear once in the resulting source, the first
# occurrence wins. The result may not work correctly if the feature
# defines must actually be different between two or more source files.
#
import os
import sys
import re
import json
import optparse
# Include path for finding include files which are amalgamated.
include_paths = []
# Include files specifically excluded from being inlined.
include_excluded = []
class File:
filename_full = None
filename = None
lines = None
def __init__(self, filename, lines):
self.filename = os.path.basename(filename)
self.filename_full = filename
self.lines = lines
class Line:
filename_full = None
filename = None
lineno = None
data = None
def __init__(self, filename, lineno, data):
self.filename = os.path.basename(filename)
self.filename_full = filename
self.lineno = lineno
self.data = data
def readFile(filename):
lines = []
with open(filename, 'rb') as f:
lineno = 0
for line in f:
lineno += 1
if len(line) > 0 and line[-1] == '\n':
line = line[:-1]
lines.append(Line(filename, lineno, line))
return File(filename, lines)
def lookupInclude(incfn):
re_sep = re.compile(r'/|\\')
inccomp = re.split(re_sep, incfn) # split include path, support / and \
for path in include_paths:
fn = apply(os.path.join, [ path ] + inccomp)
if os.path.exists(fn):
return fn # Return full path to first match
return None
def addAutomaticUndefs(f):
defined = {}
re_def = re.compile(r'#define\s+(\w+).*$')
re_undef = re.compile(r'#undef\s+(\w+).*$')
for line in f.lines:
m = re_def.match(line.data)
if m is not None:
#print('DEFINED: %s' % repr(m.group(1)))
defined[m.group(1)] = True
m = re_undef.match(line.data)
if m is not None:
# Could just ignore #undef's here: we'd then emit
# reliable #undef's (though maybe duplicates) at
# the end.
#print('UNDEFINED: %s' % repr(m.group(1)))
if defined.has_key(m.group(1)):
del defined[m.group(1)]
# Undefine anything that seems to be left defined. This not a 100%
# process because some #undef's might be conditional which we don't
# track at the moment. Note that it's safe to #undef something that's
# not defined.
keys = sorted(defined.keys()) # deterministic order
if len(keys) > 0:
#print('STILL DEFINED: %r' % repr(defined.keys()))
f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
for k in keys:
f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))
def createCombined(files, prologue_filename, line_directives):
res = []
line_map = [] # indicate combined source lines where uncombined file/line would change
metadata = {
'line_map': line_map
}
emit_state = [ None, None ] # curr_filename, curr_lineno
def emit(line):
if isinstance(line, (str, unicode)):
res.append(line)
emit_state[1] += 1
else:
if line.filename != emit_state[0] or line.lineno != emit_state[1]:
if line_directives:
res.append('#line %d "%s"' % (line.lineno, line.filename))
line_map.append({ 'original_file': line.filename,
'original_line': line.lineno,
'combined_line': len(res) + 1 })
res.append(line.data)
emit_state[0] = line.filename
emit_state[1] = line.lineno + 1
included = {} # headers already included
if prologue_filename is not None:
with open(prologue_filename, 'rb') as f:
for line in f.read().split('\n'):
res.append(line)
re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')
# Process a file, appending it to the result; the input may be a
# source or an include file. #include directives are handled
# recursively.
def processFile(f):
#print('Process file: ' + f.filename)
for line in f.lines:
if not line.data.startswith('#include'):
emit(line)
continue
m = re_inc.match(line.data)
if m is None:
raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
incpath = m.group(2)
if incpath in include_excluded:
# Specific include files excluded from the
# inlining / duplicate suppression process.
emit(line) # keep as is
continue
if included.has_key(incpath):
# We suppress duplicate includes, both internal and
# external, based on the assumption that includes are
# not behind #ifdef checks. This is the case for
# Duktape (except for the include files excluded).
emit('/* #include %s -> already included */' % incpath)
continue
included[incpath] = True
# An include file is considered "internal" and is amalgamated
# if it is found in the include path provided by the user.
incfile = lookupInclude(incpath)
if incfile is not None:
#print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
emit('/* #include %s */' % incpath)
processFile(readFile(incfile))
else:
#print('Include considered external: %s' % repr(line.data))
emit(line) # keep as is
for f in files:
processFile(f)
return '\n'.join(res) + '\n', metadata
def main():
global include_paths, include_excluded
parser = optparse.OptionParser()
parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
parser.add_option('--output-source', dest='output_source', help='Output source filename')
parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
(opts, args) = parser.parse_args()
assert(opts.include_paths is not None)
include_paths = opts.include_paths # global for easy access
include_excluded = opts.include_excluded
assert(opts.output_source)
assert(opts.output_metadata)
print('Read input files, add automatic #undefs')
sources = args
files = []
for fn in sources:
res = readFile(fn)
#print('Add automatic undefs for: ' + fn)
addAutomaticUndefs(res)
files.append(res)
print('Create combined source file from %d source files' % len(files))
combined_source, metadata = \
createCombined(files, opts.prologue, opts.line_directives)
with open(opts.output_source, 'wb') as f:
f.write(combined_source)
with open(opts.output_metadata, 'wb') as f:
f.write(json.dumps(metadata, indent=4))
print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source))
if __name__ == '__main__':
main()
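# Example invocation (hypothetical paths, for illustration only):
#
#   $ python combine_src.py --include-path src-separate \
#         --prologue prologue.txt --line-directives \
#         --output-source duktape.c --output-metadata duk_combined.json \
#         src-separate/*.c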

246
tools/create_spdx_license.py

@@ -0,0 +1,246 @@
#!/usr/bin/env python2
#
# Helper to create an SPDX license file (http://spdx.org)
#
# This must be executed when the dist/ directory is otherwise complete,
# except for the SPDX license, so that the file lists and such contained
# in the SPDX license will be correct.
#
# The utility outputs RDF/XML to specified file:
#
# $ python create_spdx_license.py /tmp/license.spdx
#
# Then, validate with SPDXViewer and SPDXTools:
#
# $ java -jar SPDXViewer.jar /tmp/license.spdx
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html
#
# Finally, copy to dist:
#
# $ cp /tmp/license.spdx dist/license.spdx
#
# SPDX FAQ indicates there is no standard extension for an SPDX license file
# but '.spdx' is a common practice.
#
# The algorithm to compute a "verification code", implemented in this file,
# can be verified as follows:
#
# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz
# $ cd /tmp
# $ tar xvfJ duktape-N.N.N.tar.xz
# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/
#
# Compare the resulting verification code manually with the one in license.spdx.
#
# Resources:
#
# - http://spdx.org/about-spdx/faqs
# - http://wiki.spdx.org/view/Technical_Team/Best_Practices
#
import os
import sys
import re
import datetime
import sha
import rdflib
from rdflib import URIRef, BNode, Literal, Namespace
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
XSD = Namespace('http://www.w3.org/2001/XMLSchema#')
SPDX = Namespace('http://spdx.org/rdf/terms#')
DOAP = Namespace('http://usefulinc.com/ns/doap#')
DUKTAPE = Namespace('http://duktape.org/rdf/terms#')
def checksumFile(g, filename):
f = open(filename, 'rb')
d = f.read()
f.close()
shasum = sha.sha(d).digest().encode('hex').lower()
csum_node = BNode()
g.add((csum_node, RDF.type, SPDX.Checksum))
g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1))
g.add((csum_node, SPDX.checksumValue, Literal(shasum)))
return csum_node
def computePackageVerification(g, dirname, excluded):
# SPDX 1.2 Section 4.7
# The SPDXTools command "GenerateVerificationCode" can be used to
# check the verification codes created. Note that you must manually
# remove "license.spdx" from the unpacked dist directory before
# computing the verification code.
verify_node = BNode()
hashes = []
for dirpath, dirnames, filenames in os.walk(dirname):
for fn in filenames:
full_fn = os.path.join(dirpath, fn)
f = open(full_fn, 'rb')
d = f.read()
f.close()
if full_fn in excluded:
#print('excluded in verification: ' + full_fn)
continue
#print('included in verification: ' + full_fn)
file_sha1 = sha.sha(d).digest().encode('hex').lower()
hashes.append(file_sha1)
#print(repr(hashes))
hashes.sort()
#print(repr(hashes))
verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower()
for fn in excluded:
g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn)))
g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code)))
return verify_node
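# A standalone sketch (illustrative only, not used by the tool) of the
# SPDX 1.2 verification code computed above: SHA-1 each included file,
# sort the hex digests, and SHA-1 their concatenation.
def _sketch_verification_code(filenames):
    hashes = []
    for fn in filenames:
        with open(fn, 'rb') as f:
            hashes.append(sha.sha(f.read()).digest().encode('hex').lower())
    hashes.sort()
    return sha.sha(''.join(hashes)).digest().encode('hex').lower()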
def fileType(filename):
ign, ext = os.path.splitext(filename)
if ext in [ '.c', '.h', '.js' ]:
return SPDX.fileType_source
else:
return SPDX.fileType_other
def getDuktapeVersion():
f = open('./src/duktape.h')
re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$')
for line in f:
line = line.strip()
m = re_ver.match(line)
if m is None:
continue
ver = int(m.group(1))
return '%d.%d.%d' % ((ver / 10000) % 100,
(ver / 100) % 100,
ver % 100)
raise Exception('could not figure out Duktape version')
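# Worked example (illustrative): DUK_VERSION 10500 decodes as
# (10500 / 10000) % 100 = 1, (10500 / 100) % 100 = 5, 10500 % 100 = 0,
# i.e. version string '1.5.0'.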
def main():
outfile = sys.argv[1]
if not (os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript')):
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n')
sys.exit(1)
os.chdir('dist')
if not os.path.exists('Makefile.cmdline'):
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n')
sys.exit(1)
duktape_version = getDuktapeVersion()
duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz'
now = datetime.datetime.utcnow()
now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second)
creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime)
duktape_org = Literal('Organization: duktape.org')
mit_license = URIRef('http://spdx.org/licenses/MIT')
duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)')
g = rdflib.Graph()
crea_node = BNode()
g.add((crea_node, RDF.type, SPDX.CreationInfo))
g.add((crea_node, RDFS.comment, Literal('')))
g.add((crea_node, SPDX.creator, duktape_org))
g.add((crea_node, SPDX.created, creation_date))
g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/
# 'name' should not include a version number (see best practices)
pkg_node = BNode()
g.add((pkg_node, RDF.type, SPDX.Package))
g.add((pkg_node, SPDX.name, Literal('Duktape')))
g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version)))
g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname)))
g.add((pkg_node, SPDX.supplier, duktape_org))
g.add((pkg_node, SPDX.originator, duktape_org))
g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI)))
g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI)))
verify_node = computePackageVerification(g, '.', [ './license.spdx' ])
g.add((pkg_node, SPDX.packageVerificationCode, verify_node))
# SPDX.checksum: omitted because license is inside the package
g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.')))
# NOTE: MIT license alone is sufficient for now, because Duktape, Lua,
# Murmurhash2, and CommonJS (though probably not even relevant for
# licensing) are all MIT.
g.add((pkg_node, SPDX.licenseConcluded, mit_license))
g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license))
g.add((pkg_node, SPDX.licenseDeclared, mit_license))
g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.')))
g.add((pkg_node, SPDX.copyrightText, duktape_copyright))
g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter')))
g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint')))
# hasFile properties added separately below
#reviewed_node = BNode()
#g.add((reviewed_node, RDF.type, SPDX.Review))
#g.add((reviewed_node, SPDX.reviewer, XXX))
#g.add((reviewed_node, SPDX.reviewDate, XXX))
#g.add((reviewed_node, RDFS.comment, ''))
spdx_doc = BNode()
g.add((spdx_doc, RDF.type, SPDX.SpdxDocument))
g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2')))
g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0')))
g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version)))
g.add((spdx_doc, SPDX.creationInfo, crea_node))
g.add((spdx_doc, SPDX.describesPackage, pkg_node))
# SPDX.hasExtractedLicensingInfo
# SPDX.reviewed
# SPDX.referencesFile: added below
for dirpath, dirnames, filenames in os.walk('.'):
for fn in filenames:
full_fn = os.path.join(dirpath, fn)
#print('# file: ' + full_fn)
file_node = BNode()
g.add((file_node, RDF.type, SPDX.File))
g.add((file_node, SPDX.fileName, Literal(full_fn)))
g.add((file_node, SPDX.fileType, fileType(full_fn)))
g.add((file_node, SPDX.checksum, checksumFile(g, full_fn)))
# Here we assume that LICENSE.txt provides the actual "in file"
# licensing information, and everything else is implicitly under
# MIT license.
g.add((file_node, SPDX.licenseConcluded, mit_license))
if full_fn == './LICENSE.txt':
g.add((file_node, SPDX.licenseInfoInFile, mit_license))
else:
g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none)))
# SPDX.licenseComments
g.add((file_node, SPDX.copyrightText, duktape_copyright))
# SPDX.noticeText
# SPDX.artifactOf
# SPDX.fileDependency
# SPDX.fileContributor
# XXX: should referencesFile include all files?
g.add((spdx_doc, SPDX.referencesFile, file_node))
g.add((pkg_node, SPDX.hasFile, file_node))
# Serialize into RDF/XML directly. We could also serialize into
# N-Triples and use external tools (like 'rapper') to get cleaner,
# abbreviated output.
#print('# Duktape SPDX license file (autogenerated)')
#print(g.serialize(format='turtle'))
#print(g.serialize(format='nt'))
f = open(outfile, 'wb')
#f.write(g.serialize(format='rdf/xml'))
f.write(g.serialize(format='xml'))
f.close()
if __name__ == '__main__':
main()

49
tools/duk_meta_to_strarray.py

@@ -0,0 +1,49 @@
#!/usr/bin/env python2
#
# Create an array of C strings with Duktape built-in strings.
# Useful when using external strings.
#
import os
import sys
import json
def to_c_string(x):
res = '"'
term = False
for i, c in enumerate(x):
if term:
term = False
res += '" "'
o = ord(c)
if o < 0x20 or o > 0x7e or c in '\'"\\':
# Terminate C string so that escape doesn't become
# ambiguous
res += '\\x%02x' % o
term = True
else:
res += c
res += '"'
return res
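# Example (illustrative): a double quote is hex escaped and the literal is
# split afterwards so that a following printable character cannot extend
# the escape:
#
#   to_c_string('a"b')  ->  '"a\x22" "b"'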
def main():
f = open(sys.argv[1], 'rb')
d = f.read()
f.close()
meta = json.loads(d)
print('const char *duk_builtin_strings[] = {')
strlist = meta['builtin_strings_base64']
for i in xrange(len(strlist)):
s = strlist[i]
if i == len(strlist) - 1:
print(' %s' % to_c_string(s.decode('base64')))
else:
print(' %s,' % to_c_string(s.decode('base64')))
print('};')
if __name__ == '__main__':
main()

259
tools/dukutil.py

@@ -0,0 +1,259 @@
#!/usr/bin/env python2
#
# Python utilities shared by the build scripts.
#
import datetime
import json
class BitEncoder:
"Bitstream encoder."
_bits = None
def __init__(self):
self._bits = []
def bits(self, x, nbits):
if (x >> nbits) != 0:
raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits))
for shift in xrange(nbits - 1, -1, -1): # nbits - 1, nbits - 2, ..., 0
self._bits.append((x >> shift) & 0x01)
def string(self, x):
for i in xrange(len(x)):
ch = ord(x[i])
for shift in xrange(7, -1, -1): # 7, 6, ..., 0
self._bits.append((ch >> shift) & 0x01)
def getNumBits(self):
"Get current number of encoded bits."
return len(self._bits)
def getNumBytes(self):
"Get current number of encoded bytes, rounded up."
nbits = len(self._bits)
while (nbits % 8) != 0:
nbits += 1
return nbits / 8
def getBytes(self):
"Get current bitstream as a byte sequence, padded with zero bits."
bytes = []
for i in xrange(self.getNumBytes()):
t = 0
for j in xrange(8):
off = i*8 + j
if off >= len(self._bits):
t = (t << 1)
else:
t = (t << 1) + self._bits[off]
bytes.append(t)
return bytes
def getByteString(self):
"Get current bitstream as a string."
return ''.join([chr(i) for i in self.getBytes()])
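# Example usage (illustrative): three bits 0b101 followed by a single 1 bit
# pack into one byte, padded with zero bits on the right:
#
#   be = BitEncoder()
#   be.bits(0x05, 3)
#   be.bits(0x01, 1)
#   be.getBytes()  ->  [ 0xb0 ]   # 0b10110000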
class GenerateC:
"Helper for generating C source and header files."
_data = None
wrap_col = 76
def __init__(self):
self._data = []
def emitRaw(self, text):
"Emit raw text (without automatic newline)."
self._data.append(text)
def emitLine(self, text):
"Emit a raw line (with automatic newline)."
self._data.append(text + '\n')
def emitHeader(self, autogen_by):
"Emit file header comments."
# Note: a timestamp would be nice but it breaks incremental building
self.emitLine('/*')
self.emitLine(' * Automatically generated by %s, do not edit!' % autogen_by)
self.emitLine(' */')
self.emitLine('')
def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True):
"Emit an array as a C array."
# lenient input
if isinstance(data, unicode):
data = data.encode('utf-8')
if isinstance(data, str):
tmp = []
for i in xrange(len(data)):
tmp.append(ord(data[i]))
data = tmp
size_spec = ''
if size is not None:
size_spec = '%d' % size
visib_qual = ''
if visibility is not None:
visib_qual = visibility + ' '
const_qual = ''
if const:
const_qual = 'const '
self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec))
line = ''
for i in xrange(len(data)):
if intvalues:
suffix = ''
if data[i] < -32768 or data[i] > 32767:
suffix = 'L'
t = "%d%s," % (data[i], suffix)
else:
t = "(%s)'\\x%02x', " % (typename, data[i])
if len(line) + len(t) >= self.wrap_col:
self.emitLine(line)
line = t
else:
line += t
if line != '':
self.emitLine(line)
self.emitLine('};')
def emitDefine(self, name, value, comment=None):
"Emit a C define with an optional comment."
# XXX: there is no escaping right now (for comment or value)
if comment is not None:
self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment))
else:
self.emitLine('#define %-60s %s' % (name, value))
def getString(self):
"Get the entire file as a string."
return ''.join(self._data)
def json_encode(x):
"JSON encode a value."
try:
return json.dumps(x)
except AttributeError:
pass
# for older library versions
return json.write(x)
def json_decode(x):
"JSON decode a value."
try:
return json.loads(x)
except AttributeError:
pass
# for older library versions
return json.read(x)
# Compute a byte hash identical to duk_util_hashbytes().
DUK__MAGIC_M = 0x5bd1e995
DUK__MAGIC_R = 24
def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian):
h = (str_seed ^ nbytes) & 0xffffffff
while nbytes >= 4:
# 4-byte fetch byte order:
# - native (endian dependent) if unaligned accesses allowed
# - little endian if unaligned accesses not allowed
if big_endian:
k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \
(ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24)
else:
k = ord(x[off]) + (ord(x[off + 1]) << 8) + \
(ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24)
k = (k * DUK__MAGIC_M) & 0xffffffff
k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff
k = (k * DUK__MAGIC_M) & 0xffffffff
h = (h * DUK__MAGIC_M) & 0xffffffff
h = (h ^ k) & 0xffffffff
off += 4
nbytes -= 4
if nbytes >= 3:
h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff
if nbytes >= 2:
h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff
if nbytes >= 1:
h = (h ^ ord(x[off])) & 0xffffffff
h = (h * DUK__MAGIC_M) & 0xffffffff
h = (h ^ (h >> 13)) & 0xffffffff
h = (h * DUK__MAGIC_M) & 0xffffffff
h = (h ^ (h >> 15)) & 0xffffffff
return h
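# Example call (illustrative): hash 6 bytes starting at offset 0 with a
# sample seed; the Python result matches the C duk_util_hashbytes() for
# the same seed and byte order:
#
#   duk_util_hashbytes('foobar', 0, 6, 0x12345678, False)  ->  32-bit integer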
# Compute a string hash identical to duk_heap_hashstring() when dense
# hashing is enabled.
DUK__STRHASH_SHORTSTRING = 4096
DUK__STRHASH_MEDIUMSTRING = 256 * 1024
DUK__STRHASH_BLOCKSIZE = 256
def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False):
str_seed = (hash_seed ^ len(x)) & 0xffffffff
if len(x) <= DUK__STRHASH_SHORTSTRING:
res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian)
else:
if len(x) <= DUK__STRHASH_MEDIUMSTRING:
skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE
else:
skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE
res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian)
off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256
while off < len(x):
left = len(x) - off
now = left
if now > DUK__STRHASH_BLOCKSIZE:
now = DUK__STRHASH_BLOCKSIZE
res = (res ^ duk_util_hashbytes(x, off, now, str_seed, big_endian)) & 0xffffffff
off += skip
if strhash16:
res &= 0xffff
return res
# Compute a string hash identical to duk_heap_hashstring() when sparse
# hashing is enabled.
DUK__STRHASH_SKIP_SHIFT = 5 # XXX: assumes default value
def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False):
res = (hash_seed ^ len(x)) & 0xffffffff
step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1
off = len(x)
while off >= step:
assert(off >= 1)
res = ((res * 33) + ord(x[off - 1])) & 0xffffffff
off -= step
if strhash16:
res &= 0xffff
return res
# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length().
def duk_unicode_unvalidated_utf8_length(x):
assert(isinstance(x, str))
clen = 0
for c in x:
t = ord(c)
if t < 0x80 or t >= 0xc0: # 0x80...0xbf are continuation chars, not counted
clen += 1
return clen
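# Example (illustrative): duk_unicode_unvalidated_utf8_length('\xc3\xa4bc')
# returns 3, because 0xa4 is a continuation byte (0x80...0xbf) and is not
# counted.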

130
tools/dump_bytecode.py

@@ -0,0 +1,130 @@
#!/usr/bin/env python2
#
# Utility to dump bytecode into a human readable form.
#
import os
import sys
import struct
import optparse
def decode_string(buf, off):
strlen, = struct.unpack('>L', buf[off:off+4])
off += 4
strdata = buf[off:off+strlen]
off += strlen
return off, strdata
def sanitize_string(val):
# Don't try to UTF-8 decode, just escape non-printable ASCII.
def f(c):
if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"':
return '\\x%02x' % ord(c)
else:
return c
return "'" + ''.join(map(f, val)) + "'"
def decode_sanitize_string(buf, off):
off, val = decode_string(buf, off)
return off, sanitize_string(val)
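# Example (illustrative): strings in the dump are prefixed with a 4-byte
# big-endian length, so:
#
#   decode_string('\x00\x00\x00\x03abc', 0)  ->  (7, 'abc')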
def dump_function(buf, off, ind):
count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12])
off += 12
print '%sInstructions: %d' % (ind, count_inst)
print '%sConstants: %d' % (ind, count_const)
print '%sInner functions: %d' % (ind, count_funcs)
nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12])
off += 12
print '%sNregs: %d' % (ind, nregs)
print '%sNargs: %d' % (ind, nargs)
print '%sStart line number: %d' % (ind, start_line)
print '%sEnd line number: %d' % (ind, end_line)
compfunc_flags, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags)
for i in xrange(count_inst):
ins, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s %06d: %08lx' % (ind, i, ins)
print '%sConstants:' % ind
for i in xrange(count_const):
const_type, = struct.unpack('B', buf[off:off+1])
off += 1
if const_type == 0x00:
off, strdata = decode_sanitize_string(buf, off)
print '%s %06d: %s' % (ind, i, strdata)
elif const_type == 0x01:
num, = struct.unpack('>d', buf[off:off+8])
off += 8
print '%s %06d: %f' % (ind, i, num)
else:
raise Exception('invalid constant type: %d' % const_type)
for i in xrange(count_funcs):
print '%sInner function %d:' % (ind, i)
off = dump_function(buf, off, ind + ' ')
val, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s.length: %d' % (ind, val)
off, val = decode_sanitize_string(buf, off)
print '%s.name: %s' % (ind, val)
off, val = decode_sanitize_string(buf, off)
print '%s.fileName: %s' % (ind, val)
off, val = decode_string(buf, off) # actually a buffer
print '%s._Pc2line: %s' % (ind, val.encode('hex'))
while True:
off, name = decode_string(buf, off)
if name == '':
break
name = sanitize_string(name)
val, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s_Varmap[%s] = %d' % (ind, name, val)
idx = 0
while True:
off, name = decode_string(buf, off)
if name == '':
break
name = sanitize_string(name)
print '%s_Formals[%d] = %s' % (ind, idx, name)
idx += 1
return off
def dump_bytecode(buf, off, ind):
sig, ver = struct.unpack('BB', buf[off:off+2])
off += 2
if sig != 0xff:
raise Exception('invalid signature byte: %d' % sig)
if ver != 0x00:
raise Exception('unsupported bytecode version: %d' % ver)
print '%sBytecode version: 0x%02x' % (ind, ver)
off = dump_function(buf, off, ind + ' ')
return off
def main():
parser = optparse.OptionParser()
parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump')
(opts, args) = parser.parse_args()
with open(args[0], 'rb') as f:
d = f.read()
if opts.hex_decode:
d = d.strip()
d = d.decode('hex')
dump_bytecode(d, 0, '')
if __name__ == '__main__':
main()

444
tools/extract_caseconv.py

@@ -0,0 +1,444 @@
#!/usr/bin/env python2
#
# Extract rules for Unicode case conversion, specifically the behavior
# required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19. The
# bitstream encoded rules are used for the slow path at run time, so
# compactness is favored over speed.
#
# There is no support for context or locale sensitive rules, as they
# are handled directly in C code before consulting tables generated
# here. Ecmascript requires case conversion both with and without
# locale/language specific rules (e.g. String.prototype.toLowerCase()
# and String.prototype.toLocaleLowerCase()), so they are best handled
# in C anyway.
#
# Case conversion rules for ASCII are also excluded as they are
# handled by the C fast path. Rules for non-BMP characters (codepoints
# above U+FFFF) are omitted as they're not required for standard
# Ecmascript.
#
import os, sys, math
import optparse
import dukutil
class UnicodeData:
"Read UnicodeData.txt into an internal representation."
def __init__(self, filename):
self.data = self.read_unicode_data(filename)
print 'read %d unicode data entries' % len(self.data)
def read_unicode_data(self, filename):
res = []
f = open(filename, 'rb')
for line in f:
if line.startswith('#'):
continue
line = line.strip()
if line == '':
continue
parts = line.split(';')
if len(parts) != 15:
raise Exception('invalid unicode data line')
res.append(parts)
f.close()
# Sort based on Unicode codepoint
def mycmp(a,b):
return cmp(long(a[0], 16), long(b[0], 16))
res.sort(cmp=mycmp)
return res
class SpecialCasing:
"Read SpecialCasing.txt into an internal representation."
def __init__(self, filename):
self.data = self.read_special_casing_data(filename)
print 'read %d special casing entries' % len(self.data)
def read_special_casing_data(self, filename):
res = []
f = open(filename, 'rb')
for line in f:
try:
idx = line.index('#')
line = line[:idx]
except ValueError:
pass
line = line.strip()
if line == '':
continue
parts = line.split(';')
parts = [i.strip() for i in parts]
while len(parts) < 6:
parts.append('')
res.append(parts)
f.close()
return res
def parse_unicode_sequence(x):
res = ''
for i in x.split(' '):
i = i.strip()
if i == '':
continue
res += unichr(long(i, 16))
return res
def get_base_conversion_maps(unicode_data):
"Create case conversion tables without handling special casing yet."
uc = {} # codepoint (number) -> string
lc = {}
tc = {} # titlecase
for x in unicode_data.data:
c1 = long(x[0], 16)
# just 16-bit support needed
if c1 >= 0x10000:
continue
if x[12] != '':
# field 12: simple uppercase mapping
c2 = parse_unicode_sequence(x[12])
uc[c1] = c2
tc[c1] = c2 # titlecase default == uppercase, overridden below if necessary
if x[13] != '':
# field 13: simple lowercase mapping
c2 = parse_unicode_sequence(x[13])
lc[c1] = c2
if x[14] != '':
# field 14: simple titlecase mapping
c2 = parse_unicode_sequence(x[14])
tc[c1] = c2
return uc, lc, tc
def update_special_casings(uc, lc, tc, special_casing):
"Update case conversion tables with special case conversion rules."
for x in special_casing.data:
c1 = long(x[0], 16)
if x[4] != '':
# conditions
continue
lower = parse_unicode_sequence(x[1])
title = parse_unicode_sequence(x[2])
upper = parse_unicode_sequence(x[3])
if len(lower) > 1:
lc[c1] = lower
if len(upper) > 1:
uc[c1] = upper
if len(title) > 1:
tc[c1] = title
print 'special case: %d %d %d' % (len(lower), len(upper), len(title))
def remove_ascii_part(convmap):
"Remove ASCII case conversion parts (handled by C fast path)."
for i in xrange(128):
if convmap.has_key(i):
del convmap[i]
def scan_range_with_skip(convmap, start_idx, skip):
"Scan for a range of continuous case conversion with a certain 'skip'."
conv_i = start_idx
if not convmap.has_key(conv_i):
return None, None, None
elif len(convmap[conv_i]) > 1:
return None, None, None
else:
conv_o = ord(convmap[conv_i])
start_i = conv_i
start_o = conv_o
while True:
new_i = conv_i + skip
new_o = conv_o + skip
if not convmap.has_key(new_i):
break
if len(convmap[new_i]) > 1:
break
if ord(convmap[new_i]) != new_o:
break
conv_i = new_i
conv_o = new_o
# [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char
count = (conv_i - start_i) / skip + 1
if count <= 1:
return None, None, None
# we have an acceptable range, remove them from the convmap here
for i in xrange(start_i, conv_i + skip, skip):
del convmap[i]
return start_i, start_o, count
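# Example (illustrative): with skip=2 the three mappings below form a single
# range starting at (0x100 -> 0x101) with count 3, and scanning removes them
# from the map:
#
#   m = { 0x100: unichr(0x101), 0x102: unichr(0x103), 0x104: unichr(0x105) }
#   scan_range_with_skip(m, 0x100, 2)  ->  (0x100, 0x101, 3)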
def find_first_range_with_skip(convmap, skip):
"Find first range with a certain 'skip' value."
for i in xrange(65536):
start_i, start_o, count = scan_range_with_skip(convmap, i, skip)
if start_i is None:
continue
return start_i, start_o, count
return None, None, None
def generate_tables(convmap):
"Generate bit-packed case conversion table for a given conversion map."
# The bitstream encoding is based on manual inspection for whatever
# regularity the Unicode case conversion rules have.
#
# Start with a full description of case conversions which does not
# cover all codepoints; unmapped codepoints convert to themselves.
# Scan for range-to-range mappings with a range of skips starting from 1.
# Whenever a valid range is found, remove it from the map. Finally,
# output the remaining case conversions (1:1 and 1:n) on a per codepoint
# basis.
#
# This is very slow because we always scan from scratch, but it's the
# most reliable and simplest way to do the scan.
ranges = [] # range mappings (2 or more consecutive mappings with a certain skip)
singles = [] # 1:1 character mappings
complex = [] # 1:n character mappings
# Ranges with skips
for skip in xrange(1,6+1): # skips 1...6 are useful
while True:
start_i, start_o, count = find_first_range_with_skip(convmap, skip)
if start_i is None:
break
print 'skip %d: %d %d %d' % (skip, start_i, start_o, count)
ranges.append([start_i, start_o, count, skip])
# 1:1 conversions
k = convmap.keys()
k.sort()
for i in k:
if len(convmap[i]) > 1:
continue
singles.append([i, ord(convmap[i])]) # codepoint, codepoint
del convmap[i]
# There are many mappings to 2-char sequences with latter char being U+0399.
# These could be handled as a special case, but we don't do that right now.
#
# [8064L, u'\u1f08\u0399']
# [8065L, u'\u1f09\u0399']
# [8066L, u'\u1f0a\u0399']
# [8067L, u'\u1f0b\u0399']
# [8068L, u'\u1f0c\u0399']
# [8069L, u'\u1f0d\u0399']
# [8070L, u'\u1f0e\u0399']
# [8071L, u'\u1f0f\u0399']
# ...
#
# tmp = {}
# k = convmap.keys()
# k.sort()
# for i in k:
# if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399':
# tmp[i] = convmap[i][0]
# del convmap[i]
# print repr(tmp)
#
# skip = 1
# while True:
# start_i, start_o, count = find_first_range_with_skip(tmp, skip)
# if start_i is None:
# break
# print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count)
# print len(tmp.keys())
# print repr(tmp)
# XXX: need to put 12 remaining mappings back to convmap...
# 1:n conversions
k = convmap.keys()
k.sort()
for i in k:
complex.append([i, convmap[i]]) # codepoint, string
del convmap[i]
for t in singles:
print repr(t)
for t in complex:
print repr(t)
print 'range mappings: %d' % len(ranges)
print 'single character mappings: %d' % len(singles)
print 'complex mappings (1:n): %d' % len(complex)
print 'remaining (should be zero): %d' % len(convmap.keys())
# XXX: opportunities for diff encoding skip=3 ranges?
prev = None
for t in ranges:
# range: [start_i, start_o, count, skip]
if t[3] != 3:
continue
if prev is not None:
print '%d %d' % (t[0] - prev[0], t[1] - prev[1])
else:
print 'start: %d %d' % (t[0], t[1])
prev = t
# bit packed encoding
be = dukutil.BitEncoder()
for curr_skip in xrange(1, 7): # 1...6
count = 0
for r in ranges:
start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
if skip != curr_skip:
continue
count += 1
be.bits(count, 6)
print 'encode: skip=%d, count=%d' % (curr_skip, count)
for r in ranges:
start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
if skip != curr_skip:
continue
be.bits(start_i, 16)
be.bits(start_o, 16)
be.bits(r_count, 7)
be.bits(0x3f, 6) # maximum count value = end of skips
count = len(singles)
be.bits(count, 6)
for t in singles:
cp_i, cp_o = t[0], t[1]
be.bits(cp_i, 16)
be.bits(cp_o, 16)
count = len(complex)
be.bits(count, 7)
for t in complex:
cp_i, str_o = t[0], t[1]
be.bits(cp_i, 16)
be.bits(len(str_o), 2)
for i in xrange(len(str_o)):
be.bits(ord(str_o[i]), 16)
return be.getBytes(), be.getNumBits()
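# Summary of the resulting bitstream layout (matches the encoder above):
# for each skip 1...6, a 6-bit range count followed by that many
# { start_i:16, start_o:16, count:7 } records; a 6-bit 0x3f sentinel ends
# the skip section; then a 6-bit count of 1:1 mappings as { cp_i:16,
# cp_o:16 } records; finally a 7-bit count of 1:n mappings as { cp_i:16,
# length:2, then 16 bits per output codepoint } records.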
def generate_regexp_canonicalize_lookup(convmap):
res = []
highest_nonid = -1
for cp in xrange(65536):
res_cp = cp # default to as is
if convmap.has_key(cp):
tmp = convmap[cp]
if len(tmp) == 1:
# Single codepoint mapping; mappings to multiple codepoints are ignored.
res_cp = ord(tmp[0])
if cp >= 0x80 and res_cp < 0x80:
res_cp = cp # non-ASCII mapped to ASCII, ignore
if cp != res_cp:
highest_nonid = cp
res.append(res_cp)
# At the moment this is 65370, which means there's very little
# gain in assuming 1:1 mapping above a certain BMP codepoint.
print('HIGHEST NON-ID MAPPING: %d' % highest_nonid)
return res
def clonedict(x):
"Shallow clone of input dict."
res = {}
for k in x.keys():
res[k] = x[k]
return res
def main():
parser = optparse.OptionParser()
parser.add_option('--command', dest='command', default='caseconv_bitpacked')
parser.add_option('--unicode-data', dest='unicode_data')
parser.add_option('--special-casing', dest='special_casing')
parser.add_option('--out-source', dest='out_source')
parser.add_option('--out-header', dest='out_header')
parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc')
parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc')
parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup')
(opts, args) = parser.parse_args()
unicode_data = UnicodeData(opts.unicode_data)
special_casing = SpecialCasing(opts.special_casing)
uc, lc, tc = get_base_conversion_maps(unicode_data)
update_special_casings(uc, lc, tc, special_casing)
if opts.command == 'caseconv_bitpacked':
# XXX: ASCII and non-BMP filtering could be options but are now hardcoded.
# ASCII is handled by the C fast path so it's not needed here.
t = clonedict(uc)
remove_ascii_part(t)
uc_bytes, uc_nbits = generate_tables(t)
t = clonedict(lc)
remove_ascii_part(t)
lc_bytes, lc_nbits = generate_tables(t)
# Generate C source and header files
genc = dukutil.GenerateC()
genc.emitHeader('extract_caseconv.py')
genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
f = open(opts.out_source, 'wb')
f.write(genc.getString())
f.close()
genc = dukutil.GenerateC()
genc.emitHeader('extract_caseconv.py')
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes)))
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes)))
f = open(opts.out_header, 'wb')
f.write(genc.getString())
f.close()
elif opts.command == 're_canon_lookup':
# direct canonicalization lookup for case insensitive regexps, includes ascii part
t = clonedict(uc)
re_canon_lookup = generate_regexp_canonicalize_lookup(t)
genc = dukutil.GenerateC()
genc.emitHeader('extract_caseconv.py')
genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True)
f = open(opts.out_source, 'wb')
f.write(genc.getString())
f.close()
genc = dukutil.GenerateC()
genc.emitHeader('extract_caseconv.py')
genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup)))
f = open(opts.out_header, 'wb')
f.write(genc.getString())
f.close()
else:
raise Exception('invalid command: %r' % opts.command)
if __name__ == '__main__':
main()

382
tools/extract_chars.py

@@ -0,0 +1,382 @@
#!/usr/bin/env python2
#
# Select a set of Unicode characters (based on included/excluded categories
# etc) and write out a compact bitstream for matching a character against
# the set at runtime. This is for the slow path, where we're especially
# concerned with compactness. A C source file with the table is written,
# together with a matching C header.
#
# Unicode categories (such as 'Z') can be used. Two pseudo-categories
# are also available for exclusion only: ASCII and NONBMP. "ASCII"
# category excludes ASCII codepoints which is useful because C code
# typically contains an ASCII fast path so ASCII characters don't need
# to be considered in the Unicode tables. "NONBMP" excludes codepoints
# above U+FFFF which is useful because such codepoints don't need to be
# supported in standard Ecmascript.
#
import os, sys, math
import optparse
import dukutil
def read_unicode_data(unidata, catsinc, catsexc, filterfunc):
"Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc."
res = []
f = open(unidata, 'rb')
def filter_none(cp):
return True
if filterfunc is None:
filterfunc = filter_none
# The Unicode parsing is slow enough to warrant some speedups.
exclude_cat_exact = {}
for cat in catsexc:
exclude_cat_exact[cat] = True
include_cat_exact = {}
for cat in catsinc:
include_cat_exact[cat] = True
for line in f:
#line = line.strip()
parts = line.split(';')
codepoint = parts[0]
if not filterfunc(long(codepoint, 16)):
continue
category = parts[2]
if exclude_cat_exact.has_key(category):
continue # quick reject
rejected = False
for cat in catsexc:
if category.startswith(cat) or codepoint == cat:
rejected = True
break
if rejected:
continue
if include_cat_exact.has_key(category):
res.append(line)
continue
accepted = False
for cat in catsinc:
if category.startswith(cat) or codepoint == cat:
accepted = True
break
if accepted:
res.append(line)
f.close()
# Sort based on Unicode codepoint
def mycmp(a,b):
t1 = a.split(';')
t2 = b.split(';')
n1 = long(t1[0], 16)
n2 = long(t2[0], 16)
return cmp(n1, n2)
res.sort(cmp=mycmp)
return res
def scan_ranges(lines):
"Scan continuous ranges from (filtered) UnicodeData.txt lines."
ranges = []
range_start = None
prev = None
for line in lines:
t = line.split(';')
n = long(t[0], 16)
if range_start is None:
range_start = n
else:
if n == prev + 1:
# continue range
pass
else:
ranges.append((range_start, prev))
range_start = n
prev = n
if range_start is not None:
ranges.append((range_start, prev))
return ranges
def generate_png(lines, fname):
"Generate an illustrative PNG of the character set."
from PIL import Image
m = {}
for line in lines:
t = line.split(';')
n = long(t[0], 16)
m[n] = 1
codepoints = 0x10ffff + 1
width = int(256)
height = int(math.ceil(float(codepoints) / float(width)))
im = Image.new('RGB', (width, height))
black = (0,0,0)
white = (255,255,255)
for cp in xrange(codepoints):
y = cp / width
x = cp % width
if m.has_key(long(cp)):
im.putpixel((x,y), black)
else:
im.putpixel((x,y), white)
im.save(fname)
def generate_match_table1(ranges):
"Unused match table format."
# This is an earlier match table format which is no longer used.
# IdentifierStart-UnicodeLetter has 445 ranges and generates a
# match table of 2289 bytes.
data = []
prev_re = None
def genrange(rs, re):
if (rs > re):
raise Exception('assumption failed: rs=%d re=%d' % (rs, re))
while True:
now = re - rs + 1
if now > 255:
now = 255
data.append(now) # range now
data.append(0) # skip 0
rs = rs + now
else:
data.append(now) # range now
break
def genskip(ss, se):
if (ss > se):
raise Exception('assumption failed: ss=%d se=%d' % (ss, se))
while True:
now = se - ss + 1
if now > 255:
now = 255
data.append(now) # skip now
data.append(0) # range 0
ss = ss + now
else:
data.append(now) # skip now
break
for rs, re in ranges:
if prev_re is not None:
genskip(prev_re + 1, rs - 1)
genrange(rs, re)
prev_re = re
num_entries = len(data)
# header: start of first range
# num entries
hdr = []
hdr.append(ranges[0][0] >> 8) # XXX: check that not 0x10000 or over
hdr.append(ranges[0][0] & 0xff)
hdr.append(num_entries >> 8)
hdr.append(num_entries & 0xff)
return hdr + data
def generate_match_table2(ranges):
"Unused match table format."
# Another attempt at a match table which is also unused.
# Total tables for all current classes is now 1472 bytes.
data = []
def enc(x):
while True:
if x < 0x80:
data.append(x)
break
data.append(0x80 + (x & 0x7f))
x = x >> 7
prev_re = 0
for rs, re in ranges:
r1 = rs - prev_re # 1 or above (no unjoined ranges)
r2 = re - rs # 0 or above
enc(r1)
enc(r2)
prev_re = re
enc(0) # end marker
return data
def generate_match_table3(ranges):
"Current match table format."
# Yet another attempt, similar to generate_match_table2 except
# in packing format.
#
# Total match size now (at time of writing): 1194 bytes.
#
# This is the current encoding format used in duk_lexer.c.
be = dukutil.BitEncoder()
freq = [0] * (0x10ffff + 1) # informative
def enc(x):
freq[x] += 1
if x <= 0x0e:
# 4-bit encoding
be.bits(x, 4)
return
x -= 0x0e + 1
if x <= 0xfd:
# 12-bit encoding
be.bits(0x0f, 4)
be.bits(x, 8)
return
x -= 0xfd + 1
if x <= 0xfff:
# 24-bit encoding
be.bits(0x0f, 4)
be.bits(0xfe, 8)
be.bits(x, 12)
return
x -= 0xfff + 1
if True:
# 36-bit encoding
be.bits(0x0f, 4)
be.bits(0xff, 8)
be.bits(x, 24)
return
raise Exception('cannot encode')
prev_re = 0
for rs, re in ranges:
r1 = rs - prev_re # 1 or above (no unjoined ranges)
r2 = re - rs # 0 or above
enc(r1)
enc(r2)
prev_re = re
enc(0) # end marker
data, nbits = be.getBytes(), be.getNumBits()
return data, freq
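# Worked examples of enc() above (illustrative): 14 fits the 4-bit form
# directly; 20 is emitted as 0x0f plus (20 - 15) = 5 in the 4+8 bit form;
# 300 is emitted as 0x0f, 0xfe plus (300 - 15 - 254) = 31 in the 4+8+12
# bit form.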
def main():
parser = optparse.OptionParser()
parser.add_option('--unicode-data', dest='unicode_data') # UnicodeData.txt
parser.add_option('--special-casing', dest='special_casing') # SpecialCasing.txt
parser.add_option('--include-categories', dest='include_categories')
parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE')
parser.add_option('--out-source', dest='out_source')
parser.add_option('--out-header', dest='out_header')
parser.add_option('--out-png', dest='out_png')
parser.add_option('--table-name', dest='table_name', default='match_table')
(opts, args) = parser.parse_args()
unidata = opts.unicode_data
catsinc = []
if opts.include_categories: # also guards against a missing (None) option
catsinc = opts.include_categories.split(',')
catsexc = []
if opts.exclude_categories != 'NONE':
catsexc = opts.exclude_categories.split(',')
print 'CATSEXC: %s' % repr(catsexc)
print 'CATSINC: %s' % repr(catsinc)
# pseudocategories
filter_ascii = ('ASCII' in catsexc)
filter_nonbmp = ('NONBMP' in catsexc)
# Read raw result
def filter1(x):
if filter_ascii and x <= 0x7f:
# exclude ascii
return False
if filter_nonbmp and x >= 0x10000:
# exclude non-bmp
return False
return True
print('read unicode data')
res = read_unicode_data(unidata, catsinc, catsexc, filter1)
print('done reading unicode data')
# Raw output
#print('RAW OUTPUT:')
#print('===========')
#print('\n'.join(res))
# Scan ranges
#print('')
#print('RANGES:')
#print('=======')
ranges = scan_ranges(res)
#for i in ranges:
# if i[0] == i[1]:
# print('0x%04x' % i[0])
# else:
# print('0x%04x ... 0x%04x' % (i[0], i[1]))
#print('')
print('%d ranges total' % len(ranges))
# Generate match table
#print('')
#print('MATCH TABLE:')
#print('============')
#matchtable1 = generate_match_table1(ranges)
#matchtable2 = generate_match_table2(ranges)
matchtable3, freq = generate_match_table3(ranges)
#print 'match table: %s' % repr(matchtable3)
print 'match table length: %d bytes' % len(matchtable3)
print 'encoding freq:'
for i in xrange(len(freq)):
if freq[i] == 0:
continue
print ' %6d: %d' % (i, freq[i])
print('')
print('MATCH C TABLE -> file %s' % repr(opts.out_header))
# Create C source and header files
genc = dukutil.GenerateC()
genc.emitHeader('extract_chars.py')
genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True)
if opts.out_source is not None:
f = open(opts.out_source, 'wb')
f.write(genc.getString())
f.close()
genc = dukutil.GenerateC()
genc.emitHeader('extract_chars.py')
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3)))
if opts.out_header is not None:
f = open(opts.out_header, 'wb')
f.write(genc.getString())
f.close()
# Image (for illustrative purposes only)
if opts.out_png is not None:
generate_png(res, opts.out_png)
if __name__ == '__main__':
main()

41
tools/extract_unique_options.py

@@ -0,0 +1,41 @@
#!/usr/bin/env python2
#
# Extract unique DUK_USE_xxx flags from current code base:
#
# $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in
#
import os, sys, re
# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched
# (only uppercase allowed)
re_use = re.compile(r'DUK_USE_[A-Z0-9_]+')
re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+')
def main():
uses = {}
opts = {}
for fn in sys.argv[1:]:
f = open(fn, 'rb')
for line in f:
for t in re.findall(re_use, line):
if t[-1] != '_': # skip e.g. 'DUK_USE_'
uses[t] = True
for t in re.findall(re_opt, line):
if t[-1] != '_':
opts[t] = True
f.close()
k = opts.keys()
k.sort()
for i in k:
print(i)
k = uses.keys()
k.sort()
for i in k:
print(i)
if __name__ == '__main__':
main()

44
tools/genbuildparams.py

@@ -0,0 +1,44 @@
#!/usr/bin/env python2
#
# Generate build parameter files based on build information.
# A C header is generated for C code, and a JSON file for
# build scripts etc which need to know the build config.
#
import os
import sys
import json
import optparse
import dukutil
if __name__ == '__main__':
parser = optparse.OptionParser()
parser.add_option('--version', dest='version')
parser.add_option('--git-commit', dest='git_commit')
parser.add_option('--git-describe', dest='git_describe')
parser.add_option('--git-branch', dest='git_branch')
parser.add_option('--out-json', dest='out_json')
parser.add_option('--out-header', dest='out_header')
(opts, args) = parser.parse_args()
t = {
'version': opts.version,
'git_commit': opts.git_commit,
'git_describe': opts.git_describe,
'git_branch': opts.git_branch,
}
f = open(opts.out_json, 'wb')
f.write(dukutil.json_encode(t).encode('ascii'))
f.close()
f = open(opts.out_header, 'wb')
f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n')
f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n')
f.write('/* automatically generated by genbuildparams.py, do not edit */\n')
f.write('\n')
f.write('/* DUK_VERSION is defined in duktape.h */')
f.write('\n')
f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n')
f.close()

2985
tools/genbuiltins.py

File diff suppressed because it is too large

1530
tools/genconfig.py

File diff suppressed because it is too large

5
tools/json2yaml.py

@@ -0,0 +1,5 @@
import os, sys, json, yaml
if __name__ == '__main__':
# Use safe_dump() instead of dump() to avoid tags like "!!python/unicode"
print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False))
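# Example usage (illustrative): $ python json2yaml.py < meta.json > meta.yaml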

32
tools/merge_debug_meta.py

@@ -0,0 +1,32 @@
#!/usr/bin/env python2
#
# Merge debugger YAML metadata files and output a merged JSON metadata file.
#
import os, sys, json, yaml
import optparse
if __name__ == '__main__':
parser = optparse.OptionParser()
parser.add_option('--output', dest='output', default=None, help='output JSON filename')
parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names')
parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands')
parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes')
parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes')
(opts, args) = parser.parse_args()
res = {}
def merge(fn):
with open(fn, 'rb') as f:
doc = yaml.load(f)
for k in doc.keys():
res[k] = doc[k]
merge(opts.class_names)
merge(opts.debug_commands)
merge(opts.debug_errors)
merge(opts.opcodes)
with open(opts.output, 'wb') as f:
f.write(json.dumps(res, indent=4) + '\n')
print('Wrote merged debugger metadata to ' + str(opts.output))

854
tools/prepare_sources.py

@@ -0,0 +1,854 @@
#!/usr/bin/env python2
#
# Config-and-prepare: create a duk_config.h and combined/separate sources
# for configuration options specified on the command line.
#
import os
import sys
import re
import shutil
import glob
import optparse
import tarfile
import json
import yaml
import subprocess
# Helpers
def exec_get_stdout(cmd, input=None, default=None, print_stdout=False):
try:
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
ret = proc.communicate(input=input)
if print_stdout:
sys.stdout.write(ret[0])
sys.stdout.flush()
if proc.returncode != 0:
sys.stdout.write(ret[1]) # print stderr on error
sys.stdout.flush()
if default is not None:
print('WARNING: command %r failed, return default' % cmd)
return default
raise Exception('command failed, return code %d: %r' % (proc.returncode, cmd))
return ret[0]
except:
if default is not None:
print('WARNING: command %r failed, return default' % cmd)
return default
raise
def exec_print_stdout(cmd, input=None):
ret = exec_get_stdout(cmd, input=input, print_stdout=True)
def mkdir(path):
os.mkdir(path)
def copy_file(src, dst):
with open(src, 'rb') as f_in:
with open(dst, 'wb') as f_out:
f_out.write(f_in.read())
def copy_files(filelist, srcdir, dstdir):
for i in filelist:
copy_file(os.path.join(srcdir, i), os.path.join(dstdir, i))
def copy_and_replace(src, dst, rules):
# Read and write separately to allow in-place replacement
keys = sorted(rules.keys())
res = []
with open(src, 'rb') as f_in:
for line in f_in:
for k in keys:
line = line.replace(k, rules[k])
res.append(line)
with open(dst, 'wb') as f_out:
f_out.write(''.join(res))
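# Example (hypothetical filenames and keys, for illustration only):
#
#   copy_and_replace('duk_config.h.in', 'duk_config.h',
#                    { '@DUK_VERSION@': '10500' })
#
# Keys are applied per line in sorted order, so overlapping keys should be
# avoided.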
def copy_and_cquote(src, dst):
with open(src, 'rb') as f_in:
with open(dst, 'wb') as f_out:
f_out.write('/*\n')
for line in f_in:
line = line.decode('utf-8')
f_out.write(' * ')
for c in line:
if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'):
f_out.write(c.encode('ascii'))
else:
f_out.write('\\u%04x' % ord(c))
f_out.write(' */\n')
def read_file(src, strip_last_nl=False):
with open(src, 'rb') as f:
data = f.read()
if strip_last_nl and len(data) > 0 and data[-1] == '\n':
data = data[:-1]
return data
def delete_matching_files(dirpath, cb):
for fn in os.listdir(dirpath):
if os.path.isfile(os.path.join(dirpath, fn)) and cb(fn):
#print('Deleting %r' % os.path.join(dirpath, fn))
os.unlink(os.path.join(dirpath, fn))
def create_targz(dstfile, filelist):
# https://docs.python.org/2/library/tarfile.html#examples
def _add(tf, fn): # recursive add
#print('Adding to tar: ' + fn)
if os.path.isdir(fn):
for i in sorted(os.listdir(fn)):
_add(tf, os.path.join(fn, i))
elif os.path.isfile(fn):
tf.add(fn)
else:
raise Exception('invalid file: %r' % fn)
with tarfile.open(dstfile, 'w:gz') as tf:
for fn in filelist:
_add(tf, fn)
def cstring(x):
return '"' + x + '"' # good enough for now
# DUK_VERSION is grepped from duk_api_public.h.in: it is needed for the
# public API and we want to avoid defining it in two places.
def get_duk_version(apiheader_filename):
r = re.compile(r'^#define\s+DUK_VERSION\s+(.*?)L?\s*$')
with open(apiheader_filename, 'rb') as f:
for line in f:
m = r.match(line)
if m is not None:
duk_version = int(m.group(1))
duk_major = duk_version / 10000
duk_minor = (duk_version % 10000) / 100
duk_patch = duk_version % 100
duk_version_formatted = '%d.%d.%d' % (duk_major, duk_minor, duk_patch)
return duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted
raise Exception('cannot figure out duktape version')
# Python module check and friendly errors
def check_python_modules():
# This script needs yaml; check for it explicitly to give a friendly
# install hint if it is missing.
failed = False
def _warning(module, aptPackage, pipPackage):
sys.stderr.write('\n')
sys.stderr.write('*** NOTE: Could not "import %s" needed for dist. Install it using e.g.:\n' % module)
sys.stderr.write('\n')
sys.stderr.write(' # Linux\n')
sys.stderr.write(' $ sudo apt-get install %s\n' % aptPackage)
sys.stderr.write('\n')
sys.stderr.write(' # Windows\n')
sys.stderr.write(' > pip install %s\n' % pipPackage)
try:
import yaml
except ImportError:
_warning('yaml', 'python-yaml', 'PyYAML')
failed = True
if failed:
sys.stderr.write('\n')
raise Exception('Missing some required Python modules')
check_python_modules()
# Option parsing
def main():
parser = optparse.OptionParser()
# Forced options from multiple sources are gathered into a shared list
# so that the override order remains the same as on the command line.
force_options_yaml = []
def add_force_option_yaml(option, opt, value, parser):
# XXX: check that YAML parses
force_options_yaml.append(value)
def add_force_option_file(option, opt, value, parser):
# XXX: check that YAML parses
with open(value, 'rb') as f:
force_options_yaml.append(f.read())
def add_force_option_define(option, opt, value, parser):
tmp = value.split('=')
if len(tmp) == 1:
doc = { tmp[0]: True }
elif len(tmp) == 2:
doc = { tmp[0]: tmp[1] }
else:
raise Exception('invalid option value: %r' % value)
force_options_yaml.append(yaml.safe_dump(doc))
def add_force_option_undefine(option, opt, value, parser):
tmp = value.split('=')
if len(tmp) == 1:
doc = { tmp[0]: False }
else:
raise Exception('invalid option value: %r' % value)
force_options_yaml.append(yaml.safe_dump(doc))
fixup_header_lines = []
def add_fixup_header_line(option, opt, value, parser):
fixup_header_lines.append(value)
def add_fixup_header_file(option, opt, value, parser):
with open(value, 'rb') as f:
for line in f:
if line[-1] == '\n':
line = line[:-1]
fixup_header_lines.append(line)
# Options for config-and-prepare tool itself.
parser.add_option('--source-directory', dest='source_directory', default=None, help='Directory with raw input sources (src-input/)')
parser.add_option('--output-directory', dest='output_directory', default=None, help='Directory for output files, must already exist')
parser.add_option('--duk-build-meta', dest='duk_build_meta', default=None, help='duk_build_meta.json for git commit info etc')
parser.add_option('--git-commit', dest='git_commit', default=None, help='Force git commit hash')
parser.add_option('--git-describe', dest='git_describe', default=None, help='Force git describe')
parser.add_option('--git-branch', dest='git_branch', default=None, help='Force git branch name')
# Options forwarded to genbuiltins.py.
parser.add_option('--rom-support', dest='rom_support', action='store_true', help='Add support for ROM strings/objects (increases duktape.c size considerably)')
parser.add_option('--rom-auto-lightfunc', dest='rom_auto_lightfunc', action='store_true', default=False, help='Convert ROM built-in function properties into lightfuncs automatically whenever possible')
parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', action='append', default=[], help='User strings and objects to add, YAML format (can be repeated for multiple overrides)')
# Options forwarded to genconfig.py.
parser.add_option('--config-metadata', dest='config_metadata', default=None, help='metadata directory or metadata tar.gz file')
parser.add_option('--platform', dest='platform', default=None, help='platform (default is autodetect)')
parser.add_option('--compiler', dest='compiler', default=None, help='compiler (default is autodetect)')
parser.add_option('--architecture', dest='architecture', default=None, help='architecture (default is autodetect)')
parser.add_option('--c99-types-only', dest='c99_types_only', action='store_true', default=False, help='assume C99 types, no legacy type detection')
parser.add_option('--dll', dest='dll', action='store_true', default=False, help='dll build of Duktape, affects symbol visibility macros especially on Windows')
parser.add_option('--support-feature-options', dest='support_feature_options', action='store_true', default=False, help='support DUK_OPT_xxx feature options in duk_config.h')
parser.add_option('--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help='emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)')
parser.add_option('--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help='emit preprocessor checks for config option consistency (DUK_OPT_xxx)')
parser.add_option('--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers')
parser.add_option('--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers')
parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers')
parser.add_option('--add-active-defines-macro', dest='add_active_defines_macro', action='store_true', default=False, help='add DUK_ACTIVE_DEFINES macro, for development only')
parser.add_option('--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"')
parser.add_option('-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"')
parser.add_option('--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"')
parser.add_option('-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"')
parser.add_option('--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help='force option(s) using inline YAML (e.g. --option-yaml "DUK_USE_DEEP_C_STACK: true")')
parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides')
parser.add_option('--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help='C header snippet file(s) to be appended to generated header, useful for manual option fixups')
parser.add_option('--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help='C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")')
parser.add_option('--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues')
parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate')
(opts, args) = parser.parse_args()
assert(opts.source_directory)
srcdir = opts.source_directory
assert(opts.output_directory)
outdir = opts.output_directory
# Figure out directories, git info, etc
entry_pwd = os.getcwd()
duk_build_meta = None
if opts.duk_build_meta is not None:
with open(opts.duk_build_meta, 'rb') as f:
duk_build_meta = json.loads(f.read())
duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted = \
get_duk_version(os.path.join(srcdir, 'duk_api_public.h.in'))
git_commit = None
git_branch = None
git_describe = None
if duk_build_meta is not None:
git_commit = duk_build_meta['git_commit']
git_branch = duk_build_meta['git_branch']
git_describe = duk_build_meta['git_describe']
else:
print('No --duk-build-meta, git commit information determined automatically')
if opts.git_commit is not None:
git_commit = opts.git_commit
if opts.git_describe is not None:
git_describe = opts.git_describe
if opts.git_branch is not None:
git_branch = opts.git_branch
if git_commit is None:
git_commit = exec_get_stdout([ 'git', 'rev-parse', 'HEAD' ], default='external').strip()
if git_describe is None:
git_describe = exec_get_stdout([ 'git', 'describe', '--always', '--dirty' ], default='external').strip()
if git_branch is None:
git_branch = exec_get_stdout([ 'git', 'rev-parse', '--abbrev-ref', 'HEAD' ], default='external').strip()
git_commit = str(git_commit)
git_describe = str(git_describe)
git_branch = str(git_branch)
git_commit_cstring = cstring(git_commit)
git_describe_cstring = cstring(git_describe)
git_branch_cstring = cstring(git_branch)
print('Config-and-prepare for Duktape version %s, commit %s, describe %s, branch %s' % \
(duk_version_formatted, git_commit, git_describe, git_branch))
# For now, create the src/, src-noline/, and src-separate/ structure into the
# output directory. Later on the output directory should get the specific
# variant output directly.
mkdir(os.path.join(outdir, 'src'))
mkdir(os.path.join(outdir, 'src-noline'))
mkdir(os.path.join(outdir, 'src-separate'))
# Separate sources are mostly copied as is at present.
copy_files([
'duk_alloc_default.c',
'duk_api_internal.h',
'duk_api_stack.c',
'duk_api_heap.c',
'duk_api_buffer.c',
'duk_api_call.c',
'duk_api_codec.c',
'duk_api_compile.c',
'duk_api_bytecode.c',
'duk_api_memory.c',
'duk_api_object.c',
'duk_api_string.c',
'duk_api_time.c',
'duk_api_debug.c',
'duk_bi_array.c',
'duk_bi_boolean.c',
'duk_bi_buffer.c',
'duk_bi_date.c',
'duk_bi_date_unix.c',
'duk_bi_date_windows.c',
'duk_bi_duktape.c',
'duk_bi_error.c',
'duk_bi_function.c',
'duk_bi_global.c',
'duk_bi_json.c',
'duk_bi_math.c',
'duk_bi_number.c',
'duk_bi_object.c',
'duk_bi_pointer.c',
'duk_bi_protos.h',
'duk_bi_regexp.c',
'duk_bi_string.c',
'duk_bi_proxy.c',
'duk_bi_thread.c',
'duk_bi_thrower.c',
'duk_debug_fixedbuffer.c',
'duk_debug.h',
'duk_debug_macros.c',
'duk_debug_vsnprintf.c',
'duk_error_augment.c',
'duk_error.h',
'duk_error_longjmp.c',
'duk_error_macros.c',
'duk_error_misc.c',
'duk_error_throw.c',
'duk_forwdecl.h',
'duk_harray.h',
'duk_hbuffer_alloc.c',
'duk_hbuffer.h',
'duk_hbuffer_ops.c',
'duk_hcompfunc.h',
'duk_heap_alloc.c',
'duk_heap.h',
'duk_heap_hashstring.c',
'duk_heaphdr.h',
'duk_heap_markandsweep.c',
'duk_heap_memory.c',
'duk_heap_misc.c',
'duk_heap_refcount.c',
'duk_heap_stringcache.c',
'duk_heap_stringtable.c',
'duk_hnatfunc.h',
'duk_hobject_alloc.c',
'duk_hobject_class.c',
'duk_hobject_enum.c',
'duk_hobject_finalizer.c',
'duk_hobject.h',
'duk_hobject_misc.c',
'duk_hobject_pc2line.c',
'duk_hobject_props.c',
'duk_hstring.h',
'duk_hstring_misc.c',
'duk_hthread_alloc.c',
'duk_hthread_builtins.c',
'duk_hthread.h',
'duk_hthread_misc.c',
'duk_hthread_stacks.c',
'duk_hbufobj.h',
'duk_hbufobj_misc.c',
'duk_debugger.c',
'duk_debugger.h',
'duk_internal.h',
'duk_jmpbuf.h',
'duk_exception.h',
'duk_js_bytecode.h',
'duk_js_call.c',
'duk_js_compiler.c',
'duk_js_compiler.h',
'duk_js_executor.c',
'duk_js.h',
'duk_json.h',
'duk_js_ops.c',
'duk_js_var.c',
'duk_lexer.c',
'duk_lexer.h',
'duk_numconv.c',
'duk_numconv.h',
'duk_regexp_compiler.c',
'duk_regexp_executor.c',
'duk_regexp.h',
'duk_tval.c',
'duk_tval.h',
'duk_unicode.h',
'duk_unicode_support.c',
'duk_unicode_tables.c',
'duk_util_bitdecoder.c',
'duk_util_bitencoder.c',
'duk_util.h',
'duk_util_hashbytes.c',
'duk_util_hashprime.c',
'duk_util_misc.c',
'duk_util_tinyrandom.c',
'duk_util_bufwriter.c',
'duk_selftest.c',
'duk_selftest.h',
'duk_strings.h',
'duk_replacements.c',
'duk_replacements.h'
], srcdir, os.path.join(outdir, 'src-separate'))
# Build temp versions of LICENSE.txt and AUTHORS.rst for embedding into
# autogenerated C/H files.
# XXX: use a proper temp directory
copy_and_cquote('LICENSE.txt', os.path.join(outdir, 'LICENSE.txt.tmp'))
copy_and_cquote('AUTHORS.rst', os.path.join(outdir, 'AUTHORS.rst.tmp'))
# Create a duk_config.h.
# XXX: might be easier to invoke genconfig directly
def forward_genconfig_options():
res = []
res += [ '--metadata', os.path.abspath(opts.config_metadata) ] # rename option, --config-metadata => --metadata
if opts.platform is not None:
res += [ '--platform', opts.platform ]
if opts.compiler is not None:
res += [ '--compiler', opts.compiler ]
if opts.architecture is not None:
res += [ '--architecture', opts.architecture ]
if opts.c99_types_only:
res += [ '--c99-types-only' ]
if opts.dll:
res += [ '--dll' ]
if opts.support_feature_options:
res += [ '--support-feature-options' ]
if opts.emit_legacy_feature_check:
res += [ '--emit-legacy-feature-check' ]
if opts.emit_config_sanity_check:
res += [ '--emit-config-sanity-check' ]
if opts.omit_removed_config_options:
res += [ '--omit-removed-config-options' ]
if opts.omit_deprecated_config_options:
res += [ '--omit-deprecated-config-options' ]
if opts.omit_unused_config_options:
res += [ '--omit-unused-config-options' ]
if opts.add_active_defines_macro:
res += [ '--add-active-defines-macro' ]
for i in force_options_yaml:
res += [ '--option-yaml', i ]
for i in fixup_header_lines:
res += [ '--fixup-line', i ]
if not opts.sanity_strict:
res += [ '--sanity-warning' ]
if opts.use_cpp_warning:
res += [ '--use-cpp-warning' ]
return res
cmd = [
sys.executable, os.path.join('tools', 'genconfig.py'),
'--output', os.path.join(outdir, 'duk_config.h.tmp'),
'--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch
]
cmd += forward_genconfig_options()
cmd += [
'duk-config-header'
]
print(repr(cmd))
exec_print_stdout(cmd)
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src', 'duk_config.h'))
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-noline', 'duk_config.h'))
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-separate', 'duk_config.h'))
# Build duktape.h from parts, with some git-related replacements.
# The only difference between single and separate file duktape.h
# is the internal DUK_SINGLE_FILE define.
#
# Newline after 'i \':
# http://stackoverflow.com/questions/25631989/sed-insert-line-command-osx
copy_and_replace(os.path.join(srcdir, 'duktape.h.in'), os.path.join(outdir, 'src', 'duktape.h'), {
'@DUK_SINGLE_FILE@': '#define DUK_SINGLE_FILE',
'@LICENSE_TXT@': read_file(os.path.join(outdir, 'LICENSE.txt.tmp'), strip_last_nl=True),
'@AUTHORS_RST@': read_file(os.path.join(outdir, 'AUTHORS.rst.tmp'), strip_last_nl=True),
'@DUK_API_PUBLIC_H@': read_file(os.path.join(srcdir, 'duk_api_public.h.in'), strip_last_nl=True),
'@DUK_DBLUNION_H@': read_file(os.path.join(srcdir, 'duk_dblunion.h.in'), strip_last_nl=True),
'@DUK_VERSION_FORMATTED@': duk_version_formatted,
'@GIT_COMMIT@': git_commit,
'@GIT_COMMIT_CSTRING@': git_commit_cstring,
'@GIT_DESCRIBE@': git_describe,
'@GIT_DESCRIBE_CSTRING@': git_describe_cstring,
'@GIT_BRANCH@': git_branch,
'@GIT_BRANCH_CSTRING@': git_branch_cstring
})
# keep the line so line numbers match between the two variant headers
copy_and_replace(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-separate', 'duktape.h'), {
'#define DUK_SINGLE_FILE': '#undef DUK_SINGLE_FILE'
})
copy_file(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-noline', 'duktape.h'))
# Autogenerated strings and built-in files
#
# There are currently no profile-specific variants of strings/builtins, but
# this will probably change when functions are added/removed based on profile.
# XXX: nuke this util, it's pointless
exec_print_stdout([
sys.executable,
os.path.join('tools', 'genbuildparams.py'),
'--version=' + str(duk_version),
'--git-commit=' + git_commit,
'--git-describe=' + git_describe,
'--git-branch=' + git_branch,
'--out-json=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'),
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_buildparams.h.tmp')
])
res = exec_get_stdout([
sys.executable,
os.path.join('tools', 'scan_used_stridx_bidx.py')
] + glob.glob(os.path.join(srcdir, '*.c')) \
+ glob.glob(os.path.join(srcdir, '*.h')) \
+ glob.glob(os.path.join(srcdir, '*.h.in'))
)
with open(os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'), 'wb') as f:
f.write(res)
gb_opts = []
gb_opts.append('--ram-support') # enable by default
if opts.rom_support:
# ROM string/object support is not enabled by default because
# it increases the generated duktape.c considerably.
print('Enabling --rom-support for genbuiltins.py')
gb_opts.append('--rom-support')
if opts.rom_auto_lightfunc:
print('Enabling --rom-auto-lightfunc for genbuiltins.py')
gb_opts.append('--rom-auto-lightfunc')
for fn in opts.user_builtin_metadata:
print('Forwarding --user-builtin-metadata %s' % fn)
gb_opts.append('--user-builtin-metadata')
gb_opts.append(fn)
exec_print_stdout([
sys.executable,
os.path.join('tools', 'genbuiltins.py'),
'--buildinfo=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'),
'--used-stridx-metadata=' + os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'),
'--strings-metadata=' + os.path.join(srcdir, 'strings.yaml'),
'--objects-metadata=' + os.path.join(srcdir, 'builtins.yaml'),
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_builtins.h'),
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_builtins.c'),
'--out-metadata-json=' + os.path.join(outdir, 'duk_build_meta.json')
] + gb_opts)
# Autogenerated Unicode files
#
# Note: not all of the generated headers are used. For instance, the
# match table for "WhiteSpace-Z" is not used, because a custom piece
# of code handles that particular match.
#
# UnicodeData.txt contains ranges expressed like this:
#
# 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
# 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
#
# These are currently decoded into individual characters as a prestep.
#
# For IDPART:
# UnicodeCombiningMark -> categories Mn, Mc
# UnicodeDigit -> categories Nd
# UnicodeConnectorPunctuation -> categories Pc
# Whitespace (unused now)
WHITESPACE_INCL='Zs' # USP = Any other Unicode space separator
WHITESPACE_EXCL='NONE'
# Unicode letter (unused now)
LETTER_INCL='Lu,Ll,Lt,Lm,Lo'
LETTER_EXCL='NONE'
LETTER_NOA_INCL='Lu,Ll,Lt,Lm,Lo'
LETTER_NOA_EXCL='ASCII'
LETTER_NOABMP_INCL=LETTER_NOA_INCL
LETTER_NOABMP_EXCL='ASCII,NONBMP'
# Identifier start
# E5 Section 7.6
IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F'
IDSTART_EXCL='NONE'
IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F'
IDSTART_NOA_EXCL='ASCII'
IDSTART_NOABMP_INCL=IDSTART_NOA_INCL
IDSTART_NOABMP_EXCL='ASCII,NONBMP'
# Identifier start - Letter: allows matching of (rarely needed) 'Letter'
# production space efficiently with the help of IdentifierStart. The
# 'Letter' production is only needed for case conversion of the Greek
# final sigma.
IDSTART_MINUS_LETTER_INCL=IDSTART_NOA_INCL
IDSTART_MINUS_LETTER_EXCL='Lu,Ll,Lt,Lm,Lo'
IDSTART_MINUS_LETTER_NOA_INCL=IDSTART_NOA_INCL
IDSTART_MINUS_LETTER_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII'
IDSTART_MINUS_LETTER_NOABMP_INCL=IDSTART_NOA_INCL
IDSTART_MINUS_LETTER_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII,NONBMP'
# Identifier start - Identifier part
# E5 Section 7.6: IdentifierPart, but remove IdentifierStart (already above)
IDPART_MINUS_IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D'
IDPART_MINUS_IDSTART_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F'
IDPART_MINUS_IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D'
IDPART_MINUS_IDSTART_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII'
IDPART_MINUS_IDSTART_NOABMP_INCL=IDPART_MINUS_IDSTART_NOA_INCL
IDPART_MINUS_IDSTART_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII,NONBMP'
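# Note: the INCL/EXCL values above mix Unicode general category names
# (Lu, Ll, Nd, ...) with individual codepoints in hex (0024 = '$',
# 005F = '_'), plus the special names ASCII, NONBMP, and NONE which are
# handled by extract_chars.py.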
print('Expand UnicodeData.txt ranges')
exec_print_stdout([
sys.executable,
os.path.join('tools', 'prepare_unicode_data.py'),
os.path.join(srcdir, 'UnicodeData.txt'),
os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp')
])
def extract_chars(incl, excl, suffix):
#print('- extract_chars: %s %s %s' % (incl, excl, suffix))
res = exec_get_stdout([
sys.executable,
os.path.join('tools', 'extract_chars.py'),
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'),
'--include-categories=' + incl,
'--exclude-categories=' + excl,
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.c.tmp' % suffix),
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.h.tmp' % suffix),
'--table-name=' + 'duk_unicode_%s' % suffix
])
with open(os.path.join(outdir, 'src-separate', suffix + '.txt'), 'wb') as f:
f.write(res)
def extract_caseconv():
#print('- extract_caseconv case conversion')
res = exec_get_stdout([
sys.executable,
os.path.join('tools', 'extract_caseconv.py'),
'--command=caseconv_bitpacked',
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'),
'--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'),
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'),
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'),
'--table-name-lc=duk_unicode_caseconv_lc',
'--table-name-uc=duk_unicode_caseconv_uc'
])
with open(os.path.join(outdir, 'src-separate', 'caseconv.txt'), 'wb') as f:
f.write(res)
#print('- extract_caseconv canon lookup')
res = exec_get_stdout([
sys.executable,
os.path.join('tools', 'extract_caseconv.py'),
'--command=re_canon_lookup',
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'),
'--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'),
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'),
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'),
'--table-name-re-canon-lookup=duk_unicode_re_canon_lookup'
])
with open(os.path.join(outdir, 'src-separate', 'caseconv_re_canon_lookup.txt'), 'wb') as f:
f.write(res)
print('Create Unicode tables for codepoint classes')
extract_chars(WHITESPACE_INCL, WHITESPACE_EXCL, 'ws')
extract_chars(LETTER_INCL, LETTER_EXCL, 'let')
extract_chars(LETTER_NOA_INCL, LETTER_NOA_EXCL, 'let_noa')
extract_chars(LETTER_NOABMP_INCL, LETTER_NOABMP_EXCL, 'let_noabmp')
extract_chars(IDSTART_INCL, IDSTART_EXCL, 'ids')
extract_chars(IDSTART_NOA_INCL, IDSTART_NOA_EXCL, 'ids_noa')
extract_chars(IDSTART_NOABMP_INCL, IDSTART_NOABMP_EXCL, 'ids_noabmp')
extract_chars(IDSTART_MINUS_LETTER_INCL, IDSTART_MINUS_LETTER_EXCL, 'ids_m_let')
extract_chars(IDSTART_MINUS_LETTER_NOA_INCL, IDSTART_MINUS_LETTER_NOA_EXCL, 'ids_m_let_noa')
extract_chars(IDSTART_MINUS_LETTER_NOABMP_INCL, IDSTART_MINUS_LETTER_NOABMP_EXCL, 'ids_m_let_noabmp')
extract_chars(IDPART_MINUS_IDSTART_INCL, IDPART_MINUS_IDSTART_EXCL, 'idp_m_ids')
extract_chars(IDPART_MINUS_IDSTART_NOA_INCL, IDPART_MINUS_IDSTART_NOA_EXCL, 'idp_m_ids_noa')
extract_chars(IDPART_MINUS_IDSTART_NOABMP_INCL, IDPART_MINUS_IDSTART_NOABMP_EXCL, 'idp_m_ids_noabmp')
print('Create Unicode tables for case conversion')
extract_caseconv()
print('Combine sources and clean up')
# Inject autogenerated files into source and header files so that they are
# usable (for all profiles and define cases) directly.
#
# The injection points use a standard C preprocessor #include syntax
# (earlier these were actual includes).
copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode.h'), os.path.join(outdir, 'src-separate', 'duk_unicode.h'), {
'#include "duk_unicode_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_m_let_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_m_let_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_idp_m_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_idp_m_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_caseconv.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'), strip_last_nl=True),
'#include "duk_unicode_re_canon_lookup.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'), strip_last_nl=True)
})
copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), {
'#include "duk_unicode_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_m_let_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_ids_m_let_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_idp_m_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_idp_m_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_caseconv.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'), strip_last_nl=True),
'#include "duk_unicode_re_canon_lookup.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'), strip_last_nl=True)
})
# Clean up some temporary files
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[-4:] == '.tmp')
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x in [
'ws.txt',
'let.txt', 'let_noa.txt', 'let_noabmp.txt',
'ids.txt', 'ids_noa.txt', 'ids_noabmp.txt',
'ids_m_let.txt', 'ids_m_let_noa.txt', 'ids_m_let_noabmp.txt',
'idp_m_ids.txt', 'idp_m_ids_noa.txt', 'idp_m_ids_noabmp.txt'
])
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[0:8] == 'caseconv' and x[-4:] == '.txt')
# Create a combined source file, duktape.c, into a separate combined source
# directory. This allows users to just include "duktape.c", "duktape.h", and
# "duk_config.h" into a project and maximizes inlining and size optimization
# opportunities even with older compilers. Because some projects include
# these files into their repository, the result should be deterministic and
# diffable. Also, it must retain __FILE__/__LINE__ behavior through
# preprocessor directives. Whitespace and comments can be stripped as long
# as the other requirements are met. For some users it's preferable *not*
# to use #line directives in the combined source, so a separate variant is
# created for that, see: https://github.com/svaarala/duktape/pull/363.
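# A rough sketch of what the combined duktape.c looks like (filenames and
# line numbers illustrative only; the '#line' directives appear only in the
# src/ variant, not in src-noline/):
#
#   /* ...prologue with license and version information... */
#   /* #include duk_internal.h */
#   #line 1 "duk_replacements.c"
#   ...
#   #line 1 "duk_debug_macros.c"
#   ...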
def create_source_prologue(license_file, authors_file):
res = []
# Because duktape.c/duktape.h/duk_config.h are often distributed or
# included in project sources as is, add a license reminder and
# Duktape version information to the duktape.c header (duktape.h
# already contains them).
duk_major = duk_version / 10000
duk_minor = duk_version / 100 % 100
duk_patch = duk_version % 100
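# e.g. duk_version 10500 -> major 1, minor 5, patch 0, i.e. "1.5.0"
# (relies on Python 2 integer division semantics for '/')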
res.append('/*')
res.append(' * Single source autogenerated distributable for Duktape %d.%d.%d.' % (duk_major, duk_minor, duk_patch))
res.append(' *')
res.append(' * Git commit %s (%s).' % (git_commit, git_describe))
res.append(' * Git branch %s.' % git_branch)
res.append(' *')
res.append(' * See Duktape AUTHORS.rst and LICENSE.txt for copyright and')
res.append(' * licensing information.')
res.append(' */')
res.append('')
# Add LICENSE.txt and AUTHORS.rst to combined source so that they're automatically
# included and are up-to-date.
res.append('/* LICENSE.txt */')
with open(license_file, 'rb') as f:
for line in f:
res.append(line.strip())
res.append('')
res.append('/* AUTHORS.rst */')
with open(authors_file, 'rb') as f:
for line in f:
res.append(line.strip())
return '\n'.join(res) + '\n'
def select_combined_sources():
# These files must appear before the alphabetically sorted
# ones so that static variables get defined before they're
# used. We can't forward declare them because that would
# cause C++ issues (see GH-63). When changing, verify by
# compiling with g++.
handpick = [
'duk_replacements.c',
'duk_debug_macros.c',
'duk_builtins.c',
'duk_error_macros.c',
'duk_unicode_support.c',
'duk_util_misc.c',
'duk_util_hashprime.c',
'duk_hobject_class.c'
]
files = []
for fn in handpick:
files.append(fn)
for fn in sorted(os.listdir(os.path.join(outdir, 'src-separate'))):
f_ext = os.path.splitext(fn)[1]
if f_ext not in [ '.c' ]:
continue
if fn in files:
continue
files.append(fn)
res = map(lambda x: os.path.join(outdir, 'src-separate', x), files)
#print(repr(files))
#print(repr(res))
return res
with open(os.path.join(outdir, 'prologue.tmp'), 'wb') as f:
f.write(create_source_prologue(os.path.join(outdir, 'LICENSE.txt.tmp'), os.path.join(outdir, 'AUTHORS.rst.tmp')))
exec_print_stdout([
sys.executable,
os.path.join('tools', 'combine_src.py'),
'--include-path', os.path.join(outdir, 'src-separate'),
'--include-exclude', 'duk_config.h', # don't inline
'--include-exclude', 'duktape.h', # don't inline
'--prologue', os.path.join(outdir, 'prologue.tmp'),
'--output-source', os.path.join(outdir, 'src', 'duktape.c'),
'--output-metadata', os.path.join(outdir, 'src', 'metadata.json'),
'--line-directives'
] + select_combined_sources())
exec_print_stdout([
sys.executable,
os.path.join('tools', 'combine_src.py'),
'--include-path', os.path.join(outdir, 'src-separate'),
'--include-exclude', 'duk_config.h', # don't inline
'--include-exclude', 'duktape.h', # don't inline
'--prologue', os.path.join(outdir, 'prologue.tmp'),
'--output-source', os.path.join(outdir, 'src-noline', 'duktape.c'),
'--output-metadata', os.path.join(outdir, 'src-noline', 'metadata.json')
] + select_combined_sources())
# Clean up remaining temp files
delete_matching_files(outdir, lambda x: x[-4:] == '.tmp')
print('Config-and-prepare finished successfully')
if __name__ == '__main__':
main()

37
tools/prepare_unicode_data.py

@@ -0,0 +1,37 @@
#!/usr/bin/env python2
#
# UnicodeData.txt may contain ranges in addition to individual characters.
# Unpack the ranges into individual characters for the other scripts to use.
#
import os
import sys
def main():
f_in = open(sys.argv[1], 'rb')
f_out = open(sys.argv[2], 'wb')
while True:
line = f_in.readline()
if line == '' or line == '\n':
break
parts = line.split(';') # keep newline
if parts[1].endswith('First>'):
line2 = f_in.readline()
parts2 = line2.split(';')
if not parts2[1].endswith('Last>'):
raise Exception('cannot parse range')
cp1 = long(parts[0], 16)
cp2 = long(parts2[0], 16)
suffix = ';'.join(parts[1:])
for i in xrange(cp1, cp2 + 1): # inclusive
f_out.write('%04X;%s' % (i, suffix))
else:
f_out.write(line)
f_in.close()
f_out.flush()
f_out.close()
if __name__ == '__main__':
main()

26
tools/resolve_combined_lineno.py

@@ -0,0 +1,26 @@
#!/usr/bin/env python2
#
# Resolve a line number in the combined source into an uncombined file/line
# using a dist/src/metadata.json file.
#
# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345
#
import os
import sys
import json
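# line_map entries written by combine_src.py look like (values illustrative):
#
#   { "original_file": "duk_api_stack.c", "original_line": 1, "combined_line": 1234 }
#
# The last entry whose combined_line is <= the requested line number is the
# relevant one, hence the reversed() scan in main() below.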
def main():
with open(sys.argv[1], 'rb') as f:
metadata = json.loads(f.read())
lineno = int(sys.argv[2])
for e in reversed(metadata['line_map']):
if lineno >= e['combined_line']:
orig_lineno = e['original_line'] + (lineno - e['combined_line'])
print('%s:%d -> %s:%d' % ('duktape.c', lineno,
e['original_file'], orig_lineno))
break
if __name__ == '__main__':
main()

135
tools/scan_strings.py

@@ -0,0 +1,135 @@
#!/usr/bin/env python2
#
# Scan potential external strings from Ecmascript and C files.
#
# Very simplistic example with a lot of limitations:
#
# - Doesn't handle multiple variables in a variable declaration
#
# - Only extracts strings from C files, these may correspond to
# Duktape/C bindings (but in many cases don't)
#
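# Example invocation (illustrative):
#
#   $ python scan_strings.py src/*.js src/*.c > scanned_strings.json
#
# Output is a JSON document written to stdout, see main() below.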
import os
import sys
import re
import json
strmap = {}
# Ecmascript function declaration
re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE)
# Ecmascript variable declaration
# XXX: doesn't handle multiple variables
re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE)
# Ecmascript variable assignment
re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE)
# Ecmascript dotted property reference (also matches numbers like
# '4.0', which are separately rejected below)
re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE)
re_digits = re.compile(r'^\d+$', re.UNICODE)
# Ecmascript or C string literal
re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE)
re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE)
def strDecode(x):
# Need to decode hex, unicode, and other escapes. Python syntax
# is close enough to C and Ecmascript, so use eval for now.
try:
return eval('u' + x) # interpret as unicode string
except:
sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n')
return None
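# Example (illustrative): strDecode('"\\u00e9x"') returns u'\xe9x', while a
# literal that Python cannot eval returns None and is skipped by the caller.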
def scan(f, fn):
global strmap
# Scan rules depend on file type
if fn[-2:] == '.c':
use_funcname = False
use_vardecl = False
use_varassign = False
use_propref = False
use_strlit_dquot = True
use_strlit_squot = False
else:
use_funcname = True
use_vardecl = True
use_varassign = True
use_propref = True
use_strlit_dquot = True
use_strlit_squot = True
for line in f:
# Assume input data is UTF-8
line = line.decode('utf-8')
if use_funcname:
for m in re_funcname.finditer(line):
strmap[m.group(1)] = True
if use_vardecl:
for m in re_vardecl.finditer(line):
strmap[m.group(1)] = True
if use_varassign:
for m in re_varassign.finditer(line):
strmap[m.group(1)] = True
if use_propref:
for m in re_propref.finditer(line):
parts = m.group(1).split('.')
if re_digits.match(parts[0]) is not None:
# Probably a number ('4.0' or such)
pass
else:
for part in parts:
strmap[part] = True
if use_strlit_dquot:
for m in re_strlit_dquot.finditer(line):
s = strDecode(m.group(1))
if s is not None:
strmap[s] = True
if use_strlit_squot:
for m in re_strlit_squot.finditer(line):
s = strDecode(m.group(1))
if s is not None:
strmap[s] = True
def main():
for fn in sys.argv[1:]:
f = open(fn, 'rb')
scan(f, fn)
f.close()
strs = []
strs_base64 = []
doc = {
# Strings as Unicode strings
'scanned_strings': strs,
# Strings as base64-encoded UTF-8 data, which should be ready
# to be used in C code (Duktape internal string representation
# is UTF-8)
'scanned_strings_base64': strs_base64
}
k = strmap.keys()
k.sort()
for s in k:
strs.append(s)
t = s.encode('utf-8').encode('base64')
if len(t) > 0 and t[-1] == '\n':
t = t[0:-1]
strs_base64.append(t)
print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True))
if __name__ == '__main__':
main()

56
tools/scan_used_stridx_bidx.py

@@ -0,0 +1,56 @@
#!/usr/bin/env python2
#
# Scan Duktape code base for references to built-in strings and built-in
# objects, i.e. for:
#
# - Strings which will need DUK_STRIDX_xxx constants and a place in the
# thr->strs[] array.
#
# - Objects which will need DUK_BIDX_xxx constants and a place in the
# thr->builtins[] array.
#
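# Example output (define names illustrative; real output is sorted):
#
#   {
#       "used_stridx_defines": [ "DUK_STRIDX_LENGTH", "..." ],
#       "used_bidx_defines": [ "DUK_BIDX_GLOBAL", "..." ],
#       "count_used_stridx_defines": 123,
#       "count_used_bidx_defines": 12
#   }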
import os
import sys
import re
import json
re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE)
re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE)
re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE)
re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE)
def main():
str_defs = {}
obj_defs = {}
for fn in sys.argv[1:]:
with open(fn, 'rb') as f:
d = f.read()
for m in re.finditer(re_str_stridx, d):
str_defs[m.group(1)] = True
for m in re.finditer(re_str_heap, d):
str_defs[m.group(1)] = True
for m in re.finditer(re_str_hthread, d):
str_defs[m.group(1)] = True
for m in re.finditer(re_obj_bidx, d):
obj_defs[m.group(1)] = True
str_used = []
for k in sorted(str_defs.keys()):
str_used.append('DUK_STRIDX_' + k)
obj_used = []
for k in sorted(obj_defs.keys()):
obj_used.append('DUK_BIDX_' + k)
doc = {
'used_stridx_defines': str_used,
'used_bidx_defines': obj_used,
'count_used_stridx_defines': len(str_used),
'count_used_bidx_defines': len(obj_used)
}
print(json.dumps(doc, indent=4))
if __name__ == '__main__':
main()

2
util/yaml2json.py → tools/yaml2json.py

@@ -1,4 +1,4 @@
import os, sys, json, yaml
if __name__ == '__main__':
    print(json.dumps(yaml.load(sys.stdin)))

62
util/autofix_debuglog_calls.py

@@ -26,47 +26,47 @@ import re
re_callsite = re.compile(r'^\s*(DUK_D+PRINT).*?;$')
wrappers = {
    'DUK_DPRINT': 'DUK_D',
    'DUK_DDPRINT': 'DUK_DD',
    'DUK_DDDPRINT': 'DUK_DDD'
}
warnings = []
def process(filename):
    f = open(filename, 'rb')
    output = []
    linenumber = 0
    fixes = 0
    for line in f:
        linenumber += 1
        if 'DPRINT' not in line:
            output.append(line)
            continue
        m = re_callsite.match(line)
        if m is None:
            output.append(line)
            continue
        log_macro = m.group(1)
        log_wrapper = wrappers[log_macro]
        line = line.replace(log_macro, log_wrapper + '(' + log_macro)  # DUK_DPRINT( -> DUK_D(DUK_DPRINT(
        line = line.replace(');', '));')  # ...); -> ...));
        output.append(line)
        fixes += 1
    f.close()
    if fixes > 0:
        print '%s: %d fixes' % (filename, fixes)
    f = open(filename, 'wb')
    f.write(''.join(output))
    f.close()
def main():
    for filename in sys.argv[1:]:
        process(filename)
if __name__ == '__main__':
    main()

722
util/check_code_policy.py

@@ -13,17 +13,16 @@ import re
import optparse
class Problem:
    filename = None
    linenumber = None
    line = None
    reason = None
    def __init__(self, filename, linenumber, line, reason):
        self.filename = filename
        self.linenumber = linenumber
        self.line = line
        self.reason = reason
re_debuglog_callsite = re.compile(r'^.*?(DUK_D+PRINT).*?$')
re_trailing_ws = re.compile(r'^.*?\s$')
@@ -34,115 +33,117 @@ re_nonascii = re.compile(r'^.*?[\x80-\xff].*?$')
re_func_decl_or_def = re.compile(r'^(\w+)\s+(?:\w+\s+)*(\w+)\(.*?.*?$') # may not finish on same line
re_cpp_comment = re.compile(r'^.*?//.*?$')
fixmeString = 'FIX' + 'ME' # avoid triggering a code policy check warning :)
# These identifiers are wrapped in duk_config.h, and should only be used
# through the wrappers elsewhere.
rejected_plain_identifiers_list = [
    # math classification
    'fpclassify',
    'signbit',
    'isfinite',
    'isnan',
    'isinf',
    'FP_NAN',
    'FP_INFINITE',
    'FP_ZERO',
    'FP_SUBNORMAL',
    'FP_NORMAL',
    # math functions
    'fabs',
    'fmin',
    'fmax',
    'floor',
    'ceil',
    'fmod',
    'pow',
    'acos',
    'asin',
    'atan',
    'atan2',
    'sin',
    'cos',
    'tan',
    'exp',
    'log',
    'sqrt',
    # memory functions
    'malloc',
    'realloc',
    'calloc',
    'free',
    'memcpy',
    'memmove',
    'memcmp',
    'memset',
    # string functions
    'strlen',
    'strcmp',
    'strncmp',
    'printf',
    'fprintf',
    'sprintf',
    '_snprintf',
    'snprintf',
    'vsprintf',
    '_vsnprintf',
    'vsnprintf',
    'sscanf',
    'vsscanf',
    # streams
    'stdout',
    'stderr',
    'stdin',
    # file ops
    'fopen',
    'fclose',
    'fread',
    'fwrite',
    'fseek',
    'ftell',
    'fflush',
    'fputc',
    # misc
    'abort',
    'exit',
    'setjmp',
    'longjmp',
    # variable/argument names which have shadowing issues with platform headers
    # see e.g. https://github.com/svaarala/duktape/pull/810
    'index',
    'rindex',
    # for consistency avoid these too, use obj_idx rather than obj_index, etc
    'obj_index',
    'from_index',
    'to_index',
    'arr_index',
    'uindex',
]
rejected_plain_identifiers = {}
for id in rejected_plain_identifiers_list:
    rejected_plain_identifiers[id] = True
debuglog_wrappers = {
    'DUK_DPRINT': 'DUK_D',
    'DUK_DDPRINT': 'DUK_DD',
    'DUK_DDDPRINT': 'DUK_DDD'
}
allowed_visibility_macros = [
    'DUK_EXTERNAL_DECL',
    'DUK_EXTERNAL',
    'DUK_INTERNAL_DECL',
    'DUK_INTERNAL',
    'DUK_LOCAL_DECL',
    'DUK_LOCAL'
]
problems = []
@@ -155,305 +156,318 @@ re_repl_expect_strings = re.compile(r'/\*===.*?===*?\*/', re.DOTALL)
re_not_newline = re.compile(r'[^\n]+', re.DOTALL)
def repl_c(m):
    tmp = re.sub(re_not_newline, '', m.group(0))
    if tmp == '':
        tmp = ' '  # avoid /**/
    return '/*' + tmp + '*/'
def repl_cpp(m):
    return '// removed\n'
def repl_dquot(m):
return '"' + ('.' * (len(m.group(0)) - 2)) + '"'
return '"' + ('.' * (len(m.group(0)) - 2)) + '"'
def repl_squot(m):
return "'" + ('.' * (len(m.group(0)) - 2)) + "'"
return "'" + ('.' * (len(m.group(0)) - 2)) + "'"
def removeLiterals(data):
    data = re.sub(re_repl_string_literals_dquot, repl_dquot, data)
    data = re.sub(re_repl_string_literals_squot, repl_squot, data)
    return data
def removeCCommentsAndLiterals(data):
    data = re.sub(re_repl_c_comments, repl_c, data)
    data = re.sub(re_repl_string_literals_dquot, repl_dquot, data)
    data = re.sub(re_repl_string_literals_squot, repl_squot, data)
    return data
def removeAnyCommentsAndLiterals(data):
    data = re.sub(re_repl_c_comments, repl_c, data)
    data = re.sub(re_repl_cpp_comments, repl_cpp, data)
    data = re.sub(re_repl_string_literals_dquot, repl_dquot, data)
    data = re.sub(re_repl_string_literals_squot, repl_squot, data)
    return data
def removeExpectStrings(data):
    def repl(m):
        tmp = re.sub(re_not_newline, '', m.group(0))
        if tmp == '':
            tmp = ' '  # avoid /*======*/
        return '/*===' + tmp + '===*/'
    data = re.sub(re_repl_expect_strings, repl, data)
    return data
def checkDebugLogCalls(lines, idx, filename):
    # Allowed debug log forms:
    #
    #   DUK_D(DUK_DPRINT(...))
    #   DUK_DD(DUK_DDPRINT(...))
    #   DUK_DDD(DUK_DDDPRINT(...))
    #
    # The calls may span multiple lines, but the wrapper (DUK_D)
    # and the log macro (DUK_DPRINT) must be on the same line.
    line = lines[idx]
    if 'DPRINT' not in line:
        return
    m = re_debuglog_callsite.match(line)
    if m is None:
        return
    log_macro = m.group(1)
    log_wrapper = debuglog_wrappers[log_macro]
    if log_wrapper + '(' in line:
        return
    # exclude '#define DUK_DPRINT...' macros in duk_debug.h
    if len(line) >= 1 and line[0] == '#':
        return
    # exclude a few comment lines in duk_debug.h
    if len(line) >= 3 and line[0:3] == ' * ':
        return
    raise Exception('invalid debug log call form')
def checkTrailingWhitespace(lines, idx, filename):
    line = lines[idx]
    if len(line) > 0 and line[-1] == '\n':
        line = line[:-1]
    m = re_trailing_ws.match(line)
    if m is None:
        return
    raise Exception('trailing whitespace')
def checkCarriageReturns(lines, idx, filename):
    line = lines[idx]
    if not '\x0d' in line:
        return
    raise Exception('carriage return')
def checkMixedIndent(lines, idx, filename):
    line = lines[idx]
    if not '\x20\x09' in line:
        return
    # Mixed tab/space are only allowed after non-whitespace characters
    idx = line.index('\x20\x09')
    tmp = line[0:idx]
    m = re_only_ws.match(tmp)
    if m is None:
        return
    raise Exception('mixed space/tab indent (idx %d)' % idx)
def checkTabIndent(lines, idx, filename):
    line = lines[idx]
    if not '\x09' in line:
        return
    # Now just checks for presence of TAB characters which is fine for Python
    # code (which this check is used for).
    raise Exception('tab indent (idx %d)' % idx)
def checkNonLeadingTab(lines, idx, filename):
    line = lines[idx]
    m = re_nonleading_tab.match(line)
    if m is None:
        return
    raise Exception('non-leading tab (idx %d)' % idx)
def checkFixme(lines, idx, filename):
    line = lines[idx]
    if not fixmeString in line:
        return
    raise Exception(fixmeString + ' on line')
def checkIdentifiers(lines, idx, filename):
    line = lines[idx]
    # XXX: this now executes for every line which is pointless
    bn = os.path.basename(filename)
    excludePlain = (bn[0:5] == 'test-')
    for m in re.finditer(re_identifier, line):
        if rejected_plain_identifiers.has_key(m.group(0)):
            if not excludePlain:
                raise Exception('invalid identifier %r (perhaps plain)' % m.group(0))
def checkNonAscii(lines, idx, filename):
    line = lines[idx]
    m = re_nonascii.match(line)
    if m is None:
        return
    bn = os.path.basename(filename)
    if bn == 'test-lex-utf8.js':
        # this specific file is intentionally exempt
        pass
    else:
        raise Exception('non-ascii character')
def checkNoSymbolVisibility(lines, idx, filename):
    line = lines[idx]
    # Workaround for DUK_ALWAYS_INLINE preceding a declaration
    # (e.g. "DUK_ALWAYS_INLINE DUK_LOCAL ...")
    if line.startswith('DUK_ALWAYS_INLINE '):
        line = line[18:]
    m = re_func_decl_or_def.match(line)
    if m is None:
        return
    bn = os.path.basename(filename)
    if not ((bn[-2:] == '.c' or bn[-2:] == '.h' or bn[-5:] == '.h.in') and bn[0:5] != 'test-'):
        # Apply to only specific files in src/
        return
    if m.group(1) in allowed_visibility_macros and \
       not ((m.group(1) != 'DUK_LOCAL' and m.group(1) != 'DUK_LOCAL_DECL') and 'duk__' in m.group(2)) and \
       not ((m.group(1) == 'DUK_LOCAL' or m.group(1) == 'DUK_LOCAL_DECL') and 'duk__' not in m.group(2)):
        return
    # Previous line may contain the declaration (alone)
    if idx > 0 and lines[idx - 1].strip() in allowed_visibility_macros:
        return
    # Special exceptions
    # (None now)
    raise Exception('missing symbol visibility macro')
def checkCppComment(lines, idx, filename):
    line = lines[idx]
    m = re_cpp_comment.match(line)
    if m is None:
        return
    raise Exception('c++ comment')
def processFile(filename, checkersRaw, checkersNoCommentsOrLiterals, checkersNoCCommentsOrLiterals, checkersNoExpectStrings):
    f = open(filename, 'rb')
    dataRaw = f.read()
    f.close()
    dataNoCommentsOrLiterals = removeAnyCommentsAndLiterals(dataRaw)  # no C/javascript comments, literals removed
    dataNoCCommentsOrLiterals = removeCCommentsAndLiterals(dataRaw)   # no C comments, literals removed
    dataNoExpectStrings = removeExpectStrings(dataRaw)                # no testcase expect strings
    linesRaw = dataRaw.split('\n')
    linesNoCommentsOrLiterals = dataNoCommentsOrLiterals.split('\n')
    linesNoCCommentsOrLiterals = dataNoCCommentsOrLiterals.split('\n')
    linesNoExpectStrings = dataNoExpectStrings.split('\n')
    def f(lines, checkers):
        for linenumber in xrange(len(lines)):
            for fun in checkers:
                try:
                    fun(lines, linenumber, filename)  # linenumber is zero-based here
                except Exception as e:
                    problems.append(Problem(filename, linenumber + 1, lines[linenumber], str(e)))
    f(linesRaw, checkersRaw)
    f(linesNoCommentsOrLiterals, checkersNoCommentsOrLiterals)
    f(linesNoCCommentsOrLiterals, checkersNoCCommentsOrLiterals)
    f(linesNoExpectStrings, checkersNoExpectStrings)
    # Last line should have a newline, and there should not be an empty line.
    # The 'split' result will have one empty string as its last item in the
    # expected case. For a single line file there will be two split results
    # (the line itself, and an empty string).
    if len(linesRaw) == 0 or \
       len(linesRaw) == 1 and linesRaw[-1] != '' or \
       len(linesRaw) >= 2 and linesRaw[-1] != '' or \
       len(linesRaw) >= 2 and linesRaw[-1] == '' and linesRaw[-2] == '':
        problems.append(Problem(filename, len(linesRaw), '(no line)', 'No newline on last line or empty line at end of file'))
    # First line should not be empty (unless it's the only line, len(linesRaw)==2)
    if len(linesRaw) > 2 and linesRaw[0] == '':
        problems.append(Problem(filename, 1, '(no line)', 'First line is empty'))
def asciiOnly(x):
    return re.sub(r'[\x80-\xff]', '#', x)
def main():
    parser = optparse.OptionParser()
    parser.add_option('--dump-vim-commands', dest='dump_vim_commands', action='store_true', default=False, help='Dump oneline vim command')
    parser.add_option('--check-debug-log-calls', dest='check_debug_log_calls', action='store_true', default=False, help='Check debug log call consistency')
    parser.add_option('--check-carriage-returns', dest='check_carriage_returns', action='store_true', default=False, help='Check carriage returns')
    parser.add_option('--check-fixme', dest='check_fixme', action='store_true', default=False, help='Check ' + fixmeString + ' tags')
    parser.add_option('--check-non-ascii', dest='check_non_ascii', action='store_true', default=False, help='Check non-ASCII characters')
    parser.add_option('--check-no-symbol-visibility', dest='check_no_symbol_visibility', action='store_true', default=False, help='Check for missing symbol visibility macros')
    parser.add_option('--check-rejected-identifiers', dest='check_rejected_identifiers', action='store_true', default=False, help='Check for rejected identifiers like plain "printf()" calls')
    parser.add_option('--check-trailing-whitespace', dest='check_trailing_whitespace', action='store_true', default=False, help='Check for trailing whitespace')
    parser.add_option('--check-mixed-indent', dest='check_mixed_indent', action='store_true', default=False, help='Check for mixed indent (space and tabs)')
    parser.add_option('--check-tab-indent', dest='check_tab_indent', action='store_true', default=False, help='Check for tab indent')
    parser.add_option('--check-nonleading-tab', dest='check_nonleading_tab', action='store_true', default=False, help='Check for non-leading tab characters')
    parser.add_option('--check-cpp-comment', dest='check_cpp_comment', action='store_true', default=False, help='Check for c++ comments ("// ...")')
    parser.add_option('--fail-on-errors', dest='fail_on_errors', action='store_true', default=False, help='Fail on errors (exit code != 0)')
    (opts, args) = parser.parse_args()
    checkersRaw = []
    if opts.check_debug_log_calls:
        checkersRaw.append(checkDebugLogCalls)
    if opts.check_carriage_returns:
        checkersRaw.append(checkCarriageReturns)
    if opts.check_fixme:
        checkersRaw.append(checkFixme)
    if opts.check_non_ascii:
        checkersRaw.append(checkNonAscii)
    if opts.check_no_symbol_visibility:
        checkersRaw.append(checkNoSymbolVisibility)
    checkersNoCCommentsOrLiterals = []
    if opts.check_cpp_comment:
        checkersNoCCommentsOrLiterals.append(checkCppComment)
    checkersNoCommentsOrLiterals = []
    if opts.check_rejected_identifiers:
        checkersNoCommentsOrLiterals.append(checkIdentifiers)
    checkersNoExpectStrings = []
    if opts.check_trailing_whitespace:
        checkersNoExpectStrings.append(checkTrailingWhitespace)
    if opts.check_mixed_indent:
        checkersNoExpectStrings.append(checkMixedIndent)
    if opts.check_tab_indent:
        checkersNoExpectStrings.append(checkTabIndent)
    if opts.check_nonleading_tab:
        checkersNoExpectStrings.append(checkNonLeadingTab)
    for filename in args:
        processFile(filename, checkersRaw, checkersNoCommentsOrLiterals, checkersNoCCommentsOrLiterals, checkersNoExpectStrings)
    if len(problems) > 0:
        for i in problems:
            tmp = 'vim +' + str(i.linenumber)
            while len(tmp) < 10:
                tmp = tmp + ' '
            tmp += ' ' + str(i.filename) + ' : ' + str(i.reason)
            while len(tmp) < 80:
                tmp = tmp + ' '
            tmp += ' - ' + asciiOnly(i.line.strip())
            print(tmp)
        print '*** Total: %d problems' % len(problems)
        if opts.dump_vim_commands:
            cmds = []
            for i in problems:
                cmds.append('vim +' + str(i.linenumber) + ' "' + i.filename + '"')
            print ''
            print('; '.join(cmds))
        if opts.fail_on_errors:
            sys.exit(1)
    sys.exit(0)
if __name__ == '__main__':
    main()

257
util/combine_src.py

@@ -1,257 +0,0 @@
#!/usr/bin/env python2
#
# Combine a set of a source files into a single C file.
#
# Overview of the process:
#
# * Parse user-supplied C files. Add automatic #undefs at the end
# of each C file to avoid defines bleeding from one file to another.
#
# * Combine the C files in specified order. If sources have ordering
# dependencies (depends on application), order may matter.
#
# * Process #include statements in the combined source, categorizing
# them either as "internal" (found in specified include path) or
# "external". Internal includes, unless explicitly excluded, are
# inlined into the result, while external includes are left as is.
# Duplicate #include statements are replaced with a comment.
#
# At every step, source and header lines are represented with explicit
# line objects which keep track of original filename and line. The
# output contains #line directives, if necessary, to ensure error
# throwing and other diagnostic info will work in a useful manner when
# deployed. It's also possible to generate a combined source with no
# #line directives.
#
# Making the process deterministic is important, so that if users have
# diffs that they apply to the combined source, such diffs would apply
# for as long as possible.
#
# Limitations and notes:
#
# * While there are automatic #undef's for #define's introduced in each
# C file, it's not possible to "undefine" structs, unions, etc. If
# there are structs/unions/typedefs with conflicting names, these
# have to be resolved in the source files first.
#
# * Because duplicate #include statements are suppressed, the tool currently
# assumes #include statements are not conditional.
#
# * A system header might be #include'd in multiple source files with
# different feature defines (like _BSD_SOURCE). Because the #include
# file will only appear once in the resulting source, the first
# occurrence wins. The result may not work correctly if the feature
# defines must actually be different between two or more source files.
#
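# For reference, the automatic undefs appended by addAutomaticUndefs() below
# look like this in the output (macro name illustrative only):
#
#   /* automatic undefs */
#   #undef DUK__SOME_FILE_LOCAL_DEFINE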
import os
import sys
import re
import json
import optparse
# Include path for finding include files which are amalgamated.
include_paths = []
# Include files specifically excluded from being inlined.
include_excluded = []
class File:
filename_full = None
filename = None
lines = None
def __init__(self, filename, lines):
self.filename = os.path.basename(filename)
self.filename_full = filename
self.lines = lines
class Line:
filename_full = None
filename = None
lineno = None
data = None
def __init__(self, filename, lineno, data):
self.filename = os.path.basename(filename)
self.filename_full = filename
self.lineno = lineno
self.data = data
def readFile(filename):
lines = []
with open(filename, 'rb') as f:
lineno = 0
for line in f:
lineno += 1
if len(line) > 0 and line[-1] == '\n':
line = line[:-1]
lines.append(Line(filename, lineno, line))
return File(filename, lines)
def lookupInclude(incfn):
re_sep = re.compile(r'/|\\')
inccomp = re.split(re_sep, incfn) # split include path, support / and \
for path in include_paths:
fn = apply(os.path.join, [ path ] + inccomp)
if os.path.exists(fn):
return fn # Return full path to first match
return None
def addAutomaticUndefs(f):
defined = {}
re_def = re.compile(r'#define\s+(\w+).*$')
re_undef = re.compile(r'#undef\s+(\w+).*$')
for line in f.lines:
m = re_def.match(line.data)
if m is not None:
#print('DEFINED: %s' % repr(m.group(1)))
defined[m.group(1)] = True
m = re_undef.match(line.data)
if m is not None:
# Could just ignore #undef's here: we'd then emit
# reliable #undef's (though maybe duplicates) at
# the end.
#print('UNDEFINED: %s' % repr(m.group(1)))
if defined.has_key(m.group(1)):
del defined[m.group(1)]
# Undefine anything that seems to be left defined. This is not a 100%
# reliable process because some #undef's might be conditional, which we don't
# track at the moment. Note that it's safe to #undef something that's
# not defined.
keys = sorted(defined.keys()) # deterministic order
if len(keys) > 0:
#print('STILL DEFINED: %r' % repr(defined.keys()))
f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
for k in keys:
f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))
def createCombined(files, prologue_filename, line_directives):
res = []
line_map = [] # indicate combined source lines where uncombined file/line would change
metadata = {
'line_map': line_map
}
emit_state = [ None, None ] # curr_filename, curr_lineno
def emit(line):
if isinstance(line, (str, unicode)):
res.append(line)
emit_state[1] += 1
else:
if line.filename != emit_state[0] or line.lineno != emit_state[1]:
if line_directives:
res.append('#line %d "%s"' % (line.lineno, line.filename))
line_map.append({ 'original_file': line.filename,
'original_line': line.lineno,
'combined_line': len(res) + 1 })
res.append(line.data)
emit_state[0] = line.filename
emit_state[1] = line.lineno + 1
included = {} # headers already included
if prologue_filename is not None:
with open(prologue_filename, 'rb') as f:
for line in f.read().split('\n'):
res.append(line)
re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')
# Process a file, appending it to the result; the input may be a
# source or an include file. #include directives are handled
# recursively.
def processFile(f):
#print('Process file: ' + f.filename)
for line in f.lines:
if not line.data.startswith('#include'):
emit(line)
continue
m = re_inc.match(line.data)
if m is None:
raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
incpath = m.group(2)
if incpath in include_excluded:
# Specific include files excluded from the
# inlining / duplicate suppression process.
emit(line) # keep as is
continue
if included.has_key(incpath):
# We suppress duplicate includes, both internal and
# external, based on the assumption that includes are
# not behind #ifdef checks. This is the case for
# Duktape (except for the include files excluded).
emit('/* #include %s -> already included */' % incpath)
continue
included[incpath] = True
# An include file is considered "internal" and is amalgamated
# if it is found in the include path provided by the user.
incfile = lookupInclude(incpath)
if incfile is not None:
#print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
emit('/* #include %s */' % incpath)
processFile(readFile(incfile))
else:
#print('Include considered external: %s' % repr(line.data))
emit(line) # keep as is
for f in files:
processFile(f)
return '\n'.join(res) + '\n', metadata
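# The metadata written by main() below is JSON of the following shape
# (sketch; file name and line numbers illustrative):
#
#   { "line_map": [ { "original_file": "duk_api_stack.c",
#                     "original_line": 12,
#                     "combined_line": 1530 }, ... ] }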
def main():
global include_paths, include_excluded
parser = optparse.OptionParser()
parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
parser.add_option('--output-source', dest='output_source', help='Output source filename')
parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
(opts, args) = parser.parse_args()
assert(opts.include_paths is not None)
include_paths = opts.include_paths # global for easy access
include_excluded = opts.include_excluded
assert(opts.output_source)
assert(opts.output_metadata)
print('Read input files, add automatic #undefs')
sources = args
files = []
for fn in sources:
res = readFile(fn)
#print('Add automatic undefs for: ' + fn)
addAutomaticUndefs(res)
files.append(res)
print('Create combined source file from %d source files' % len(files))
combined_source, metadata = \
createCombined(files, opts.prologue, opts.line_directives)
with open(opts.output_source, 'wb') as f:
f.write(combined_source)
with open(opts.output_metadata, 'wb') as f:
f.write(json.dumps(metadata, indent=4))
print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source))
if __name__ == '__main__':
main()

246
util/create_spdx_license.py

@ -1,246 +0,0 @@
#!/usr/bin/env python2
#
# Helper to create an SPDX license file (http://spdx.org)
#
# This must be executed when the dist/ directory is otherwise complete,
# except for the SPDX license, so that the file lists and such contained
# in the SPDX license will be correct.
#
# The utility outputs RDF/XML to specified file:
#
# $ python create_spdx_license.py /tmp/license.spdx
#
# Then, validate with SPDXViewer and SPDXTools:
#
# $ java -jar SPDXViewer.jar /tmp/license.spdx
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html
#
# Finally, copy to dist:
#
# $ cp /tmp/license.spdx dist/license.spdx
#
# SPDX FAQ indicates there is no standard extension for an SPDX license file
# but '.spdx' is a common practice.
#
# The algorithm to compute a "verification code", implemented in this file,
# can be verified as follows:
#
# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz
# $ cd /tmp
# $ tar xvfJ duktape-N.N.N.tar.xz
# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/
#
# Compare the resulting verification code manually with the one in license.spdx.
#
# Resources:
#
# - http://spdx.org/about-spdx/faqs
# - http://wiki.spdx.org/view/Technical_Team/Best_Practices
#
import os
import sys
import re
import datetime
import sha
import rdflib
from rdflib import URIRef, BNode, Literal, Namespace
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
XSD = Namespace('http://www.w3.org/2001/XMLSchema#')
SPDX = Namespace('http://spdx.org/rdf/terms#')
DOAP = Namespace('http://usefulinc.com/ns/doap#')
DUKTAPE = Namespace('http://duktape.org/rdf/terms#')
def checksumFile(g, filename):
f = open(filename, 'rb')
d = f.read()
f.close()
shasum = sha.sha(d).digest().encode('hex').lower()
csum_node = BNode()
g.add((csum_node, RDF.type, SPDX.Checksum))
g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1))
g.add((csum_node, SPDX.checksumValue, Literal(shasum)))
return csum_node
def computePackageVerification(g, dirname, excluded):
# SPDX 1.2 Section 4.7
# The SPDXTools command "GenerateVerificationCode" can be used to
# check the verification codes created. Note that you must manually
# remove "license.spdx" from the unpacked dist directory before
# computing the verification code.
verify_node = BNode()
hashes = []
for dirpath, dirnames, filenames in os.walk(dirname):
for fn in filenames:
full_fn = os.path.join(dirpath, fn)
f = open(full_fn, 'rb')
d = f.read()
f.close()
if full_fn in excluded:
#print('excluded in verification: ' + full_fn)
continue
#print('included in verification: ' + full_fn)
file_sha1 = sha.sha(d).digest().encode('hex').lower()
hashes.append(file_sha1)
#print(repr(hashes))
hashes.sort()
#print(repr(hashes))
verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower()
for fn in excluded:
g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn)))
g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code)))
return verify_node
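# Equivalent standalone sketch of the verification code computation
# (SPDX 1.2 Section 4.7): SHA-1 over the concatenation of the sorted
# per-file SHA-1 hex digests. Uses hashlib instead of the deprecated
# 'sha' module; 'dirname' and 'excluded' have the same meaning as above.
def computeVerificationCodeSketch(dirname, excluded):
    import hashlib
    hashes = []
    for dirpath, dirnames, filenames in os.walk(dirname):
        for fn in filenames:
            full_fn = os.path.join(dirpath, fn)
            if full_fn in excluded:
                continue  # e.g. './license.spdx' itself
            with open(full_fn, 'rb') as f:
                hashes.append(hashlib.sha1(f.read()).hexdigest().lower())
    hashes.sort()  # verification code is order independent
    return hashlib.sha1(''.join(hashes)).hexdigest().lower()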
def fileType(filename):
ign, ext = os.path.splitext(filename)
if ext in [ '.c', '.h', '.js' ]:
return SPDX.fileType_source
else:
return SPDX.fileType_other
def getDuktapeVersion():
f = open('./src/duktape.h')
re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$')
for line in f:
line = line.strip()
m = re_ver.match(line)
if m is None:
continue
ver = int(m.group(1))
return '%d.%d.%d' % ((ver / 10000) % 100,
(ver / 100) % 100,
ver % 100)
raise Exception('could not figure out Duktape version')
def main():
outfile = sys.argv[1]
if not (os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript')):
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n')
sys.exit(1)
os.chdir('dist')
if not os.path.exists('Makefile.cmdline'):
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n')
sys.exit(1)
duktape_version = getDuktapeVersion()
duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz'
now = datetime.datetime.utcnow()
now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second)
creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime)
duktape_org = Literal('Organization: duktape.org')
mit_license = URIRef('http://spdx.org/licenses/MIT')
duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)')
g = rdflib.Graph()
crea_node = BNode()
g.add((crea_node, RDF.type, SPDX.CreationInfo))
g.add((crea_node, RDFS.comment, Literal('')))
g.add((crea_node, SPDX.creator, duktape_org))
g.add((crea_node, SPDX.created, creation_date))
g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/
# 'name' should not include a version number (see best practices)
pkg_node = BNode()
g.add((pkg_node, RDF.type, SPDX.Package))
g.add((pkg_node, SPDX.name, Literal('Duktape')))
g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version)))
g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname)))
g.add((pkg_node, SPDX.supplier, duktape_org))
g.add((pkg_node, SPDX.originator, duktape_org))
g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI)))
g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI)))
verify_node = computePackageVerification(g, '.', [ './license.spdx' ])
g.add((pkg_node, SPDX.packageVerificationCode, verify_node))
# SPDX.checksum: omitted because license is inside the package
g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.')))
# NOTE: MIT license alone is sufficient for now, because Duktape, Lua,
# Murmurhash2, and CommonJS (though probably not even relevant for
# licensing) are all MIT.
g.add((pkg_node, SPDX.licenseConcluded, mit_license))
g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license))
g.add((pkg_node, SPDX.licenseDeclared, mit_license))
g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.')))
g.add((pkg_node, SPDX.copyrightText, duktape_copyright))
g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter')))
g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint')))
# hasFile properties added separately below
#reviewed_node = BNode()
#g.add((reviewed_node, RDF.type, SPDX.Review))
#g.add((reviewed_node, SPDX.reviewer, XXX))
#g.add((reviewed_node, SPDX.reviewDate, XXX))
#g.add((reviewed_node, RDFS.comment, ''))
spdx_doc = BNode()
g.add((spdx_doc, RDF.type, SPDX.SpdxDocument))
g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2')))
g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0')))
g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version)))
g.add((spdx_doc, SPDX.creationInfo, crea_node))
g.add((spdx_doc, SPDX.describesPackage, pkg_node))
# SPDX.hasExtractedLicensingInfo
# SPDX.reviewed
# SPDX.referencesFile: added below
for dirpath, dirnames, filenames in os.walk('.'):
for fn in filenames:
full_fn = os.path.join(dirpath, fn)
#print('# file: ' + full_fn)
file_node = BNode()
g.add((file_node, RDF.type, SPDX.File))
g.add((file_node, SPDX.fileName, Literal(full_fn)))
g.add((file_node, SPDX.fileType, fileType(full_fn)))
g.add((file_node, SPDX.checksum, checksumFile(g, full_fn)))
# Here we assume that LICENSE.txt provides the actual "in file"
# licensing information, and everything else is implicitly under
# MIT license.
g.add((file_node, SPDX.licenseConcluded, mit_license))
if full_fn == './LICENSE.txt':
g.add((file_node, SPDX.licenseInfoInFile, mit_license))
else:
g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none)))
# SPDX.licenseComments
g.add((file_node, SPDX.copyrightText, duktape_copyright))
# SPDX.noticeText
# SPDX.artifactOf
# SPDX.fileDependency
# SPDX.fileContributor
# XXX: should referencesFile include all files?
g.add((spdx_doc, SPDX.referencesFile, file_node))
g.add((pkg_node, SPDX.hasFile, file_node))
# Serialize into RDF/XML directly. We could also serialize into
# N-Triples and use external tools (like 'rapper') to get cleaner,
# abbreviated output.
#print('# Duktape SPDX license file (autogenerated)')
#print(g.serialize(format='turtle'))
#print(g.serialize(format='nt'))
f = open(outfile, 'wb')
#f.write(g.serialize(format='rdf/xml'))
f.write(g.serialize(format='xml'))
f.close()
if __name__ == '__main__':
main()

30
util/ditz_hack.py

@ -1,30 +0,0 @@
#!/usr/bin/env python2
#
# Throwaway utility to dump Ditz issues for grooming.
#
import os
import sys
import yaml
def main():
def issueConstructor(loader, node):
return node
yaml.add_constructor('!ditz.rubyforge.org,2008-03-06/issue', issueConstructor)
for fn in os.listdir(sys.argv[1]):
if fn[0:6] != 'issue-':
continue
with open(os.path.join(sys.argv[1], fn), 'rb') as f:
doc = yaml.load(f)
tmp = {}
for k,v in doc.value:
tmp[k.value] = v.value
if tmp.get('status', '') != ':closed':
print('*** ' + fn)
print(tmp.get('title', u'NOTITLE').encode('utf-8') + '\n')
print(tmp.get('desc', u'').encode('utf-8') + '\n')
if __name__ == '__main__':
main()

49
util/duk_meta_to_strarray.py

@ -1,49 +0,0 @@
#!/usr/bin/env python2
#
# Create an array of C strings with Duktape built-in strings.
# Useful when using external strings.
#
import os
import sys
import json
def to_c_string(x):
res = '"'
term = False
for i, c in enumerate(x):
if term:
term = False
res += '" "'
o = ord(c)
if o < 0x20 or o > 0x7e or c in '\'"\\':
# Terminate C string so that escape doesn't become
# ambiguous
res += '\\x%02x' % o
term = True
else:
res += c
res += '"'
return res
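# Example (sketch): to_c_string('foo\x01bar') yields the C literal
#     "foo\x01" "bar"
# i.e. the string is split after each escape so that a following
# literal character cannot extend the hex escape ambiguously.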
def main():
f = open(sys.argv[1], 'rb')
d = f.read()
f.close()
meta = json.loads(d)
print('const char *duk_builtin_strings[] = {')
strlist = meta['builtin_strings_base64']
for i in xrange(len(strlist)):
s = strlist[i]
if i == len(strlist) - 1:
print(' %s' % to_c_string(s.decode('base64')))
else:
print(' %s,' % to_c_string(s.decode('base64')))
print('};')
if __name__ == '__main__':
main()

130
util/dump_bytecode.py

@ -1,130 +0,0 @@
#!/usr/bin/env python2
#
# Utility to dump bytecode into a human readable form.
#
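# Usage (sketch; input is raw dumped bytecode, or ASCII hex with
# --hex-decode; 'duk.bin' and 'duk.hex' are illustrative filenames):
#
#   $ python dump_bytecode.py duk.bin
#   $ python dump_bytecode.py --hex-decode duk.hex
#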
import os
import sys
import struct
import optparse
def decode_string(buf, off):
strlen, = struct.unpack('>L', buf[off:off+4])
off += 4
strdata = buf[off:off+strlen]
off += strlen
return off, strdata
def sanitize_string(val):
# Don't try to UTF-8 decode, just escape non-printable ASCII.
def f(c):
if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"':
return '\\x%02x' % ord(c)
else:
return c
return "'" + ''.join(map(f, val)) + "'"
def decode_sanitize_string(buf, off):
off, val = decode_string(buf, off)
return off, sanitize_string(val)
def dump_function(buf, off, ind):
count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12])
off += 12
print '%sInstructions: %d' % (ind, count_inst)
print '%sConstants: %d' % (ind, count_const)
print '%sInner functions: %d' % (ind, count_funcs)
nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12])
off += 12
print '%sNregs: %d' % (ind, nregs)
print '%sNargs: %d' % (ind, nargs)
print '%sStart line number: %d' % (ind, start_line)
print '%sEnd line number: %d' % (ind, end_line)
compfunc_flags, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags)
for i in xrange(count_inst):
ins, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s %06d: %08lx' % (ind, i, ins)
print '%sConstants:' % ind
for i in xrange(count_const):
const_type, = struct.unpack('B', buf[off:off+1])
off += 1
if const_type == 0x00:
off, strdata = decode_sanitize_string(buf, off)
print '%s %06d: %s' % (ind, i, strdata)
elif const_type == 0x01:
num, = struct.unpack('>d', buf[off:off+8])
off += 8
print '%s %06d: %f' % (ind, i, num)
else:
raise Exception('invalid constant type: %d' % const_type)
for i in xrange(count_funcs):
print '%sInner function %d:' % (ind, i)
off = dump_function(buf, off, ind + ' ')
val, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s.length: %d' % (ind, val)
off, val = decode_sanitize_string(buf, off)
print '%s.name: %s' % (ind, val)
off, val = decode_sanitize_string(buf, off)
print '%s.fileName: %s' % (ind, val)
off, val = decode_string(buf, off) # actually a buffer
print '%s._Pc2line: %s' % (ind, val.encode('hex'))
while True:
off, name = decode_string(buf, off)
if name == '':
break
name = sanitize_string(name)
val, = struct.unpack('>L', buf[off:off+4])
off += 4
print '%s_Varmap[%s] = %d' % (ind, name, val)
idx = 0
while True:
off, name = decode_string(buf, off)
if name == '':
break
name = sanitize_string(name)
print '%s_Formals[%d] = %s' % (ind, idx, name)
idx += 1
return off
def dump_bytecode(buf, off, ind):
sig, ver = struct.unpack('BB', buf[off:off+2])
off += 2
if sig != 0xff:
raise Exception('invalid signature byte: %d' % sig)
if ver != 0x00:
raise Exception('unsupported bytecode version: %d' % ver)
print '%sBytecode version: 0x%02x' % (ind, ver)
off = dump_function(buf, off, ind + ' ')
return off
def main():
parser = optparse.OptionParser()
parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump')
(opts, args) = parser.parse_args()
with open(args[0], 'rb') as f:
d = f.read()
if opts.hex_decode:
d = d.strip()
d = d.decode('hex')
dump_bytecode(d, 0, '')
if __name__ == '__main__':
main()

43
util/example_rombuild.sh

@ -6,28 +6,26 @@ set -e
PYTHON=`which python2 python | head -1`
# Run dist manually, ROM support is not enabled by default so add --rom-support.
# User builtin metadata can be provided through one or more YAML files (applied
make clean dist
# Prepare-and-config sources manually to enable ROM support. User builtin
# metadata can be provided through one or more YAML files (which are applied
# in sequence).
make clean
$PYTHON util/make_dist.py \
rm -rf dist/src dist/src-noline dist/src-separate
$PYTHON dist/tools/prepare_sources.py \
--source-directory dist/src-input \
--output-directory dist \
--rom-support \
--rom-auto-lightfunc \
--user-builtin-metadata util/example_user_builtins1.yaml \
--user-builtin-metadata util/example_user_builtins2.yaml
# Run genconfig.py and create a custom duk_config.h with ROM support etc.
$PYTHON config/genconfig.py \
--metadata config \
--output dist/src/duk_config.h \
--user-builtin-metadata util/example_user_builtins2.yaml \
--config-metadata dist/config/genconfig_metadata.tar.gz \
-DDUK_USE_ROM_STRINGS \
-DDUK_USE_ROM_OBJECTS \
-DDUK_USE_ROM_GLOBAL_INHERIT \
-DDUK_USE_DEBUG -DDUK_USE_DEBUG_LEVEL=0 \
--option-yaml 'DUK_USE_DEBUG_WRITE: { "verbatim": "#define DUK_USE_DEBUG_WRITE(level,file,line,func,msg) do {fprintf(stderr, \"%ld %s:%ld (%s): %s\\n\", (long) (level), (file), (long) (line), (func), (msg)); } while(0)" }' \
-DDUK_USE_ASSERTIONS \
autodetect-header
cp dist/src/duk_config.h dist/src-separate/
-DDUK_USE_ASSERTIONS
#gcc -std=c99 -Wall -Wextra -Os -Idist/src-separate/ -Idist/examples/cmdline dist/src-separate/*.c dist/examples/cmdline/duk_cmdline.c -o _duk -lm
make duk dukd # XXX: currently fails to start, DUK_CMDLINE_LOGGING_SUPPORT, DUK_CMDLINE_MODULE_SUPPORT modify Duktape object (doesn't work with ROM built-ins)
@ -35,15 +33,20 @@ make duk dukd # XXX: currently fails to start, DUK_CMDLINE_LOGGING_SUPPORT, DUK
# This would ideally be done directly using genconfig.py without
# --support-feature-options by moving the options into a genconfig
# YAML config file.
$PYTHON config/genconfig.py \
--metadata config \
--output dist/src/duk_config.h \
--option-file config/examples/low_memory.yaml \
rm -rf dist/src dist/src-noline dist/src-separate
$PYTHON dist/tools/prepare_sources.py \
--source-directory dist/src-input \
--output-directory dist \
--rom-support \
--rom-auto-lightfunc \
--user-builtin-metadata util/example_user_builtins1.yaml \
--user-builtin-metadata util/example_user_builtins2.yaml \
--config-metadata dist/config/genconfig_metadata.tar.gz \
--support-feature-options \
-DDUK_USE_ROM_STRINGS \
-DDUK_USE_ROM_OBJECTS \
-DDUK_USE_ROM_GLOBAL_INHERIT \
--support-feature-options \
autodetect-header
cp dist/src/duk_config.h dist/src-separate/
-DDUK_USE_ASSERTIONS \
-UDUK_USE_DEBUG
#gcc -std=c99 -Wall -Wextra -Os -Idist/src-separate/ -Idist/examples/cmdline dist/src-separate/*.c dist/examples/cmdline/duk_cmdline.c -o _duk -lm
make ajduk

6
util/example_user_builtins1.yaml

@ -8,9 +8,9 @@
#
# See examples below for details on how to use these.
#
# Note that genbuiltins.py (and make_dist.py) accepts multiple user built-in
# YAML files, so that you can manage your custom strings and objects in
# individual YAML files for modularity.
# Note that genbuiltins.py (and prepare_sources.py) accepts multiple user
# built-in YAML files, so that you can manage your custom strings and
# objects in individual YAML files for modularity.
#
# When using pointer compression, all ROM strings and objects need a number
# from the ROM pointer compression range (e.g. [0xf800,0xffff]). By default

102
util/fastint_reps.py

@ -3,68 +3,66 @@
# Print out a few IEEE double representations related to the Duktape fastint
# number model.
#
# NOTE: signed zero does not work correctly here.
#
import struct
import math
def isFastint(x):
if math.floor(x) == x and \
x >= -(2**47) and \
x < (2**47) and \
True: # FIXME: not neg zero
return True
return False
if math.floor(x) == x and \
x >= -(2**47) and \
x < (2**47) and \
(x != 0 or math.copysign(1.0, x) == 1.0):
return True
return False
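# Boundary checks for the predicate (sketch):
#   isFastint(1.0)        -> True   (whole number, in range)
#   isFastint(-0.0)       -> False  (negative zero excluded)
#   isFastint(-(2.0**47)) -> True   (lower bound inclusive)
#   isFastint(2.0**47)    -> False  (upper bound exclusive)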
def stringRep(x):
tmp = struct.pack('>d', x)
tmphex = tmp.encode('hex')
tmp = struct.pack('>d', x)
tmphex = tmp.encode('hex')
sgnexp = (ord(tmp[0]) << 8) + ord(tmp[1])
sgn = (sgnexp) >> 15
exp = (sgnexp & 0x7ff0) >> 4
manthex = tmphex[3:]
sgnexp = (ord(tmp[0]) << 8) + ord(tmp[1])
sgn = (sgnexp) >> 15
exp = (sgnexp & 0x7ff0) >> 4
manthex = tmphex[3:]
return '%s sgn=%d exp=%d sgnexp=%x manthex=%s' % (tmphex, sgn, exp, sgnexp, manthex)
return '%s sgn=%d exp=%d sgnexp=%x manthex=%s' % (tmphex, sgn, exp, sgnexp, manthex)
def main():
for i in [ -(2**47) - 1,
-(2**47),
-(2**47) + 1,
-(2**32) - 1,
-(2**32),
-(2**32) + 1,
-(long(0xdeadbeef)),
-9,
-8,
-8,
-7,
-6,
-5,
-4,
-3,
-2,
-1,
-0,
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
long(0xdeadbeef),
(2**32) - 1,
(2**32),
(2**32) + 1,
(2**47) - 1,
(2**47)
]:
print('%d %x (fastint=%s): %s' % (i, i, str(isFastint(i)), stringRep(i)))
for i in [ -(2**47) - 1,
-(2**47),
-(2**47) + 1,
-(2**32) - 1,
-(2**32),
-(2**32) + 1,
-(long(0xdeadbeef)),
-9,
-8,
-8,
-7,
-6,
-5,
-4,
-3,
-2,
-1,
-0.0, # must use float to get neg zero
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
long(0xdeadbeef),
(2**32) - 1,
(2**32),
(2**32) + 1,
(2**47) - 1,
(2**47)
]:
print('%f %x (fastint=%s): %s' % (float(i), int(i), str(isFastint(i)), stringRep(i)))
if __name__ == '__main__':
main()
main()

216
util/filter_test262_log.py

@ -6,114 +6,114 @@ import json
import yaml
def main():
with open(sys.argv[1], 'rb') as f:
known_issues = yaml.load(f.read())
skipstrings = [
'passed in strict mode',
'passed in non-strict mode',
'failed in strict mode as expected',
'failed in non-strict mode as expected'
]
in_failed_tests = False
tofix_count = 0 # count of bugs that will be fixed (no uncertainty about proper behavior etc)
known_errors = []
diagnosed_errors = []
unknown_errors = []
other_errors = []
for line in sys.stdin:
if len(line) > 1 and line[-1] == '\n':
line = line[:-1]
# Skip success cases
skip = False
for sk in skipstrings:
if sk in line:
skip = True
if skip:
continue
# Augment error list with "known bugs"
print(line) # print error list as is, then refined version later
if 'failed tests' in line.lower():
in_failed_tests = True
continue
if in_failed_tests and line.strip() == '':
in_failed_tests = False
continue
if in_failed_tests:
# " intl402/ch12/12.2/12.2.3_c in non-strict mode"
tmp = line.strip().split(' ')
test = tmp[0]
matched = False
for kn in known_issues:
if kn.get('test', None) != test:
continue
if kn.has_key('diagnosed'):
tofix_count += 1
diagnosed_errors.append(line + ' // diagnosed: ' + kn['diagnosed'])
elif kn.has_key('knownissue'):
# don't bump tofix_count, as testcase expected result is not certain
known_errors.append(line + ' // KNOWN: ' + kn['knownissue'])
else:
tofix_count += 1
unknown_errors.append(line + ' // ??? (rule matches)')
kn['used'] = True # mark rule used
matched = True
break
if matched:
continue
# no match, to fix
other_errors.append(line)
tofix_count += 1
print('')
print('=== CATEGORISED ERRORS ===')
print('')
for i in known_errors:
print(i)
for i in diagnosed_errors:
print(i)
for i in unknown_errors:
print(i)
for i in other_errors:
print(i)
# Check for unused rules (e.g. bugs fixed)
print('')
for kn in known_issues:
if not kn.has_key('used'):
print('WARNING: unused rule: ' + json.dumps(kn))
# Used by testclient
if len(unknown_errors) > 0 or len(other_errors) > 0:
print('TEST262 FAILED')
elif len(known_errors) > 0 or len(diagnosed_errors) > 0:
# Known and diagnosed errors don't indicate test failure
# as far as Github status is concerned.
print('TEST262 SUCCESS')
else:
print('TEST262 SUCCESS')
# To fix count
print('')
print('TO-FIX COUNT: ' + str(tofix_count))
print(' = test case failures which need fixing (Duktape bugs, uninvestigated)')
with open(sys.argv[1], 'rb') as f:
known_issues = yaml.load(f.read())
skipstrings = [
'passed in strict mode',
'passed in non-strict mode',
'failed in strict mode as expected',
'failed in non-strict mode as expected'
]
in_failed_tests = False
tofix_count = 0 # count of bugs that will be fixed (no uncertainty about proper behavior etc)
known_errors = []
diagnosed_errors = []
unknown_errors = []
other_errors = []
for line in sys.stdin:
if len(line) > 1 and line[-1] == '\n':
line = line[:-1]
# Skip success cases
skip = False
for sk in skipstrings:
if sk in line:
skip = True
if skip:
continue
# Augment error list with "known bugs"
print(line) # print error list as is, then refined version later
if 'failed tests' in line.lower():
in_failed_tests = True
continue
if in_failed_tests and line.strip() == '':
in_failed_tests = False
continue
if in_failed_tests:
# " intl402/ch12/12.2/12.2.3_c in non-strict mode"
tmp = line.strip().split(' ')
test = tmp[0]
matched = False
for kn in known_issues:
if kn.get('test', None) != test:
continue
if kn.has_key('diagnosed'):
tofix_count += 1
diagnosed_errors.append(line + ' // diagnosed: ' + kn['diagnosed'])
elif kn.has_key('knownissue'):
# don't bump tofix_count, as testcase expected result is not certain
known_errors.append(line + ' // KNOWN: ' + kn['knownissue'])
else:
tofix_count += 1
unknown_errors.append(line + ' // ??? (rule matches)')
kn['used'] = True # mark rule used
matched = True
break
if matched:
continue
# no match, to fix
other_errors.append(line)
tofix_count += 1
print('')
print('=== CATEGORISED ERRORS ===')
print('')
for i in known_errors:
print(i)
for i in diagnosed_errors:
print(i)
for i in unknown_errors:
print(i)
for i in other_errors:
print(i)
# Check for unused rules (e.g. bugs fixed)
print('')
for kn in known_issues:
if not kn.has_key('used'):
print('WARNING: unused rule: ' + json.dumps(kn))
# Used by testclient
if len(unknown_errors) > 0 or len(other_errors) > 0:
print('TEST262 FAILED')
elif len(known_errors) > 0 or len(diagnosed_errors) > 0:
# Known and diagnosed errors don't indicate test failure
# as far as Github status is concerned.
print('TEST262 SUCCESS')
else:
print('TEST262 SUCCESS')
# To fix count
print('')
print('TO-FIX COUNT: ' + str(tofix_count))
print(' = test case failures which need fixing (Duktape bugs, uninvestigated)')
if __name__ == '__main__':
main()
main()
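# The known issues file (sys.argv[1]) is a YAML list of rules shaped
# like the following sketch (test paths and texts illustrative); a rule
# carries either a 'diagnosed' or a 'knownissue' field:
#
#   - test: intl402/ch12/12.2/12.2.3_c
#     knownissue: "Intl API not supported"
#   - test: ch15/15.10/15.10.2/some-test
#     diagnosed: "regexp engine limitation, fix planned"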

76
util/find_func_calls.py

@ -24,57 +24,57 @@ re_func_call = re.compile(r'([A-Za-z_][A-Za-z0-9_]+)\(')
re_string = re.compile(r'"(\\"|[^"])*"')
def stripLineContinuations(x):
res = re.sub(re_linecont, ' ', x)
#print(res)
return res
res = re.sub(re_linecont, ' ', x)
#print(res)
return res
def stripComments(x):
res = re.sub(re_comment, '/*omit*/', x)
#print(res)
return res
res = re.sub(re_comment, '/*omit*/', x)
#print(res)
return res
def stripStrings(x):
res = re.sub(re_string, '"..."', x)
#print(res)
return res
res = re.sub(re_string, '"..."', x)
#print(res)
return res
def findFuncCalls(d, fn):
res = []
for line in d.split('\n'):
if len(line) >= 1 and line[0] == '#':
# Preprocessor lines contain function call like
# syntax but are not function calls.
continue
res = []
for line in d.split('\n'):
if len(line) >= 1 and line[0] == '#':
# Preprocessor lines contain function call like
# syntax but are not function calls.
continue
for m in re_func_call.finditer(line):
res.append({
'name': m.group(1),
'filename': fn
})
return res
for m in re_func_call.finditer(line):
res.append({
'name': m.group(1),
'filename': fn
})
return res
def main():
# Duktape code does not have a space between a function name and
# an open parenthesis. If the regexp includes an optional space,
# it will provide a lot of false matches.
# Duktape code does not have a space between a function name and
# an open parenthesis. If the regexp includes an optional space,
# it will provide a lot of false matches.
for fn in sys.argv[1:]:
f = open(fn, 'rb')
d = f.read()
f.close()
for fn in sys.argv[1:]:
f = open(fn, 'rb')
d = f.read()
f.close()
# Strip line continuations, comments, and strings so that
# we minimize false matches.
# Strip line continuations, comments, and strings so that
# we minimize false matches.
d = stripLineContinuations(d)
d = stripComments(d)
d = stripStrings(d)
d = stripLineContinuations(d)
d = stripComments(d)
d = stripStrings(d)
# Find function calls (close enough).
# Find function calls (close enough).
for i in findFuncCalls(d, fn):
#print '%s' % i['name']
print '%-25s%s' % (i['name'], i['filename'])
for i in findFuncCalls(d, fn):
#print '%s' % i['name']
print '%-25s%s' % (i['name'], i['filename'])
if __name__ == '__main__':
main()
main()
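# Example (sketch): for the input line
#     rc = duk_safe_call(ctx, cb, 1 /* wrapper(x) */, 1);
# comment stripping removes the false match 'wrapper(' so that only
# 'duk_safe_call' is reported for the file.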

24
util/find_non_ascii.py

@ -8,18 +8,18 @@
import os, sys
def main():
f = open(sys.argv[1], 'rb')
data = f.read()
f.close()
f = open(sys.argv[1], 'rb')
data = f.read()
f.close()
for linenum, linedata in enumerate(data.split('\n')):
non_ascii = False
for i in xrange(len(linedata)):
x = ord(linedata[i])
if x >= 0x80:
print '%s: non-ascii data on line %d, char index %d, value %d (0x%02x)' % \
(sys.argv[1], linenum + 1, i + 1, x, x)
non_ascii = True
for linenum, linedata in enumerate(data.split('\n')):
non_ascii = False
for i in xrange(len(linedata)):
x = ord(linedata[i])
if x >= 0x80:
print '%s: non-ascii data on line %d, char index %d, value %d (0x%02x)' % \
(sys.argv[1], linenum + 1, i + 1, x, x)
non_ascii = True
if __name__ == '__main__':
main()
main()

64
util/fix_emscripten.py

@ -12,45 +12,45 @@ import sys
fix_count = 0
replacements = {
# RegExp fixes for non-compliant regexps (typically literal brace
# without a backslash escape). These fixes are no longer needed
# with Duktape 1.5.0 which adds support for parsing non-standard
# regexp curly braces.
#r"""if (/<?{ ?[^}]* ?}>?/.test(type)) return true""":
# r"""if (/<?\{ ?[^}]* ?\}>?/.test(type)) return true""",
#r"""var sourceRegex = /^function\s\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""":
# r"""var sourceRegex = /^function\s\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""",
#r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""":
# r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""",
#r"""/^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/""":
# r"""/^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/""",
# GH-11: Attempt to parse a function's toString() output with a RegExp.
# The RegExp assumes more of toString() output format than what is
# guaranteed by the specification, and won't parse Duktape 1.4.0 (and
# before) function toString() output ("function empty() {/* source code*/)}").
# No longer needed in Duktape 1.5.0 which changed the .toString() format.
#r"""var parsed = jsfunc.toString().match(sourceRegex).slice(1);""":
# r"""var parsed = (jsfunc.toString().match(sourceRegex) || []).slice(1);""",
#r"""jsfunc.toString().match(sourceRegex).slice(1);""":
# r"""(jsfunc.toString().match(sourceRegex) || []).slice(1);""",
# RegExp fixes for non-compliant regexps (typically literal brace
# without a backslash escape). These fixes are no longer needed
# with Duktape 1.5.0 which adds support for parsing non-standard
# regexp curly braces.
#r"""if (/<?{ ?[^}]* ?}>?/.test(type)) return true""":
# r"""if (/<?\{ ?[^}]* ?\}>?/.test(type)) return true""",
#r"""var sourceRegex = /^function\s\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""":
# r"""var sourceRegex = /^function\s\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""",
#r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;""":
# r"""var sourceRegex = /^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/;""",
#r"""/^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/""":
# r"""/^function\s*\(([^)]*)\)\s*\{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?\}$/""",
# GH-11: Attempt to parse a function's toString() output with a RegExp.
# The RegExp assumes more of toString() output format than what is
# guaranteed by the specification, and won't parse Duktape 1.4.0 (and
# before) function toString() output ("function empty() {/* source code*/)}").
# No longer needed in Duktape 1.5.0 which changed the .toString() format.
#r"""var parsed = jsfunc.toString().match(sourceRegex).slice(1);""":
# r"""var parsed = (jsfunc.toString().match(sourceRegex) || []).slice(1);""",
#r"""jsfunc.toString().match(sourceRegex).slice(1);""":
# r"""(jsfunc.toString().match(sourceRegex) || []).slice(1);""",
}
repl_keys = replacements.keys()
repl_keys.sort()
for line in sys.stdin:
if len(line) > 1 and line[-1] == '\n':
line = line[:-1]
if len(line) > 1 and line[-1] == '\n':
line = line[:-1]
for k in repl_keys:
line_fix = line.replace(k, replacements[k])
if line_fix != line:
fix_count += 1
line = line_fix
for k in repl_keys:
line_fix = line.replace(k, replacements[k])
if line_fix != line:
fix_count += 1
line = line_fix
print(line)
print(line)
if fix_count > 0:
sys.stderr.write('Emscripten fixes needed (fix_emscripten.py): fix_count=%d\n' % fix_count)
sys.stderr.flush()
sys.stderr.write('Emscripten fixes needed (fix_emscripten.py): fix_count=%d\n' % fix_count)
sys.stderr.flush()

78
util/format_perftest.py

@ -7,51 +7,51 @@ import sys
import re
def main():
# test-try-catch-throw.js : duk.O2.alt0 40.70 duk.O2.alt0f 40.74 duk.O2.alt1 40.10 duk.O2.alt1a 39.91 duk.O2.alt2 40.10 duk.O2.alt3 39.77 duk.O2.master 40.01 duk.O2.130 38.08
# test-try-catch-throw.js : duk.O2.alt0 40.70 duk.O2.alt0f 40.74 duk.O2.alt1 40.10 duk.O2.alt1a 39.91 duk.O2.alt2 40.10 duk.O2.alt3 39.77 duk.O2.master 40.01 duk.O2.130 38.08
re_line = re.compile(r'^(\S+)\s*:\s*(.*?)$')
re_part = re.compile(r'\S+')
first = True
re_line = re.compile(r'^(\S+)\s*:\s*(.*?)$')
re_part = re.compile(r'\S+')
first = True
with open(sys.argv[1], 'rb') as f_in, open(sys.argv[2], 'wb') as f_out:
f_out.write('<!DOCTYPE html>\n')
f_out.write('<html>\n')
f_out.write('<head>\n')
f_out.write("""\
with open(sys.argv[1], 'rb') as f_in, open(sys.argv[2], 'wb') as f_out:
f_out.write('<!DOCTYPE html>\n')
f_out.write('<html>\n')
f_out.write('<head>\n')
f_out.write("""\
<style>
th, td { margin: 0; padding: 6pt; text-align: right; }
tr:nth-child(odd) { background: #eeeeee; }
</style>
""")
f_out.write('</head>\n')
f_out.write('<body>\n')
f_out.write('<table>\n')
for line in f_in:
line = line.strip()
m = re_line.match(line)
if m is None:
continue
testname = m.group(1)
parts = re_part.findall(m.group(2))
if first:
first = False
f_out.write('<tr>')
f_out.write('<th></th>')
for idx in xrange(0, len(parts), 2):
f_out.write('<th>' + parts[idx] + '</th>')
f_out.write('</tr>\n')
f_out.write('<tr>')
f_out.write('<td>' + testname + '</td>')
for idx in xrange(1, len(parts), 2):
f_out.write('<td>' + parts[idx] + '</td>')
f_out.write('</tr>\n')
f_out.write('</table>\n')
f_out.write('</body>\n')
f_out.write('</html>\n')
f_out.write('</head>\n')
f_out.write('<body>\n')
f_out.write('<table>\n')
for line in f_in:
line = line.strip()
m = re_line.match(line)
if m is None:
continue
testname = m.group(1)
parts = re_part.findall(m.group(2))
if first:
first = False
f_out.write('<tr>')
f_out.write('<th></th>')
for idx in xrange(0, len(parts), 2):
f_out.write('<th>' + parts[idx] + '</th>')
f_out.write('</tr>\n')
f_out.write('<tr>')
f_out.write('<td>' + testname + '</td>')
for idx in xrange(1, len(parts), 2):
f_out.write('<td>' + parts[idx] + '</td>')
f_out.write('</tr>\n')
f_out.write('</table>\n')
f_out.write('</body>\n')
f_out.write('</html>\n')
if __name__ == '__main__':
main()
main()

40
src/gendoubleconsts.py → util/gendoubleconsts.py

@ -8,30 +8,30 @@ import struct
import mpmath
def create_double_constants_mpmath():
# Just a helper to use manually
# http://mpmath.googlecode.com/svn/trunk/doc/build/basics.html
mpmath.mp.prec = 1000 # 1000 bits
def printhex(name, x):
# to hex string, ready for create_double()
hex = struct.pack('>d', float(str(x))).encode('hex')
flt = struct.unpack('>d', hex.decode('hex'))[0]
print '%-11s -> %s (= %.20f)' % (name, hex, flt)
printhex('DBL_E', mpmath.mpf(mpmath.e))
printhex('DBL_LN10', mpmath.log(10))
printhex('DBL_LN2', mpmath.log(2))
printhex('DBL_LOG2E', mpmath.log(mpmath.e) / mpmath.log(2))
printhex('DBL_LOG10E', mpmath.log(mpmath.e) / mpmath.log(10))
printhex('DBL_PI', mpmath.mpf(mpmath.pi))
printhex('DBL_SQRT1_2', mpmath.mpf(1) / mpmath.sqrt(2))
printhex('DBL_SQRT2', mpmath.sqrt(2))
# Just a helper to use manually
# http://mpmath.googlecode.com/svn/trunk/doc/build/basics.html
mpmath.mp.prec = 1000 # 1000 bits
def printhex(name, x):
# to hex string, ready for create_double()
hex = struct.pack('>d', float(str(x))).encode('hex')
flt = struct.unpack('>d', hex.decode('hex'))[0]
print '%-11s -> %s (= %.20f)' % (name, hex, flt)
printhex('DBL_E', mpmath.mpf(mpmath.e))
printhex('DBL_LN10', mpmath.log(10))
printhex('DBL_LN2', mpmath.log(2))
printhex('DBL_LOG2E', mpmath.log(mpmath.e) / mpmath.log(2))
printhex('DBL_LOG10E', mpmath.log(mpmath.e) / mpmath.log(10))
printhex('DBL_PI', mpmath.mpf(mpmath.pi))
printhex('DBL_SQRT1_2', mpmath.mpf(1) / mpmath.sqrt(2))
printhex('DBL_SQRT2', mpmath.sqrt(2))
create_double_constants_mpmath()
def create_double(x):
return struct.unpack('>d', x.decode('hex'))[0]
return struct.unpack('>d', x.decode('hex'))[0]
DBL_NAN = create_double('7ff8000000000000') # a NaN matching our "normalized NAN" definition (see duk_tval.h)
DBL_POSITIVE_INFINITY = create_double('7ff0000000000000') # positive infinity (unique)
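# Round-trip sketch: create_double('400921fb54442d18') == 3.141592653589793,
# the IEEE double closest to pi (the DBL_PI constant printed above).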

57
util/genequivyear.py

@ -0,0 +1,57 @@
#!/usr/bin/env python2
#
# Generate equivalent year table needed by duk_bi_date.c. Based on:
#
# http://code.google.com/p/v8/source/browse/trunk/src/date.h#146
#
import datetime
import pytz
def isleapyear(year):
if (year % 4) != 0:
return False
if (year % 100) != 0:
return True
if (year % 400) != 0:
return False
return True
def eqyear(weekday, isleap):
# weekday: 0=Sunday, 1=Monday, ...
if isleap:
recent_year = 1956
else:
recent_year = 1967
recent_year += (weekday * 12) % 28
year = 2008 + (recent_year + 3 * 28 - 2008) % 28
# some assertions
#
# Note that Ecmascript internal weekday (0=Sunday) matches neither
# Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday).
# Python isoweekday() % 7 matches the Ecmascript weekday.
# https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday
dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC) # Jan 1 00:00:00.000 UTC
#print(weekday, isleap, year, dt.isoweekday(), isleapyear(year))
#print(repr(dt))
#print(dt.isoformat())
if isleap != isleapyear(year):
raise Exception('internal error: equivalent year does not have same leap-year-ness')
pass
if weekday != dt.isoweekday() % 7:
raise Exception('internal error: equivalent year does not begin with the same weekday')
pass
return year
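# Worked example (sketch): eqyear(0, False) -> recent_year = 1967,
# year = 2008 + (1967 + 3*28 - 2008) % 28 = 2008 + 15 = 2023; Jan 1,
# 2023 is indeed a Sunday in a non-leap year.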
def main():
for i in xrange(14):
print(eqyear(i % 7, i >= 7))
if __name__ == '__main__':
main()

164
util/genexesizereport.py

@ -0,0 +1,164 @@
#!/usr/bin/env python2
#
# Generate a size report from a Duktape library / executable.
# Write out useful information about function sizes in a variety
# of forms.
#
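# Usage sketch (binary name illustrative; requires 'objdump' in PATH,
# report is written to stdout):
#
#   $ python genexesizereport.py ./duk > /tmp/duk_sizes.html
#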
import os
import sys
import re
import subprocess
#000000000040d200 <duk_to_hstring>:
# 40d200: 55 push %rbp
# 40d201: 89 f5 mov %esi,%ebp
re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$')
re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$')
def objdump(filename):
proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE)
curr_func = None
func_start = None
func_end = None
ret = {}
def storeFunc():
if curr_func is None or func_start is None or func_end is None:
return
ret[curr_func] = {
'name': curr_func,
'start': func_start,
'end': func_end, # exclusive
'length': func_end - func_start
}
for line in proc.stdout:
line = line.strip()
m = re_funcstart.match(line)
if m is not None:
if curr_func is not None:
storeFunc()
curr_func = m.group(1)
func_start = None
func_end = None
m = re_codeline.match(line)
if m is not None:
func_addr = long(m.group(1), 16)
func_bytes = m.group(2)
func_nbytes = len(func_bytes.split(' '))
func_instr = m.group(3)
if func_start is None:
func_start = func_addr
func_end = func_addr + func_nbytes
storeFunc()
return ret
def filterFuncs(funcs):
todo = [] # avoid mutation while iterating
def accept(fun):
n = fun['name']
if n in [ '.comment',
'.dynstr',
'.dynsym',
'.eh_frame_hdr',
'.interp',
'.rela.dyn',
'.rela.plt',
'_DYNAMIC',
'_GLOBAL_OFFSET_TABLE_',
'_IO_stdin_used',
'__CTOR_LIST__',
'__DTOR_LIST__',
'_fini',
'_init',
'_start',
'' ]:
return False
for pfx in [ '.debug', '.gnu', '.note',
'__FRAME_', '__' ]:
if n.startswith(pfx):
return False
return True
for k in funcs.keys():
if not accept(funcs[k]):
todo.append(k)
for k in todo:
del funcs[k]
def main():
funcs = objdump(sys.argv[1])
filterFuncs(funcs)
funcs_keys = funcs.keys()
funcs_keys.sort()
combined_size_all = 0
combined_size_duk = 0
for k in funcs_keys:
fun = funcs[k]
combined_size_all += fun['length']
if fun['name'].startswith('duk_'):
combined_size_duk += fun['length']
f = sys.stdout
f.write('<html>')
f.write('<head>')
f.write('<title>Size dump for %s</title>' % sys.argv[1])
f.write("""\
<style type="text/css">
tr:nth-child(2n) {
background: #eeeeee;
}
tr:nth-child(2n+1) {
background: #dddddd;
}
</style>
""")
f.write('</head>')
f.write('<body>')
f.write('<h1>Summary</h1>')
f.write('<table>')
f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys))
f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all)
f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk)
f.write('</table>')
f.write('<h1>Sorted by function name</h1>')
f.write('<table>')
f.write('<tr><th>Name</th><th>Bytes</th></tr>')
funcs_keys = funcs.keys()
funcs_keys.sort()
for k in funcs_keys:
fun = funcs[k]
f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
f.write('</table>')
f.write('<h1>Sorted by size</h1>')
f.write('<table>')
f.write('<tr><th>Name</th><th>Bytes</th></tr>')
funcs_keys = funcs.keys()
def cmpSize(a,b):
return cmp(funcs[a]['length'], funcs[b]['length'])
funcs_keys.sort(cmp=cmpSize)
for k in funcs_keys:
fun = funcs[k]
f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
f.write('</table>')
f.write('</body>')
f.write('</html>')
if __name__ == '__main__':
main()

124
util/genhashsizes.py

@ -0,0 +1,124 @@
#!/usr/bin/env python2
#
# Find a sequence of duk_hobject hash sizes which have a desired 'ratio'
# and are primes. Prime hash sizes ensure that all probe sequence values
# (less than hash size) are relatively prime to hash size, i.e. cover the
# entire hash. Prime data is packed into about 1 byte/prime using a
# prediction-correction model.
#
# Also generates a set of probe steps which are relatively prime to every
# hash size.
import sys
import math
def is_prime(n):
if n == 0:
return False
if n == 1 or n == 2:
return True
n_limit = int(math.ceil(float(n) ** 0.5)) + 1
n_limit += 100 # paranoia
if n_limit >= n:
n_limit = n - 1
for i in xrange(2,n_limit + 1):
if (n % i) == 0:
return False
return True
def next_prime(n):
while True:
n += 1
if is_prime(n):
return n
def generate_sizes(min_size, max_size, step_ratio):
"Generate a set of hash sizes following a nice ratio."
sizes = []
ratios = []
curr = next_prime(min_size)
next = curr
sizes.append(curr)
step_ratio = float(step_ratio) / 1024
while True:
if next > max_size:
break
ratio = float(next) / float(curr)
if ratio < step_ratio:
next = next_prime(next)
continue
sys.stdout.write('.'); sys.stdout.flush()
sizes.append(next)
ratios.append(ratio)
curr = next
next = next_prime(int(next * step_ratio))
sys.stdout.write('\n'); sys.stdout.flush()
return sizes, ratios
def generate_corrections(sizes, step_ratio):
"Generate a set of correction from a ratio-based predictor."
# Generate a correction list for size list, assuming steps follow a certain
# ratio; this allows us to pack size list into one byte per size
res = []
res.append(sizes[0]) # first entry is first size
for i in xrange(1, len(sizes)):
prev = sizes[i - 1]
pred = int(prev * step_ratio) >> 10
diff = int(sizes[i] - pred)
res.append(diff)
if diff < 0 or diff > 127:
raise Exception('correction does not fit into 8 bits')
res.append(-1) # negative denotes end of list
return res
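# Decoding sketch (inverse of generate_corrections()): rebuild the size
# list from the first size plus per-entry corrections, using the same
# fixed-point predictor (step_ratio is the ~1.15 * 2^10 integer below).
def decode_sizes_sketch(packed, step_ratio):
    sizes = [ packed[0] ]
    for diff in packed[1:]:
        if diff < 0:  # negative correction marks end of list
            break
        pred = (sizes[-1] * step_ratio) >> 10
        sizes.append(pred + diff)
    return sizes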
def generate_probes(count, sizes):
res = []
# Generate probe values which are guaranteed to be relatively prime to
# all generated hash size primes. These don't have to be primes, but
# we currently use smallest non-conflicting primes here.
i = 2
while len(res) < count:
if is_prime(i) and (i not in sizes):
if i > 255:
raise Exception('probe step does not fit into 8 bits')
res.append(i)
i += 1
continue
i += 1
return res
# NB: these must match duk_hobject defines and code
step_ratio = 1177 # approximately (1.15 * (1 << 10))
min_size = 16
max_size = 2**32 - 1
sizes, ratios = generate_sizes(min_size, max_size, step_ratio)
corrections = generate_corrections(sizes, step_ratio)
probes = generate_probes(32, sizes)
print len(sizes)
print 'SIZES: ' + repr(sizes)
print 'RATIOS: ' + repr(ratios)
print 'CORRECTIONS: ' + repr(corrections)
print 'PROBES: ' + repr(probes)
# highest 32-bit prime
i = 2**32
while True:
i -= 1
if is_prime(i):
print 'highest 32-bit prime is: %d (0x%08x)' % (i, i)
break

24
src/gennumdigits.py → util/gennumdigits.py

@ -24,21 +24,21 @@ digits_table = []
limits_table = []
for radix in xrange(2, 36+1):
bits_per_digit = math.log(radix, 2)
bits_per_digit = math.log(radix, 2)
if radix == 10:
prec_digits = 20
else:
target_bits = math.ceil(math.log(10, 2) * 20) + 2 # +2 is extra, just in case
prec_digits = int(math.ceil(target_bits / bits_per_digit))
digits_table.append(prec_digits)
if radix == 10:
prec_digits = 20
else:
target_bits = math.ceil(math.log(10, 2) * 20) + 2 # +2 is extra, just in case
prec_digits = int(math.ceil(target_bits / bits_per_digit))
digits_table.append(prec_digits)
# these are conservative (details are off by one etc); +/- 2 is the extra
overflow_limit = int(math.ceil(1024.0 / bits_per_digit)) + 2 - prec_digits
underflow_limit = int(math.floor((-1024.0 - 52.0) / bits_per_digit)) - 2 - prec_digits
# these are conservative (details are off by one etc); +/- 2 is the extra
overflow_limit = int(math.ceil(1024.0 / bits_per_digit)) + 2 - prec_digits
underflow_limit = int(math.floor((-1024.0 - 52.0) / bits_per_digit)) - 2 - prec_digits
limits_table.append(overflow_limit)
limits_table.append(underflow_limit)
limits_table.append(overflow_limit)
limits_table.append(underflow_limit)
print repr(digits_table)
print repr(limits_table)

73
util/genobjsizereport.py

@ -0,0 +1,73 @@
#!/usr/bin/env python2
#
# Size report of (stripped) object and source files.
#
import os
import sys
def getsize(fname):
return os.stat(fname).st_size
def getlines(fname):
f = None
try:
f = open(fname, 'rb')
lines = f.read().split('\n')
return len(lines)
finally:
if f is not None:
f.close()
f = None
def process(srcfile, objfile):
srcsize = getsize(srcfile)
srclines = getlines(srcfile)
srcbpl = float(srcsize) / float(srclines)
objsize = getsize(objfile)
objbpl = float(objsize) / float(srclines)
return objsize, objbpl, srcsize, srclines, srcbpl
def main():
tot_srcsize = 0
tot_srclines = 0
tot_objsize = 0
tmp = []
for i in sys.argv[1:]:
objfile = i
if i.endswith('.strip'):
objname = i[:-6]
else:
objname = i
base, ext = os.path.splitext(objname)
srcfile = base + '.c'
objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile)
srcbase = os.path.basename(srcfile)
objbase = os.path.basename(objname) # foo.o.strip -> present as foo.o
tot_srcsize += srcsize
tot_srclines += srclines
tot_objsize += objsize
tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl))
def mycmp(a,b):
return cmp(a[5], b[5])
tmp.sort(cmp=mycmp, reverse=True) # sort by object size
fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f'
for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp:
print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl))
print('========================================================================')
print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines),
'', tot_objsize, float(tot_objsize) / float(tot_srclines)))
if __name__ == '__main__':
# Usage:
#
# $ strip *.o
# $ python genobjsizereport.py *.o
main()

5
util/json2yaml.py

@ -1,5 +0,0 @@
import os, sys, json, yaml
if __name__ == '__main__':
# Use safe_dump() instead of dump() to avoid tags like "!!python/unicode"
print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False))

8
util/make_ascii.py

@ -7,7 +7,7 @@ import os, sys
inp = sys.stdin.read().decode('utf-8')
for c in inp:
if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'):
sys.stdout.write(c)
else:
sys.stdout.write('\\u%04x' % ord(c))
if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'):
sys.stdout.write(c)
else:
sys.stdout.write('\\u%04x' % ord(c))

1316
util/make_dist.py

File diff suppressed because it is too large

933
util/matrix_compile.py

@ -25,421 +25,420 @@ import StringIO # no need for cStringIO
# Select one: Select([ 1, 2, 3 ]) -> [ 1 ], [ 2 ], [ 3 ]
class Select:
val = None
val = None
def __init__(self, val):
self.val = val
def __init__(self, val):
self.val = val
# Combine: Combine([ 1, 2 ], 'foo') -> [ 1 'foo' ], [ 2 'foo' ]
class Combine:
val = None
val = None
def __init__(self, val):
self.val = val
def __init__(self, val):
self.val = val
# Subset: Subset([ 'foo', 'bar' ]) -> Combine([ [ '', 'foo' ], [ '', 'bar' ] ])
# -> [ '' '' ], [ 'foo' '' ], [ '' 'bar' ], [ 'foo' 'bar' ]
class Subset:
val = None
val = None
def __init__(self, val):
self.val = val
def __init__(self, val):
self.val = val
# Sequence: Sequence([ 'foo', 'bar', 'quux' ]) -> [ 'foo', 'bar', 'quux' ]
# Plain list is also interpreted as a Sequence.
class Sequence:
val = None
val = None
def __init__(self, val):
self.val = val
def __init__(self, val):
self.val = val
# Prepare a combination lookup structure.
def prepcomb(val):
if isinstance(val, (str, unicode)):
return { 'size': 1, 'value': val, 'type': 'terminal' }
if isinstance(val, Sequence):
return { 'size': 1, 'value': val.val, 'type': 'sequence' }
if isinstance(val, list):
# interpret as Sequence
return { 'size': 1, 'value': val, 'type': 'sequence' }
if isinstance(val, Select):
nodes = []
size = 0
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size += node['size']
return { 'size': size, 'value': nodes, 'type': 'select' }
if isinstance(val, Combine):
nodes = []
size = 1
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size *= node['size']
return { 'size': size, 'value': nodes, 'type': 'combine' }
if isinstance(val, Subset):
nodes = []
size = 1
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size *= (node['size'] + 1) # value or not present
return { 'size': size, 'value': nodes, 'type': 'subset' }
raise Exception('invalid argument')
if isinstance(val, (str, unicode)):
return { 'size': 1, 'value': val, 'type': 'terminal' }
if isinstance(val, Sequence):
return { 'size': 1, 'value': val.val, 'type': 'sequence' }
if isinstance(val, list):
# interpret as Sequence
return { 'size': 1, 'value': val, 'type': 'sequence' }
if isinstance(val, Select):
nodes = []
size = 0
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size += node['size']
return { 'size': size, 'value': nodes, 'type': 'select' }
if isinstance(val, Combine):
nodes = []
size = 1
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size *= node['size']
return { 'size': size, 'value': nodes, 'type': 'combine' }
if isinstance(val, Subset):
nodes = []
size = 1
for i in val.val:
node = prepcomb(i)
nodes.append(node)
size *= (node['size'] + 1) # value or not present
return { 'size': size, 'value': nodes, 'type': 'subset' }
raise Exception('invalid argument')
# Return number of combinations for input lists.
def countcombinations(prepped):
return prepped['size']
return prepped['size']
# Return a combination for index, for index in [0,countcombinations(lists)[.
# This allows random selection of combinations using a PRNG.
def getcomb(prepped, index):
if prepped['type'] == 'terminal':
return [ prepped['value'] ], index
if prepped['type'] == 'sequence':
return prepped['value'], index
if prepped['type'] == 'select':
idx = index % prepped['size']
index = index / prepped['size']
for i in prepped['value']:
if idx >= i['size']:
idx -= i['size']
continue
ret, ign_index = getcomb(i, idx)
return ret, index
raise Exception('should not be here')
if prepped['type'] == 'combine':
ret = []
for i in prepped['value']:
idx = index % i['size']
index = index / i['size']
tmp, tmp_index = getcomb(i, idx)
ret.append(tmp)
return ret, index
if prepped['type'] == 'subset':
ret = []
for i in prepped['value']:
idx = index % (i['size'] + 1)
index = index / (i['size'] + 1)
if idx == 0:
# no value
ret.append('')
else:
tmp, tmp_index = getcomb(i, idx - 1)
ret.append(tmp)
return ret, index
raise Exception('invalid prepped value')
if prepped['type'] == 'terminal':
return [ prepped['value'] ], index
if prepped['type'] == 'sequence':
return prepped['value'], index
if prepped['type'] == 'select':
idx = index % prepped['size']
index = index / prepped['size']
for i in prepped['value']:
if idx >= i['size']:
idx -= i['size']
continue
ret, ign_index = getcomb(i, idx)
return ret, index
raise Exception('should not be here')
if prepped['type'] == 'combine':
ret = []
for i in prepped['value']:
idx = index % i['size']
index = index / i['size']
tmp, tmp_index = getcomb(i, idx)
ret.append(tmp)
return ret, index
if prepped['type'] == 'subset':
ret = []
for i in prepped['value']:
idx = index % (i['size'] + 1)
index = index / (i['size'] + 1)
if idx == 0:
# no value
ret.append('')
else:
tmp, tmp_index = getcomb(i, idx - 1)
ret.append(tmp)
return ret, index
raise Exception('invalid prepped value')
def flatten(v):
if isinstance(v, (str, unicode)):
return [ v ]
if isinstance(v, list):
ret = []
for i in v:
ret += flatten(i)
return ret
raise Exception('invalid value: %s' % repr(v))
if isinstance(v, (str, unicode)):
return [ v ]
if isinstance(v, list):
ret = []
for i in v:
ret += flatten(i)
return ret
raise Exception('invalid value: %s' % repr(v))
def getcombination(val, index):
res, res_index = getcomb(val, index)
if res_index != 0:
sys.stderr.write('WARNING: index not consumed entirely, invalid index? (input index %d, output index %d)\n' % (index, res_index))
return res
# Generate all combinations.
def getcombinations(val):
res = []
for i in xrange(countcombinations(val)):
res.append(getcombination(val, i))
return res
#
# Test matrix
#
def create_matrix(fn_duk):
# Many compiler versions are used; at least the following must be installed:
#
# gcc-4.6
# gcc-4.7
# gcc-4.8
# gcc-4.6-multilib
# g++-4.6-multilib
# gcc-4.7-multilib
# g++-4.7-multilib
# gcc-4.8-multilib
# g++-4.8-multilib
# gcc-multilib
# g++-multilib
# llvm-gcc-4.6
# llvm-gcc-4.7
# llvm-3.4
# clang
#
# The set of compilers tested is distribution specific and not very
# stable, so you may need to edit the compiler list manually.
gcc_cmd_dialect_options = Select([
# Some dialects and architectures are only available for newer gcc versions
Combine([
# -m32 with older llvm causes self test failure (double union)
Select([ 'llvm-gcc' ]),
Select([ '-m64' ]),
Select([
'',
'-std=c89',
'-std=c99',
[ '-std=c99', '-pedantic' ]
])
]),
Combine([
Select([ 'gcc', 'gcc-4.6' ]),
Select([ '-m64', '-m32' ]),
Select([
'',
'-std=c89',
'-std=c99',
[ '-std=c99', '-pedantic' ]
])
]),
Combine([
Select([ 'gcc-4.7', 'gcc-4.8' ]),
Select([ '-m64', '-m32', '-mx32' ]),
Select([
'',
'-std=c89',
'-std=c99',
[ '-std=c99', '-pedantic' ]
])
]),
])
gxx_cmd_dialect_options = Select([
# Some dialects and architectures are only available for newer g++ versions
Combine([
Select([ 'llvm-g++' ]),
Select([ '-m64' ]),
Select([
'',
'-std=c++98',
[ '-std=c++11', '-pedantic' ]
])
]),
Combine([
Select([ 'g++', 'g++-4.6' ]),
Select([ '-m64', '-m32' ]),
Select([
'',
'-std=c++98',
])
]),
Combine([
Select([ 'g++-4.7', 'g++-4.8' ]),
Select([ '-m64', '-m32', '-mx32' ]),
Select([
'',
'-std=c++98',
[ '-std=c++11', '-pedantic' ]
])
]),
Combine([
Select([ 'g++', 'g++-4.8' ]),
Select([ '-m64', '-m32', '-mx32' ]),
Select([
'-std=c++1y',
'-std=gnu++1y'
])
]),
])
gcc_gxx_debug_options = Select([
'',
[ '-g', '-ggdb' ]
])
gcc_gxx_warning_options = Select([
'',
#'-Wall',
[ '-Wall', '-Wextra' ]
#XXX: -Wfloat-equal
# [ '-Wall', '-Wextra', '-Werror' ]
])
gcc_gxx_optimization_options = Select([
'-O0',
'-O1',
'-O2',
# -O3 and -O4 produce spurious warnings on gcc 4.8.1, e.g. "error: assuming signed overflow does not occur when assuming that (X - c) > X is always false [-Werror=strict-overflow]"
# It's unclear what causes these; perhaps GCC converts signed comparisons into subtractions and then runs into: https://gcc.gnu.org/wiki/FAQ#signed_overflow
[ '-O3', '-fno-strict-overflow' ],
#'-O3'
[ '-O4', '-fno-strict-overflow' ],
#'-O4'
'-Os'
])
clang_cmd_dialect_options = Select([
Combine([
'clang',
Select([ '-m64', '-m32' ]),
Select([
'',
'-std=c89',
'-std=c99',
[ '-std=c99', '-pedantic' ]
])
])
])
clang_debug_options = Select([
'',
[ '-g', '-ggdb' ]
])
clang_warning_options = Select([
'',
[ '-Wall', '-Wextra' ],
[ '-Wall', '-Wextra', '-Wcast-align' ]
#XXX: -Wfloat-equal
#[ '-Wall', '-Wextra', '-Werror' ]
])
clang_optimization_options = Select([
'-O0',
'-O1',
'-O2',
'-O3',
#'-O4',
'-Os'
])
# Feature options in suitable chunks that can be subsetted arbitrarily.
duktape_options = Subset([
Select([ '-DDUK_OPT_NO_REFERENCE_COUNTING',
'-DDUK_OPT_NO_MARK_AND_SWEEP',
'-DDUK_OPT_GC_TORTURE' ]),
'-DDUK_OPT_SHUFFLE_TORTURE',
'-DDUK_OPT_NO_VOLUNTARY_GC',
'-DDUK_OPT_NO_PACKED_TVAL',
Select([ '', '-DDUK_OPT_FORCE_ALIGN=4', '-DDUK_OPT_FORCE_ALIGN=8' ]),
'-DDUK_OPT_NO_TRACEBACKS',
'-DDUK_OPT_NO_VERBOSE_ERRORS',
'-DDUK_OPT_PARANOID_ERRORS',
'-DDUK_OPT_NO_MS_RESIZE_STRINGTABLE',
'-DDUK_OPT_NO_STRICT_DECL',
'-DDUK_OPT_NO_REGEXP_SUPPORT',
'-DDUK_OPT_NO_ES6_REGEXP_SYNTAX',
'-DDUK_OPT_NO_OCTAL_SUPPORT',
'-DDUK_OPT_NO_SOURCE_NONBMP',
'-DDUK_OPT_STRICT_UTF8_SOURCE',
'-DDUK_OPT_NO_SECTION_B',
'-DDUK_OPT_NO_JX',
'-DDUK_OPT_NO_JC',
'-DDUK_OPT_NO_NONSTD_ACCESSOR_KEY_ARGUMENT',
'-DDUK_OPT_NO_NONSTD_FUNC_STMT',
'-DDUK_OPT_NONSTD_FUNC_CALLER_PROPERTY',
'-DDUK_OPT_NONSTD_FUNC_SOURCE_PROPERTY',
'-DDUK_OPT_NO_NONSTD_ARRAY_SPLICE_DELCOUNT',
'-DDUK_OPT_NO_NONSTD_ARRAY_CONCAT_TRAILER',
'-DDUK_OPT_NO_NONSTD_ARRAY_MAP_TRAILER',
'-DDUK_OPT_NO_NONSTD_JSON_ESC_U2028_U2029',
'-DDUK_OPT_NO_BYTECODE_DUMP_SUPPORT',
'-DDUK_OPT_NO_ES6_OBJECT_PROTO_PROPERTY',
'-DDUK_OPT_NO_ES6_OBJECT_SETPROTOTYPEOF',
'-DDUK_OPT_NO_ES6_PROXY',
'-DDUK_OPT_NO_ZERO_BUFFER_DATA',
'-DDUK_OPT_LIGHTFUNC_BUILTINS',
'-DDUK_OPT_ASSERTIONS',
[ '-DDUK_OPT_DEBUG', '-DDUK_OPT_DEBUG_WRITE(level,file,line,func,msg)=do {fprintf(stderr, "%ld %s %ld %s %s\\n", (long) (level), (file), (long) (line), (func), (msg));} while(0)', '-DDUK_OPT_DPRINT', '-DDUK_OPT_DDDPRINT' ],
'-DDUK_OPT_SELF_TESTS',
[ '-DDUK_OPT_STRTAB_CHAIN', '-DDUK_OPT_STRTAB_CHAIN_SIZE=64' ],
# DUK_OPT_DEBUGGER_SUPPORT depends on having pc2line and
# interrupt counter, so avoid invalid combinations.
Select([
Subset([ '-DDUK_OPT_NO_PC2LINE', '-DDUK_OPT_INTERRUPT_COUNTER' ]),
[ '-DDUK_OPT_DEBUGGER_SUPPORT', '-DDUK_OPT_INTERRUPT_COUNTER' ]
]),
'-DDUK_OPT_DEBUGGER_FWD_LOGGING',
'-DDUK_OPT_DEBUGGER_DUMPHEAP',
'-DDUK_OPT_DEBUGGER_INSPECT',
'-DDUK_OPT_NO_DEBUGGER_THROW_NOTIFY',
'-DDUK_OPT_DEBUGGER_PAUSE_UNCAUGHT',
'-DDUK_OPT_JSON_STRINGIFY_FASTPATH'
# XXX: 16-bit options
])
# XXX: DUK_USE_LEXER_SLIDING_WINDOW
# The final command is compiler specific because e.g. include path
# and link option syntax could (in principle) differ between compilers.
gcc_cmd_matrix = Combine([
gcc_cmd_dialect_options,
gcc_gxx_debug_options,
gcc_gxx_warning_options,
gcc_gxx_optimization_options,
duktape_options,
[ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ]
])
gxx_cmd_matrix = Combine([
gxx_cmd_dialect_options,
gcc_gxx_debug_options,
gcc_gxx_warning_options,
gcc_gxx_optimization_options,
duktape_options,
[ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ]
])
clang_cmd_matrix = Combine([
clang_cmd_dialect_options,
clang_debug_options,
clang_warning_options,
clang_optimization_options,
duktape_options,
[ '-DDUK_CMDLINE_PRINTALERT_SUPPORT', '-Isrc', '-Iextras/print-alert', 'src/duktape.c', 'extras/print-alert/duk_print_alert.c', 'examples/cmdline/duk_cmdline.c', '-o', fn_duk, '-lm' ]
])
matrix = Select([ gcc_cmd_matrix, gxx_cmd_matrix, clang_cmd_matrix ])
return matrix
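# Usage sketch (illustrative, not part of the original script): the matrix
# is never enumerated in full, only counted and sampled, e.g.:
#
#   matrix = create_matrix('/tmp/duk')
#   prepped = prepcomb(matrix)
#   n = countcombinations(prepped)                    # runs into billions
#   cmd = getcombination(prepped, random.randrange(0, n))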
#
# Main
#
def check_unlink(filename):
if os.path.exists(filename):
os.unlink(filename)
def main():
# XXX: add option for testcase(s) to run?
# XXX: add valgrind support, restrict to -m64 compilation?
# XXX: proper tempfile usage and cleanup
time_str = str(long(time.time() * 1000.0))
parser = optparse.OptionParser()
parser.add_option('--count', dest='count', default='1000')
parser.add_option('--seed', dest='seed', default='default_seed_' + time_str)
parser.add_option('--out-results-json', dest='out_results_json', default='/tmp/matrix_results%s.json' % time_str)
parser.add_option('--out-failed', dest='out_failed', default='/tmp/matrix_failed%s.txt' % time_str)
parser.add_option('--verbose', dest='verbose', default=False, action='store_true')
(opts, args) = parser.parse_args()
fn_testjs = '/tmp/test%s.js' % time_str
fn_duk = '/tmp/duk%s' % time_str
# Avoid any optional features (like JSON or RegExps) in the test.
# Don't make the test very long, as it executes very slowly when
# DUK_OPT_DDDPRINT and DUK_OPT_ASSERTIONS are enabled.
f = open(fn_testjs, 'wb')
f.write('''
// Fibonacci using try-catch, exercises setjmp/longjmp a lot
function fibthrow(n) {
var f1, f2;
@ -455,109 +454,109 @@ print(Math.PI); // tests constant endianness
print(JSON.stringify({ foo: 'bar' }));
try { fibthrow(9); } catch (e) { print(e); }
''')
f.close()
expect = 'Hello world\n3\n3.141592653589793\n{"foo":"bar"}\n34\n'
print('Using seed: ' + repr(opts.seed))
random.seed(opts.seed)
matrix = create_matrix(fn_duk)
prepped = prepcomb(matrix)
# print(json.dumps(prepped, indent=4))
# print(json.dumps(getcombinations(prepped), indent=4))
numcombinations = countcombinations(prepped)
# The number of combinations is large, so do (pseudo)random testing
# over the matrix. Ideally we'd avoid re-testing the same combination
# twice, but with a matrix space in the billions this doesn't need to
# be checked.
res = []
failed = []
for i in xrange(long(opts.count)):
fail = False
idx = random.randrange(0, numcombinations)
cmd = getcombination(prepped, idx)
#cmd = getcombination(prepped, idx)
compile_command = flatten(cmd)
compile_command = [ elem for elem in compile_command if elem != '' ] # remove empty strings
print('%d/%d (combination %d, count %d)' % (i + 1, long(opts.count), idx, numcombinations))
#print('%d/%d (combination %d, count %d) %s' % (i + 1, long(opts.count), idx, numcombinations, repr(compile_command)))
if opts.verbose:
print(' '.join(compile_command))
check_unlink(fn_duk)
#print(repr(compile_command))
compile_p = subprocess.Popen(compile_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
compile_stdout, compile_stderr = compile_p.communicate()
compile_exitcode = compile_p.returncode
if compile_exitcode != 0:
fail = True
else:
if not os.path.exists(fn_duk):
print('*** WARNING: compile success but no %s ***' % fn_duk)
run_command = [ fn_duk, fn_testjs ]
if fail:
run_stdout = None
run_stderr = None
run_exitcode = 1
else:
run_p = subprocess.Popen(run_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
run_stdout, run_stderr = run_p.communicate()
run_exitcode = run_p.returncode
if run_exitcode != 0:
fail = True
if run_stdout != expect:
fail = True
if fail:
print('------------------------------------------------------------------------------')
print('*** FAILED: %s' % repr(compile_command))
print(' '.join(compile_command))
failed.append(' '.join(compile_command))
print('COMPILE STDOUT:')
print(compile_stdout)
print('COMPILE STDERR:')
print(compile_stderr)
print('RUN STDOUT:')
print(run_stdout)
print('RUN STDERR:')
print(run_stderr)
print('------------------------------------------------------------------------------')
res.append({
'compile_command': compile_command,
'compile_stdout': compile_stdout,
'compile_stderr': compile_stderr,
'compile_exitcode': compile_exitcode,
'run_command': run_command,
'run_stdout': run_stdout,
# Don't include debug output, it's huge with DUK_OPT_DDDPRINT
#'run_stderr': run_stderr,
'run_exitcode': run_exitcode,
'run_expect': expect,
'success': not fail
})
sys.stdout.flush()
sys.stderr.flush()
f = open(opts.out_results_json, 'wb')
f.write(json.dumps(res, indent=4, sort_keys=True))
f.close()
f = open(opts.out_failed, 'wb')
f.write('\n'.join(failed) + '\n')
f.close()
check_unlink(fn_duk)
check_unlink(fn_testjs)
# XXX: summary of success/failure/warnings (= stderr got anything)
if __name__ == '__main__':
main()
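# Example invocation (illustrative; option names as defined in main() above):
#
#   $ python util/matrix_compile.py --count 500 --seed myseed \
#         --out-results-json /tmp/results.json --out-failed /tmp/failed.txt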

310
util/prep_test.py

@ -26,168 +26,168 @@ import optparse
re_include = re.compile(r'^/\*@include\s(.*?)\s*@\*/$')
def readFile(fn):
f = open(fn, 'rb')
data = f.read()
f.close()
return data
def writeFile(fn, data):
f = open(fn, 'wb')
f.write(data)
f.close()
def stripTrailingNewlines(data):
while data.endswith('\n'):
data = data[:-1]
return data
class TestcasePreparer:
def __init__(self,
util_include_path=None,
minify_provider=None,
closure_jar_path=None,
uglifyjs_exe_path=None,
uglifyjs2_exe_path=None):
self.util_include_path = util_include_path
self.minify_provider = minify_provider
self.closure_jar_path = closure_jar_path
self.uglifyjs_exe_path = uglifyjs_exe_path
self.uglifyjs2_exe_path = uglifyjs2_exe_path
def prepApiTest(self, fn, fn_prologue, data):  # fn_prologue currently unused
# XXX: implement API testcase prepping
return data
def minifyClosure(self, fn):
fh, absFn = tempfile.mkstemp(suffix='prep_temp')
os.close(fh)
rc = subprocess.call(['java', '-jar', self.closure_jar_path, '--js_output_file', absFn, fn ])
if rc != 0:
raise Exception('closure minify failed')
res = readFile(absFn)
os.unlink(absFn)
return res
def minifyUglifyJS(self, fn):
fh, absFn = tempfile.mkstemp(suffix='prep_temp')
os.close(fh)
rc = subprocess.call([self.uglifyjs_exe_path, '-o', absFn, fn])
if rc != 0:
raise Exception('uglifyjs minify failed')
res = readFile(absFn)
os.unlink(absFn)
return res
def minifyUglifyJS2(self, fn):
fh, absFn = tempfile.mkstemp(suffix='prep_temp')
os.close(fh)
rc = subprocess.call([self.uglifyjs2_exe_path, '-o', absFn, fn])
if rc != 0:
raise Exception('uglifyjs2 minify failed')
res = readFile(absFn)
os.unlink(absFn)
return res
def minifyOneLine(self, fn):
# Closure is very slow to start, so it's not ideal for test case use.
# The only thing we really need is to turn the Ecmascript input into a one-liner.
if self.minify_provider == 'closure':
return self.minifyClosure(fn)
elif self.minify_provider == 'uglifyjs':
return self.minifyUglifyJS(fn)
elif self.minify_provider == 'uglifyjs2':
return self.minifyUglifyJS2(fn)
else:
raise Exception('no minifier')
def prepEcmaPrologue(self, fn):
return stripTrailingNewlines(self.minifyOneLine(fn))
def prepEcmaInclude(self, fn):
absFn = os.path.join(self.util_include_path, fn)
return '/* INCLUDE: ' + fn + ' */ ' + stripTrailingNewlines(self.minifyOneLine(absFn))
def prepEcmaTest(self, fn_in, fn_prologue, data):
is_strict = False
lines = []
for line in data.split('\n'):
if line.startswith('/'):
m = re_include.match(line)
if m is not None:
lines.append(self.prepEcmaInclude(m.group(1)))
continue
elif line.startswith('"use strict"') or line.startswith("'use strict'"):
# This is very approximate, but correct for current tests.
is_strict = True
lines.append(line)
if fn_prologue is not None:
# Prepend prologue to first line; if the program is strict
# duplicate the 'use strict' declaration.
lines[0] = self.prepEcmaPrologue(fn_prologue) + ' /*...*/ ' + lines[0]
if is_strict:
lines[0] = "'use strict'; " + lines[0]
return '\n'.join(lines)
def prepareTestcase(self, fn_in, fn_out, fn_prologue):
data = readFile(fn_in)
if fn_in.endswith('.c'):
res = self.prepApiTest(fn_in, fn_prologue, data)
elif fn_in.endswith('.js'):
res = self.prepEcmaTest(fn_in, fn_prologue, data)
else:
raise Exception('invalid file (not .c or .js)')
writeFile(fn_out, res)
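# Example of the include directive consumed by prepEcmaTest() (the filename
# is made up for illustration):
#
#   m = re_include.match('/*@include util-object.js@*/')
#   m.group(1)    # -> 'util-object.js', then minified and spliced in as a one-liner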
def main():
parser = optparse.OptionParser()
parser.add_option('--input', dest='input', default=None)
parser.add_option('--output', dest='output', default=None)
parser.add_option('--prologue', dest='prologue', default=None)
parser.add_option('--util-include-path', dest='util_include_path', default=None)
parser.add_option('--minify-closure', dest='minify_closure', default=None) # point to compiler.jar
parser.add_option('--minify-uglifyjs', dest='minify_uglifyjs', default=None) # point to uglifyjs exe
parser.add_option('--minify-uglifyjs2', dest='minify_uglifyjs2', default=None) # point to uglifyjs2 exe
(opts, args) = parser.parse_args()
if opts.input is None or opts.output is None:
raise Exception('filename argument(s) missing (--input and/or --output)')
if opts.util_include_path is None:
raise Exception('missing util include path (--util-include-path)')
fn_in = opts.input
fn_out = opts.output
fn_prologue = opts.prologue
minify_provider = None
if opts.minify_closure is not None:
minify_provider = 'closure'
elif opts.minify_uglifyjs is not None:
minify_provider = 'uglifyjs'
elif opts.minify_uglifyjs2 is not None:
minify_provider = 'uglifyjs2'
else:
raise Exception('must provide a minifier (include files must be converted to one-liners)')
preparer = TestcasePreparer(util_include_path=opts.util_include_path,
minify_provider=minify_provider,
closure_jar_path=opts.minify_closure,
uglifyjs_exe_path=opts.minify_uglifyjs,
uglifyjs2_exe_path=opts.minify_uglifyjs2)
preparer.prepareTestcase(fn_in, fn_out, fn_prologue)
if __name__ == '__main__':
main()

33
util/rdfdiff.py

@ -0,0 +1,33 @@
#!/usr/bin/env python2
#
# RDF graph diff, useful for diffing the SPDX license file for the release checklist.
#
# Based on:
#
# - https://www.w3.org/2001/sw/wiki/How_to_diff_RDF
# - https://github.com/RDFLib/rdflib/blob/master/rdflib/compare.py
#
import os
import sys
def main():
from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff
with open(sys.argv[1]) as f:
d1 = f.read()
with open(sys.argv[2]) as f:
d2 = f.read()
print('Loading graph 1 from ' + sys.argv[1])
g1 = Graph().parse(format='n3', data=d1)
print('Loading graph 2 from ' + sys.argv[2])
g2 = Graph().parse(format='n3', data=d2)
iso1 = to_isomorphic(g1)
iso2 = to_isomorphic(g2)
# Compute and print the diff; in_first/in_second hold the triples unique
# to each graph. (Completes the listing, which is truncated above.)
in_both, in_first, in_second = graph_diff(iso1, iso2)
print('Triples only in graph 1:')
print(in_first.serialize(format='n3'))
print('Triples only in graph 2:')
print(in_second.serialize(format='n3'))
if __name__ == '__main__':
main()

26
util/resolve_combined_lineno.py

@ -1,26 +0,0 @@
#!/usr/bin/env python2
#
# Resolve a line number in the combined source into an uncombined file/line
# using a dist/src/metadata.json file.
#
# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345
#
import os
import sys
import json
def main():
with open(sys.argv[1], 'rb') as f:
metadata = json.loads(f.read())
lineno = int(sys.argv[2])
for e in reversed(metadata['line_map']):
if lineno >= e['combined_line']:
orig_lineno = e['original_line'] + (lineno - e['combined_line'])
print('%s:%d -> %s:%d' % ('duktape.c', lineno,
e['original_file'], orig_lineno))
break
if __name__ == '__main__':
main()
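# Worked example of the mapping arithmetic above (metadata values made up
# for illustration): if a line_map entry records that combined line 12345
# starts a span originating at duk_api.c line 100, then combined line 12350
# resolves to 100 + (12350 - 12345), i.e. duk_api.c:105.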

135
util/scan_strings.py

@ -1,135 +0,0 @@
#!/usr/bin/env python2
#
# Scan potential external strings from Ecmascript and C files.
#
# Very simplistic example with a lot of limitations:
#
# - Doesn't handle multiple variables in a variable declaration
#
# - Only extracts strings from C files, these may correspond to
# Duktape/C bindings (but in many cases don't)
#
import os
import sys
import re
import json
strmap = {}
# Ecmascript function declaration
re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE)
# Ecmascript variable declaration
# XXX: doesn't handle multiple variables
re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE)
# Ecmascript variable assignment
re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE)
# Ecmascript dotted property reference (also matches numbers like
# '4.0', which are separately rejected below)
re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE)
re_digits = re.compile(r'^\d+$', re.UNICODE)
# Ecmascript or C string literal
re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE)
re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE)
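# Quick illustration of the property-reference pattern (example strings made
# up): re_propref matches both dotted identifiers and plain numbers; the
# digits check in scan() below is what rejects the latter.
#
#   re_propref.search('x = foo.bar.baz').group(1)   # -> 'foo.bar.baz'
#   re_propref.search('y = 4.0').group(1)           # -> '4.0' (rejected later)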
def strDecode(x):
# Need to decode hex, unicode, and other escapes. Python escape syntax
# is close enough to C and Ecmascript, so use eval for now.
try:
return eval('u' + x) # interpret as unicode string
except:
sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n')
return None
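# Example of the eval trick: strDecode is fed the literal with its quotes
# intact, e.g.:
#
#   strDecode(r'"foo\u00e4\n"')   # -> u'foo\xe4\n'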
def scan(f, fn):
global strmap
# Scan rules depend on file type
if fn[-2:] == '.c':
use_funcname = False
use_vardecl = False
use_varassign = False
use_propref = False
use_strlit_dquot = True
use_strlit_squot = False
else:
use_funcname = True
use_vardecl = True
use_varassign = True
use_propref = True
use_strlit_dquot = True
use_strlit_squot = True
for line in f:
# Assume input data is UTF-8
line = line.decode('utf-8')
if use_funcname:
for m in re_funcname.finditer(line):
strmap[m.group(1)] = True
if use_vardecl:
for m in re_vardecl.finditer(line):
strmap[m.group(1)] = True
if use_varassign:
for m in re_varassign.finditer(line):
strmap[m.group(1)] = True
if use_propref:
for m in re_propref.finditer(line):
parts = m.group(1).split('.')
if re_digits.match(parts[0]) is not None:
# Probably a number ('4.0' or such)
pass
else:
for part in parts:
strmap[part] = True
if use_strlit_dquot:
for m in re_strlit_dquot.finditer(line):
s = strDecode(m.group(1))
if s is not None:
strmap[s] = True
if use_strlit_squot:
for m in re_strlit_squot.finditer(line):
s = strDecode(m.group(1))
if s is not None:
strmap[s] = True
def main():
for fn in sys.argv[1:]:
f = open(fn, 'rb')
scan(f, fn)
f.close()
strs = []
strs_base64 = []
doc = {
# Strings as Unicode strings
'scanned_strings': strs,
# Strings as base64-encoded UTF-8 data, which should be ready
# to be used in C code (Duktape internal string representation
# is UTF-8)
'scanned_strings_base64': strs_base64
}
k = strmap.keys()
k.sort()
for s in k:
strs.append(s)
t = s.encode('utf-8').encode('base64')
if len(t) > 0 and t[-1] == '\n':
t = t[0:-1]
strs_base64.append(t)
print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True))
if __name__ == '__main__':
main()
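# Note on the base64 handling above: Python 2's str.encode('base64') appends
# a trailing newline, which is why it is stripped before emitting JSON, e.g.:
#
#   u'foo'.encode('utf-8').encode('base64')   # -> 'Zm9v\n'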

176
util/time_multi.py

@ -10,93 +10,93 @@ import optparse
import subprocess
def main():
parser = optparse.OptionParser()
parser.add_option('--count', type='int', dest='count', default=3)
parser.add_option('--mode', dest='mode', default='min')
parser.add_option('--sleep', type='float', dest='sleep', default=0.0)
parser.add_option('--sleep-factor', type='float', dest='sleep_factor', default=0.0)
parser.add_option('--rerun-limit', type='int', dest='rerun_limit', default=30)
parser.add_option('--verbose', action='store_true', dest='verbose', default=False)
(opts, args) = parser.parse_args()
time_min = None
time_max = None
time_sum = 0.0
time_list = []
if opts.verbose:
sys.stderr.write('Running:')
sys.stderr.flush()
for i in xrange(opts.count):
time.sleep(opts.sleep)
cmd = [
'time',
'-f', '%U',
'--quiet'
]
cmd = cmd + args
#print(repr(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
retval = p.wait()
#print(i, retval, stdout, stderr)
if retval == 139:
print 'segv'
sys.exit(1)
elif retval != 0:
print 'n/a'
sys.exit(1)
time_this = float(stderr)
#print(i, time_this)
if time_min is None:
time_min = time_this
else:
time_min = min(time_min, time_this)
if time_max is None:
time_max = time_this
else:
time_max = max(time_max, time_this)
time_sum += time_this
if opts.verbose:
sys.stderr.write(' %f' % time_this)
sys.stderr.flush()
time_list.append(time_this)
# Sleep time dependent on test time is useful for thermal throttling.
time.sleep(opts.sleep_factor * time_this)
# If run takes too long, there's no point in trying to get an accurate
# estimate.
if time_this >= opts.rerun_limit:
break
if opts.verbose:
sys.stderr.write('\n')
sys.stderr.flush()
time_avg = time_sum / float(len(time_list))
# /usr/bin/time has only two digits of resolution
if opts.mode == 'min':
print('%.02f' % time_min)
elif opts.mode == 'max':
print('%.02f' % time_max)
elif opts.mode == 'avg':
print('%.02f' % time_avg)
elif opts.mode == 'all':
print('min=%.02f, max=%.02f, avg=%0.2f, count=%d: %r' % \
(time_min, time_max, time_avg, len(time_list), time_list))
else:
print('invalid mode: %r' % opts.mode)
sys.exit(0)
if __name__ == '__main__':
main()
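# Example invocation (paths made up for illustration): time a duk binary on
# a testcase, taking the minimum of five runs:
#
#   $ python util/time_multi.py --count 5 --mode min ./duk tests/perf/test-fib.js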
