mirror of https://github.com/svaarala/duktape.git
Sami Vaarala
8 years ago
70 changed files with 10680 additions and 10250 deletions
@@ -1,41 +0,0 @@
#!/usr/bin/env python2
#
#  Extract unique DUK_USE_xxx flags from current code base:
#
#    $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in
#

import os, sys, re

# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched
# (only uppercase allowed)
re_use = re.compile(r'DUK_USE_[A-Z0-9_]+')
re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+')

def main():
    uses = {}
    opts = {}

    for fn in sys.argv[1:]:
        f = open(fn, 'rb')
        for line in f:
            for t in re.findall(re_use, line):
                if t[-1] != '_':  # skip e.g. 'DUK_USE_'
                    uses[t] = True
            for t in re.findall(re_opt, line):
                if t[-1] != '_':
                    opts[t] = True
        f.close()

    k = opts.keys()
    k.sort()
    for i in k:
        print(i)

    k = uses.keys()
    k.sort()
    for i in k:
        print(i)

if __name__ == '__main__':
    main()
File diff suppressed because it is too large
@@ -1,32 +0,0 @@
#!/usr/bin/env python2
#
#  Merge debugger YAML metadata files and output a merged JSON metadata file.
#

import os, sys, json, yaml
import optparse

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--output', dest='output', default=None, help='output JSON filename')
    parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names')
    parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands')
    parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes')
    parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes')
    (opts, args) = parser.parse_args()

    res = {}
    def merge(fn):
        with open(fn, 'rb') as f:
            doc = yaml.load(f)
            for k in doc.keys():
                res[k] = doc[k]

    merge(opts.class_names)
    merge(opts.debug_commands)
    merge(opts.debug_errors)
    merge(opts.opcodes)

    with open(opts.output, 'wb') as f:
        f.write(json.dumps(res, indent=4) + '\n')
    print('Wrote merged debugger metadata to ' + str(opts.output))
File diff suppressed because it is too large
@@ -1,266 +0,0 @@
#!/usr/bin/env python2
#
#  Python utilities shared by the build scripts.
#

import datetime
import json

class BitEncoder:
    "Bitstream encoder."

    _bits = None

    def __init__(self):
        self._bits = []

    def bits(self, x, nbits):
        if (x >> nbits) != 0:
            raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits))
        for i in xrange(nbits):
            t = (x >> (nbits - i - 1)) & 0x01
            self._bits.append(t)

    def string(self, x):
        nbits = len(x) * 8

        for i in xrange(nbits):
            byteidx = i / 8
            bitidx = i % 8
            if byteidx < 0 or byteidx >= len(x):
                self._bits.append(0)
            else:
                t = (ord(x[byteidx]) >> (7 - bitidx)) & 0x01
                self._bits.append(t)

    def getNumBits(self):
        "Get current number of encoded bits."
        return len(self._bits)

    def getNumBytes(self):
        "Get current number of encoded bytes, rounded up."
        nbits = len(self._bits)
        while (nbits % 8) != 0:
            nbits += 1
        return nbits / 8

    def getBytes(self):
        "Get current bitstream as a byte sequence, padded with zero bits."
        bytes = []

        for i in xrange(self.getNumBytes()):
            t = 0
            for j in xrange(8):
                off = i*8 + j
                if off >= len(self._bits):
                    t = (t << 1)
                else:
                    t = (t << 1) + self._bits[off]
            bytes.append(t)

        return bytes

    def getByteString(self):
        "Get current bitstream as a string."
        return ''.join([chr(i) for i in self.getBytes()])

class GenerateC:
    "Helper for generating C source and header files."

    _data = None
    wrap_col = 76

    def __init__(self):
        self._data = []

    def emitRaw(self, text):
        "Emit raw text (without automatic newline)."
        self._data.append(text)

    def emitLine(self, text):
        "Emit a raw line (with automatic newline)."
        self._data.append(text + '\n')

    def emitHeader(self, autogen_by):
        "Emit file header comments."

        # Note: a timestamp would be nice but it breaks incremental building
        self.emitLine('/*')
        self.emitLine(' *  Automatically generated by %s, do not edit!' % autogen_by)
        self.emitLine(' */')
        self.emitLine('')

    def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True):
        "Emit an array as a C array."

        # lenient input
        if isinstance(data, unicode):
            data = data.encode('utf-8')
        if isinstance(data, str):
            tmp = []
            for i in xrange(len(data)):
                tmp.append(ord(data[i]))
            data = tmp

        size_spec = ''
        if size is not None:
            size_spec = '%d' % size
        visib_qual = ''
        if visibility is not None:
            visib_qual = visibility + ' '
        const_qual = ''
        if const:
            const_qual = 'const '
        self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec))

        line = ''
        for i in xrange(len(data)):
            if intvalues:
                suffix = ''
                if data[i] < -32768 or data[i] > 32767:
                    suffix = 'L'
                t = "%d%s," % (data[i], suffix)
            else:
                t = "(%s)'\\x%02x', " % (typename, data[i])
            if len(line) + len(t) >= self.wrap_col:
                self.emitLine(line)
                line = t
            else:
                line += t
        if line != '':
            self.emitLine(line)
        self.emitLine('};')

    def emitDefine(self, name, value, comment=None):
        "Emit a C define with an optional comment."

        # XXX: there is no escaping right now (for comment or value)
        if comment is not None:
            self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment))
        else:
            self.emitLine('#define %-60s %s' % (name, value))

    def getString(self):
        "Get the entire file as a string."
        return ''.join(self._data)

def json_encode(x):
    "JSON encode a value."
    try:
        return json.dumps(x)
    except AttributeError:
        pass

    # for older library versions
    return json.write(x)

def json_decode(x):
    "JSON decode a value."
    try:
        return json.loads(x)
    except AttributeError:
        pass

    # for older library versions
    return json.read(x)

# Compute a byte hash identical to duk_util_hashbytes().
DUK__MAGIC_M = 0x5bd1e995
DUK__MAGIC_R = 24
def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian):
    h = (str_seed ^ nbytes) & 0xffffffff

    while nbytes >= 4:
        # 4-byte fetch byte order:
        #  - native (endian dependent) if unaligned accesses allowed
        #  - little endian if unaligned accesses not allowed

        if big_endian:
            k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \
                (ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24)
        else:
            k = ord(x[off]) + (ord(x[off + 1]) << 8) + \
                (ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24)

        k = (k * DUK__MAGIC_M) & 0xffffffff
        k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff
        k = (k * DUK__MAGIC_M) & 0xffffffff
        h = (h * DUK__MAGIC_M) & 0xffffffff
        h = (h ^ k) & 0xffffffff

        off += 4
        nbytes -= 4

    if nbytes >= 3:
        h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff
    if nbytes >= 2:
        h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff
    if nbytes >= 1:
        h = (h ^ ord(x[off])) & 0xffffffff
        h = (h * DUK__MAGIC_M) & 0xffffffff

    h = (h ^ (h >> 13)) & 0xffffffff
    h = (h * DUK__MAGIC_M) & 0xffffffff
    h = (h ^ (h >> 15)) & 0xffffffff

    return h

# Compute a string hash identical to duk_heap_hashstring() when dense
# hashing is enabled.
DUK__STRHASH_SHORTSTRING = 4096
DUK__STRHASH_MEDIUMSTRING = 256 * 1024
DUK__STRHASH_BLOCKSIZE = 256
def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False):
    str_seed = (hash_seed ^ len(x)) & 0xffffffff

    if len(x) <= DUK__STRHASH_SHORTSTRING:
        res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian)
    else:
        if len(x) <= DUK__STRHASH_MEDIUMSTRING:
            skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE
        else:
            skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE

        res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian)
        off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256

        while off < len(x):
            left = len(x) - off
            now = left
            if now > DUK__STRHASH_BLOCKSIZE:
                now = DUK__STRHASH_BLOCKSIZE
            res = (res ^ duk_util_hashbytes(x, off, now, str_seed, big_endian)) & 0xffffffff
            off += skip

    if strhash16:
        res &= 0xffff

    return res

# Compute a string hash identical to duk_heap_hashstring() when sparse
# hashing is enabled.
DUK__STRHASH_SKIP_SHIFT = 5  # XXX: assumes default value
def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False):
    res = (hash_seed ^ len(x)) & 0xffffffff

    step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1
    off = len(x)
    while off >= step:
        assert(off >= 1)
        res = ((res * 33) + ord(x[off - 1])) & 0xffffffff
        off -= step

    if strhash16:
        res &= 0xffff

    return res

# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length().
def duk_unicode_unvalidated_utf8_length(x):
    assert(isinstance(x, str))
    clen = 0
    for c in x:
        t = ord(c)
        if t < 0x80 or t >= 0xc0:  # 0x80...0xbf are continuation chars, not counted
            clen += 1
    return clen
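
# Minimal usage sketch for BitEncoder above (hypothetical field widths and
# values, not taken from any real table): pack a 16-bit value and a 7-bit
# value, then read the zero-padded byte stream back out.
#
#     be = BitEncoder()
#     be.bits(0x0101, 16)      # 16-bit field
#     be.bits(5, 7)            # 7-bit field
#     print(be.getNumBits())   # -> 23
#     print(be.getNumBytes())  # -> 3 (rounded up to whole bytes)
#     print(be.getBytes())     # -> [1, 1, 10] (last byte padded with zero bits)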
@@ -1,444 +0,0 @@
#!/usr/bin/env python2
#
#  Extract rules for Unicode case conversion, specifically the behavior
#  required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19.  The
#  bitstream encoded rules are used for the slow path at run time, so
#  compactness is favored over speed.
#
#  There is no support for context or locale sensitive rules, as they
#  are handled directly in C code before consulting tables generated
#  here.  Ecmascript requires case conversion both with and without
#  locale/language specific rules (e.g. String.prototype.toLowerCase()
#  and String.prototype.toLocaleLowerCase()), so they are best handled
#  in C anyway.
#
#  Case conversion rules for ASCII are also excluded as they are
#  handled by the C fast path.  Rules for non-BMP characters (codepoints
#  above U+FFFF) are omitted as they're not required for standard
#  Ecmascript.
#

import os, sys, math
import optparse
import dukutil

class UnicodeData:
    "Read UnicodeData.txt into an internal representation."

    def __init__(self, filename):
        self.data = self.read_unicode_data(filename)
        print 'read %d unicode data entries' % len(self.data)

    def read_unicode_data(self, filename):
        res = []
        f = open(filename, 'rb')
        for line in f:
            if line.startswith('#'):
                continue
            line = line.strip()
            if line == '':
                continue
            parts = line.split(';')
            if len(parts) != 15:
                raise Exception('invalid unicode data line')
            res.append(parts)
        f.close()

        # Sort based on Unicode codepoint
        def mycmp(a, b):
            return cmp(long(a[0], 16), long(b[0], 16))

        res.sort(cmp=mycmp)
        return res

class SpecialCasing:
    "Read SpecialCasing.txt into an internal representation."

    def __init__(self, filename):
        self.data = self.read_special_casing_data(filename)
        print 'read %d special casing entries' % len(self.data)

    def read_special_casing_data(self, filename):
        res = []
        f = open(filename, 'rb')
        for line in f:
            try:
                idx = line.index('#')
                line = line[:idx]
            except ValueError:
                pass
            line = line.strip()
            if line == '':
                continue
            parts = line.split(';')
            parts = [i.strip() for i in parts]
            while len(parts) < 6:
                parts.append('')
            res.append(parts)
        f.close()
        return res

def parse_unicode_sequence(x):
    res = ''
    for i in x.split(' '):
        i = i.strip()
        if i == '':
            continue
        res += unichr(long(i, 16))
    return res

def get_base_conversion_maps(unicode_data):
    "Create case conversion tables without handling special casing yet."

    uc = {}  # codepoint (number) -> string
    lc = {}
    tc = {}  # titlecase

    for x in unicode_data.data:
        c1 = long(x[0], 16)

        # just 16-bit support needed
        if c1 >= 0x10000:
            continue

        if x[12] != '':
            # field 12: simple uppercase mapping
            c2 = parse_unicode_sequence(x[12])
            uc[c1] = c2
            tc[c1] = c2  # titlecase default == uppercase, overridden below if necessary
        if x[13] != '':
            # field 13: simple lowercase mapping
            c2 = parse_unicode_sequence(x[13])
            lc[c1] = c2
        if x[14] != '':
            # field 14: simple titlecase mapping
            c2 = parse_unicode_sequence(x[14])
            tc[c1] = c2

    return uc, lc, tc

def update_special_casings(uc, lc, tc, special_casing):
    "Update case conversion tables with special case conversion rules."

    for x in special_casing.data:
        c1 = long(x[0], 16)

        if x[4] != '':
            # conditions
            continue

        lower = parse_unicode_sequence(x[1])
        title = parse_unicode_sequence(x[2])
        upper = parse_unicode_sequence(x[3])

        if len(lower) > 1:
            lc[c1] = lower
        if len(upper) > 1:
            uc[c1] = upper
        if len(title) > 1:
            tc[c1] = title

        print 'special case: %d %d %d' % (len(lower), len(upper), len(title))

def remove_ascii_part(convmap):
    "Remove ASCII case conversion parts (handled by C fast path)."

    for i in xrange(128):
        if convmap.has_key(i):
            del convmap[i]

def scan_range_with_skip(convmap, start_idx, skip):
    "Scan for a range of continuous case conversion with a certain 'skip'."

    conv_i = start_idx
    if not convmap.has_key(conv_i):
        return None, None, None
    elif len(convmap[conv_i]) > 1:
        return None, None, None
    else:
        conv_o = ord(convmap[conv_i])

    start_i = conv_i
    start_o = conv_o

    while True:
        new_i = conv_i + skip
        new_o = conv_o + skip

        if not convmap.has_key(new_i):
            break
        if len(convmap[new_i]) > 1:
            break
        if ord(convmap[new_i]) != new_o:
            break

        conv_i = new_i
        conv_o = new_o

    # [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char
    count = (conv_i - start_i) / skip + 1
    if count <= 1:
        return None, None, None

    # we have an acceptable range, remove them from the convmap here
    for i in xrange(start_i, conv_i + skip, skip):
        del convmap[i]

    return start_i, start_o, count

def find_first_range_with_skip(convmap, skip):
    "Find first range with a certain 'skip' value."

    for i in xrange(65536):
        start_i, start_o, count = scan_range_with_skip(convmap, i, skip)
        if start_i is None:
            continue
        return start_i, start_o, count

    return None, None, None

def generate_tables(convmap):
    "Generate bit-packed case conversion table for a given conversion map."

    # The bitstream encoding is based on manual inspection for whatever
    # regularity the Unicode case conversion rules have.
    #
    # Start with a full description of case conversions which does not
    # cover all codepoints; unmapped codepoints convert to themselves.
    # Scan for range-to-range mappings with a range of skips starting from 1.
    # Whenever a valid range is found, remove it from the map.  Finally,
    # output the remaining case conversions (1:1 and 1:n) on a per codepoint
    # basis.
    #
    # This is very slow because we always scan from scratch, but it's the
    # most reliable and simple way to scan.

    ranges = []   # range mappings (2 or more consecutive mappings with a certain skip)
    singles = []  # 1:1 character mappings
    complex = []  # 1:n character mappings

    # Ranges with skips

    for skip in xrange(1, 6 + 1):  # skips 1...6 are useful
        while True:
            start_i, start_o, count = find_first_range_with_skip(convmap, skip)
            if start_i is None:
                break
            print 'skip %d: %d %d %d' % (skip, start_i, start_o, count)
            ranges.append([start_i, start_o, count, skip])

    # 1:1 conversions

    k = convmap.keys()
    k.sort()
    for i in k:
        if len(convmap[i]) > 1:
            continue
        singles.append([i, ord(convmap[i])])  # codepoint, codepoint
        del convmap[i]

    # There are many mappings to 2-char sequences with the latter char being U+0399.
    # These could be handled as a special case, but we don't do that right now.
    #
    # [8064L, u'\u1f08\u0399']
    # [8065L, u'\u1f09\u0399']
    # [8066L, u'\u1f0a\u0399']
    # [8067L, u'\u1f0b\u0399']
    # [8068L, u'\u1f0c\u0399']
    # [8069L, u'\u1f0d\u0399']
    # [8070L, u'\u1f0e\u0399']
    # [8071L, u'\u1f0f\u0399']
    # ...
    #
    # tmp = {}
    # k = convmap.keys()
    # k.sort()
    # for i in k:
    #     if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399':
    #         tmp[i] = convmap[i][0]
    #         del convmap[i]
    # print repr(tmp)
    #
    # skip = 1
    # while True:
    #     start_i, start_o, count = find_first_range_with_skip(tmp, skip)
    #     if start_i is None:
    #         break
    #     print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count)
    # print len(tmp.keys())
    # print repr(tmp)
    # XXX: need to put 12 remaining mappings back to convmap...

    # 1:n conversions

    k = convmap.keys()
    k.sort()
    for i in k:
        complex.append([i, convmap[i]])  # codepoint, string
        del convmap[i]

    for t in singles:
        print repr(t)

    for t in complex:
        print repr(t)

    print 'range mappings: %d' % len(ranges)
    print 'single character mappings: %d' % len(singles)
    print 'complex mappings (1:n): %d' % len(complex)
    print 'remaining (should be zero): %d' % len(convmap.keys())

    # XXX: opportunities for diff encoding skip=3 ranges?
    prev = None
    for t in ranges:
        # range: [start_i, start_o, count, skip]
        if t[3] != 3:
            continue
        if prev is not None:
            print '%d %d' % (t[0] - prev[0], t[1] - prev[1])
        else:
            print 'start: %d %d' % (t[0], t[1])
        prev = t

    # bit packed encoding

    be = dukutil.BitEncoder()

    for curr_skip in xrange(1, 7):  # 1...6
        count = 0
        for r in ranges:
            start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
            if skip != curr_skip:
                continue
            count += 1
        be.bits(count, 6)
        print 'encode: skip=%d, count=%d' % (curr_skip, count)

        for r in ranges:
            start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3]
            if skip != curr_skip:
                continue
            be.bits(start_i, 16)
            be.bits(start_o, 16)
            be.bits(r_count, 7)
    be.bits(0x3f, 6)  # maximum count value = end of skips

    count = len(singles)
    be.bits(count, 6)
    for t in singles:
        cp_i, cp_o = t[0], t[1]
        be.bits(cp_i, 16)
        be.bits(cp_o, 16)

    count = len(complex)
    be.bits(count, 7)
    for t in complex:
        cp_i, str_o = t[0], t[1]
        be.bits(cp_i, 16)
        be.bits(len(str_o), 2)
        for i in xrange(len(str_o)):
            be.bits(ord(str_o[i]), 16)

    return be.getBytes(), be.getNumBits()

def generate_regexp_canonicalize_lookup(convmap):
    res = []

    highest_nonid = -1

    for cp in xrange(65536):
        res_cp = cp  # default to as is
        if convmap.has_key(cp):
            tmp = convmap[cp]
            if len(tmp) == 1:
                # Multi-codepoint outputs are ignored; only 1:1 mappings are used.
                res_cp = ord(tmp[0])
        if cp >= 0x80 and res_cp < 0x80:
            res_cp = cp  # non-ASCII mapped to ASCII, ignore

        if cp != res_cp:
            highest_nonid = cp

        res.append(res_cp)

    # At the moment this is 65370, which means there's very little
    # gain in assuming 1:1 mapping above a certain BMP codepoint.
    print('HIGHEST NON-ID MAPPING: %d' % highest_nonid)
    return res

def clonedict(x):
    "Shallow clone of input dict."
    res = {}
    for k in x.keys():
        res[k] = x[k]
    return res

def main():
    parser = optparse.OptionParser()
    parser.add_option('--command', dest='command', default='caseconv_bitpacked')
    parser.add_option('--unicode-data', dest='unicode_data')
    parser.add_option('--special-casing', dest='special_casing')
    parser.add_option('--out-source', dest='out_source')
    parser.add_option('--out-header', dest='out_header')
    parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc')
    parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc')
    parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup')
    (opts, args) = parser.parse_args()

    unicode_data = UnicodeData(opts.unicode_data)
    special_casing = SpecialCasing(opts.special_casing)

    uc, lc, tc = get_base_conversion_maps(unicode_data)
    update_special_casings(uc, lc, tc, special_casing)

    if opts.command == 'caseconv_bitpacked':
        # XXX: ASCII and non-BMP filtering could be an option but is now hardcoded

        # ASCII is handled with the C fast path so it's not needed here
        t = clonedict(uc)
        remove_ascii_part(t)
        uc_bytes, uc_nbits = generate_tables(t)

        t = clonedict(lc)
        remove_ascii_part(t)
        lc_bytes, lc_nbits = generate_tables(t)

        # Generate C source and header files
        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
        genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True)
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes)))
        genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes)))
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()
    elif opts.command == 're_canon_lookup':
        # direct canonicalization lookup for case insensitive regexps, includes ASCII part
        t = clonedict(uc)
        re_canon_lookup = generate_regexp_canonicalize_lookup(t)

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True)
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

        genc = dukutil.GenerateC()
        genc.emitHeader('extract_caseconv.py')
        genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup)))
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()
    else:
        raise Exception('invalid command: %r' % opts.command)

if __name__ == '__main__':
    main()
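
# Worked toy example of the range scan used by generate_tables() above
# (hypothetical conversion map, not real Unicode data): codepoints 100, 102
# and 104 each map to codepoint + 32, so a single skip=2 range covers all
# three entries and they are removed from the map.
#
#     convmap = { 100: unichr(132), 102: unichr(134), 104: unichr(136) }
#     print scan_range_with_skip(convmap, 100, 2)  # -> (100, 132, 3); convmap is now empty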
@@ -1,353 +0,0 @@
#!/usr/bin/env python2
#
#  Select a set of Unicode characters (based on included/excluded categories
#  etc) and write out a compact bitstream for matching a character against
#  the set at runtime.  This is for the slow path, where we're especially
#  concerned with compactness.  A C source file with the table is written,
#  together with a matching C header.
#
#  Unicode categories (such as 'Z') can be used.  Two pseudo-categories
#  are also available for exclusion only: ASCII and NONBMP.  "ASCII"
#  category excludes ASCII codepoints which is useful because C code
#  typically contains an ASCII fast path so ASCII characters don't need
#  to be considered in the Unicode tables.  "NONBMP" excludes codepoints
#  above U+FFFF which is useful because such codepoints don't need to be
#  supported in standard Ecmascript.
#

import os, sys, math
import optparse
import dukutil

def read_unicode_data(unidata, catsinc, catsexc, filterfunc):
    "Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc."
    res = []
    f = open(unidata, 'rb')
    for line in f:
        line = line.strip()
        parts = line.split(';')
        codepoint = parts[0]
        category = parts[2]

        if filterfunc is not None and not filterfunc(long(codepoint, 16)):
            continue

        excluded = False
        for cat in catsexc:
            if category.startswith(cat) or codepoint == cat:
                excluded = True
        if excluded:
            continue

        for cat in catsinc:
            if category.startswith(cat) or codepoint == cat:
                res.append(line)
    f.close()

    # Sort based on Unicode codepoint
    def mycmp(a, b):
        t1 = a.split(';')
        t2 = b.split(';')
        n1 = long(t1[0], 16)
        n2 = long(t2[0], 16)
        return cmp(n1, n2)

    res.sort(cmp=mycmp)

    return res

def scan_ranges(lines):
    "Scan continuous ranges from (filtered) UnicodeData.txt lines."
    ranges = []
    range_start = None
    prev = None

    for line in lines:
        t = line.split(';')
        n = long(t[0], 16)
        if range_start is None:
            range_start = n
        else:
            if n == prev + 1:
                # continue range
                pass
            else:
                ranges.append((range_start, prev))
                range_start = n
        prev = n

    if range_start is not None:
        ranges.append((range_start, prev))

    return ranges

def generate_png(lines, fname):
    "Generate an illustrative PNG of the character set."
    from PIL import Image

    m = {}
    for line in lines:
        t = line.split(';')
        n = long(t[0], 16)
        m[n] = 1

    codepoints = 0x10ffff + 1
    width = int(256)
    height = int(math.ceil(float(codepoints) / float(width)))
    im = Image.new('RGB', (width, height))
    black = (0,0,0)
    white = (255,255,255)
    for cp in xrange(codepoints):
        y = cp / width
        x = cp % width

        if m.has_key(long(cp)):
            im.putpixel((x,y), black)
        else:
            im.putpixel((x,y), white)

    im.save(fname)

def generate_match_table1(ranges):
    "Unused match table format."

    # This is an earlier match table format which is no longer used.
    # IdentifierStart-UnicodeLetter has 445 ranges and generates a
    # match table of 2289 bytes.

    data = []
    prev_re = None

    def genrange(rs, re):
        if (rs > re):
            raise Exception('assumption failed: rs=%d re=%d' % (rs, re))

        while True:
            now = re - rs + 1
            if now > 255:
                now = 255
                data.append(now)  # range now
                data.append(0)    # skip 0
                rs = rs + now
            else:
                data.append(now)  # range now
                break

    def genskip(ss, se):
        if (ss > se):
            raise Exception('assumption failed: ss=%d se=%s' % (ss, se))

        while True:
            now = se - ss + 1
            if now > 255:
                now = 255
                data.append(now)  # skip now
                data.append(0)    # range 0
                ss = ss + now
            else:
                data.append(now)  # skip now
                break

    for rs, re in ranges:
        if prev_re is not None:
            genskip(prev_re + 1, rs - 1)
        genrange(rs, re)
        prev_re = re

    num_entries = len(data)

    # header: start of first range
    #         num entries
    hdr = []
    hdr.append(ranges[0][0] >> 8)  # XXX: check that not 0x10000 or over
    hdr.append(ranges[0][1] & 0xff)
    hdr.append(num_entries >> 8)
    hdr.append(num_entries & 0xff)

    return hdr + data

def generate_match_table2(ranges):
    "Unused match table format."

    # Another attempt at a match table which is also unused.
    # Total tables for all current classes is now 1472 bytes.

    data = []

    def enc(x):
        while True:
            if x < 0x80:
                data.append(x)
                break
            data.append(0x80 + (x & 0x7f))
            x = x >> 7

    prev_re = 0

    for rs, re in ranges:
        r1 = rs - prev_re  # 1 or above (no unjoined ranges)
        r2 = re - rs       # 0 or above
        enc(r1)
        enc(r2)
        prev_re = re

    enc(0)  # end marker

    return data

def generate_match_table3(ranges):
    "Current match table format."

    # Yet another attempt, similar to generate_match_table2 except
    # in packing format.
    #
    # Total match size now (at time of writing): 1194 bytes.
    #
    # This is the current encoding format used in duk_lexer.c.

    be = dukutil.BitEncoder()

    freq = [0] * (0x10ffff + 1)  # informative

    def enc(x):
        freq[x] += 1

        if x <= 0x0e:
            # 4-bit encoding
            be.bits(x, 4)
            return
        x -= 0x0e + 1
        if x <= 0xfd:
            # 12-bit encoding
            be.bits(0x0f, 4)
            be.bits(x, 8)
            return
        x -= 0xfd + 1
        if x <= 0xfff:
            # 24-bit encoding
            be.bits(0x0f, 4)
            be.bits(0xfe, 8)
            be.bits(x, 12)
            return
        x -= 0xfff + 1
        if True:
            # 36-bit encoding
            be.bits(0x0f, 4)
            be.bits(0xff, 8)
            be.bits(x, 24)
            return

        raise Exception('cannot encode')

    prev_re = 0

    for rs, re in ranges:
        r1 = rs - prev_re  # 1 or above (no unjoined ranges)
        r2 = re - rs       # 0 or above
        enc(r1)
        enc(r2)
        prev_re = re

    enc(0)  # end marker

    data, nbits = be.getBytes(), be.getNumBits()
    return data, freq

def main():
    parser = optparse.OptionParser()
    parser.add_option('--unicode-data', dest='unicode_data')      # UnicodeData.txt
    parser.add_option('--special-casing', dest='special_casing')  # SpecialCasing.txt
    parser.add_option('--include-categories', dest='include_categories')
    parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE')
    parser.add_option('--out-source', dest='out_source')
    parser.add_option('--out-header', dest='out_header')
    parser.add_option('--out-png', dest='out_png')
    parser.add_option('--table-name', dest='table_name', default='match_table')
    (opts, args) = parser.parse_args()

    unidata = opts.unicode_data
    catsinc = []
    if opts.include_categories != '':
        catsinc = opts.include_categories.split(',')
    catsexc = []
    if opts.exclude_categories != 'NONE':
        catsexc = opts.exclude_categories.split(',')

    print 'CATSEXC: %s' % repr(catsexc)
    print 'CATSINC: %s' % repr(catsinc)

    # pseudocategories
    filter_ascii = ('ASCII' in catsexc)
    filter_nonbmp = ('NONBMP' in catsexc)

    # Read raw result
    def filter1(x):
        if filter_ascii and x <= 0x7f:
            # exclude ascii
            return False
        if filter_nonbmp and x >= 0x10000:
            # exclude non-bmp
            return False
        return True

    res = read_unicode_data(unidata, catsinc, catsexc, filter1)

    # Raw output
    print('RAW OUTPUT:')
    print('===========')
    print('\n'.join(res))

    # Scan ranges
    print('')
    print('RANGES:')
    print('=======')
    ranges = scan_ranges(res)
    for i in ranges:
        if i[0] == i[1]:
            print('0x%04x' % i[0])
        else:
            print('0x%04x ... 0x%04x' % (i[0], i[1]))
    print('')
    print('%d ranges total' % len(ranges))

    # Generate match table
    print('')
    print('MATCH TABLE:')
    print('============')
    #matchtable1 = generate_match_table1(ranges)
    #matchtable2 = generate_match_table2(ranges)
    matchtable3, freq = generate_match_table3(ranges)
    print 'match table: %s' % repr(matchtable3)
    print 'match table length: %d bytes' % len(matchtable3)
    print 'encoding freq:'
    for i in xrange(len(freq)):
        if freq[i] == 0:
            continue
        print '  %6d: %d' % (i, freq[i])

    print('')
    print('MATCH C TABLE -> file %s' % repr(opts.out_header))

    # Create C source and header files
    genc = dukutil.GenerateC()
    genc.emitHeader('extract_chars.py')
    genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True)
    if opts.out_source is not None:
        f = open(opts.out_source, 'wb')
        f.write(genc.getString())
        f.close()

    genc = dukutil.GenerateC()
    genc.emitHeader('extract_chars.py')
    genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3)))
    if opts.out_header is not None:
        f = open(opts.out_header, 'wb')
        f.write(genc.getString())
        f.close()

    # Image (for illustrative purposes only)
    if opts.out_png is not None:
        generate_png(res, opts.out_png)

if __name__ == '__main__':
    main()
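
# Decode-side sketch of the variable length number encoding implemented by
# enc() in generate_match_table3() above.  The real decoder lives in
# duk_lexer.c; 'read(n)' below is a hypothetical helper returning the next
# n bits of the stream as an unsigned integer.
#
#     def dec(read):
#         t = read(4)
#         if t <= 0x0e:
#             return t                            # 4-bit encoding
#         t = read(8)
#         if t <= 0xfd:
#             return t + 0x0f                     # 12-bit encoding
#         if t == 0xfe:
#             return read(12) + 0x0f + 0xfe       # 24-bit encoding
#         return read(24) + 0x0f + 0xfe + 0x1000  # 36-bit encoding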
@@ -1,44 +0,0 @@
#!/usr/bin/env python2
#
#  Generate build parameter files based on build information.
#  A C header is generated for C code, and a JSON file for
#  build scripts etc which need to know the build config.
#

import os
import sys
import json
import optparse

import dukutil

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--version', dest='version')
    parser.add_option('--git-commit', dest='git_commit')
    parser.add_option('--git-describe', dest='git_describe')
    parser.add_option('--git-branch', dest='git_branch')
    parser.add_option('--out-json', dest='out_json')
    parser.add_option('--out-header', dest='out_header')
    (opts, args) = parser.parse_args()

    t = {
        'version': opts.version,
        'git_commit': opts.git_commit,
        'git_describe': opts.git_describe,
        'git_branch': opts.git_branch,
    }

    f = open(opts.out_json, 'wb')
    f.write(dukutil.json_encode(t).encode('ascii'))
    f.close()

    f = open(opts.out_header, 'wb')
    f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n')
    f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n')
    f.write('/* automatically generated by genbuildparams.py, do not edit */\n')
    f.write('\n')
    f.write('/* DUK_VERSION is defined in duktape.h */')
    f.write('\n')
    f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n')
    f.close()
File diff suppressed because it is too large
@@ -1,57 +0,0 @@
#!/usr/bin/env python2
#
#  Generate equivalent year table needed by duk_bi_date.c.  Based on:
#
#    http://code.google.com/p/v8/source/browse/trunk/src/date.h#146
#

import datetime
import pytz

def isleapyear(year):
    if (year % 4) != 0:
        return False
    if (year % 100) != 0:
        return True
    if (year % 400) != 0:
        return False
    return True

def eqyear(weekday, isleap):
    # weekday: 0=Sunday, 1=Monday, ...

    if isleap:
        recent_year = 1956
    else:
        recent_year = 1967
    recent_year += (weekday * 12) % 28
    year = 2008 + (recent_year + 3 * 28 - 2008) % 28

    # some assertions
    #
    # Note that Ecmascript internal weekday (0=Sunday) matches neither
    # Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday).
    # Python isoweekday() % 7 matches the Ecmascript weekday.
    # https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday

    dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC)  # Jan 1 00:00:00.000 UTC
    #print(weekday, isleap, year, dt.isoweekday(), isleapyear(year))
    #print(repr(dt))
    #print(dt.isoformat())

    if isleap != isleapyear(year):
        raise Exception('internal error: equivalent year does not have same leap-year-ness')
        pass

    if weekday != dt.isoweekday() % 7:
        raise Exception('internal error: equivalent year does not begin with the same weekday')
        pass

    return year

def main():
    for i in xrange(14):
        print(eqyear(i % 7, i >= 7))

if __name__ == '__main__':
    main()
@@ -1,164 +0,0 @@
#!/usr/bin/env python2
#
#  Generate a size report from a Duktape library / executable.
#  Write out useful information about function sizes in a variety
#  of forms.
#

import os
import sys
import re
import subprocess

#000000000040d200 <duk_to_hstring>:
#  40d200:       55                      push   %rbp
#  40d201:       89 f5                   mov    %esi,%ebp

re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$')
re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$')

def objdump(filename):
    proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE)
    curr_func = None
    func_start = None
    func_end = None
    ret = {}

    def storeFunc():
        if curr_func is None or func_start is None or func_end is None:
            return
        ret[curr_func] = {
            'name': curr_func,
            'start': func_start,
            'end': func_end,  # exclusive
            'length': func_end - func_start
        }

    for line in proc.stdout:
        line = line.strip()

        m = re_funcstart.match(line)
        if m is not None:
            if curr_func is not None:
                storeFunc()
            curr_func = m.group(1)
            func_start = None
            func_end = None

        m = re_codeline.match(line)
        if m is not None:
            func_addr = long(m.group(1), 16)
            func_bytes = m.group(2)
            func_nbytes = len(func_bytes.split(' '))
            func_instr = m.group(3)
            if func_start is None:
                func_start = func_addr
            func_end = func_addr + func_nbytes

    storeFunc()

    return ret

def filterFuncs(funcs):
    todo = []  # avoid mutation while iterating

    def accept(fun):
        n = fun['name']

        if n in [ '.comment',
                  '.dynstr',
                  '.dynsym',
                  '.eh_frame_hdr',
                  '.interp',
                  '.rela.dyn',
                  '.rela.plt',
                  '_DYNAMIC',
                  '_GLOBAL_OFFSET_TABLE_',
                  '_IO_stdin_used',
                  '__CTOR_LIST__',
                  '__DTOR_LIST__',
                  '_fini',
                  '_init',
                  '_start',
                  '' ]:
            return False

        for pfx in [ '.debug', '.gnu', '.note',
                     '__FRAME_', '__' ]:
            if n.startswith(pfx):
                return False

        return True

    for k in funcs.keys():
        if not accept(funcs[k]):
            todo.append(k)

    for k in todo:
        del funcs[k]

def main():
    funcs = objdump(sys.argv[1])
    filterFuncs(funcs)

    funcs_keys = funcs.keys()
    funcs_keys.sort()
    combined_size_all = 0
    combined_size_duk = 0
    for k in funcs_keys:
        fun = funcs[k]
        combined_size_all += fun['length']
        if fun['name'].startswith('duk_'):
            combined_size_duk += fun['length']

    f = sys.stdout
    f.write('<html>')
    f.write('<head>')
    f.write('<title>Size dump for %s</title>' % sys.argv[1])
    f.write("""\
<style type="text/css">
tr:nth-child(2n) {
    background: #eeeeee;
}
tr:nth-child(2n+1) {
    background: #dddddd;
}
</style>
""")
    f.write('</head>')
    f.write('<body>')

    f.write('<h1>Summary</h1>')
    f.write('<table>')
    f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys))
    f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all)
    f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk)
    f.write('</table>')

    f.write('<h1>Sorted by function name</h1>')
    f.write('<table>')
    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
    funcs_keys = funcs.keys()
    funcs_keys.sort()
    for k in funcs_keys:
        fun = funcs[k]
        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
    f.write('</table>')

    f.write('<h1>Sorted by size</h1>')
    f.write('<table>')
    f.write('<tr><th>Name</th><th>Bytes</th></tr>')
    funcs_keys = funcs.keys()
    def cmpSize(a, b):
        return cmp(funcs[a]['length'], funcs[b]['length'])
    funcs_keys.sort(cmp=cmpSize)
    for k in funcs_keys:
        fun = funcs[k]
        f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length']))
    f.write('</table>')

    f.write('</body>')
    f.write('</html>')

if __name__ == '__main__':
    main()
@@ -1,124 +0,0 @@
#!/usr/bin/env python2
#
#  Find a sequence of duk_hobject hash sizes which have a desired 'ratio'
#  and are primes.  Prime hash sizes ensure that all probe sequence values
#  (less than hash size) are relatively prime to hash size, i.e. cover the
#  entire hash.  Prime data is packed into about 1 byte/prime using a
#  prediction-correction model.
#
#  Also generates a set of probe steps which are relatively prime to every
#  hash size.

import sys
import math

def is_prime(n):
    if n == 0:
        return False
    if n == 1 or n == 2:
        return True

    n_limit = int(math.ceil(float(n) ** 0.5)) + 1
    n_limit += 100  # paranoia
    if n_limit >= n:
        n_limit = n - 1
    for i in xrange(2, n_limit + 1):
        if (n % i) == 0:
            return False
    return True

def next_prime(n):
    while True:
        n += 1
        if is_prime(n):
            return n

def generate_sizes(min_size, max_size, step_ratio):
    "Generate a set of hash sizes following a nice ratio."

    sizes = []
    ratios = []
    curr = next_prime(min_size)
    next = curr
    sizes.append(curr)

    step_ratio = float(step_ratio) / 1024

    while True:
        if next > max_size:
            break
        ratio = float(next) / float(curr)
        if ratio < step_ratio:
            next = next_prime(next)
            continue
        sys.stdout.write('.'); sys.stdout.flush()
        sizes.append(next)
        ratios.append(ratio)
        curr = next
        next = next_prime(int(next * step_ratio))

    sys.stdout.write('\n'); sys.stdout.flush()
    return sizes, ratios

def generate_corrections(sizes, step_ratio):
    "Generate a set of corrections from a ratio-based predictor."

    # Generate a correction list for the size list, assuming steps follow a
    # certain ratio; this allows us to pack the size list into one byte per size

    res = []

    res.append(sizes[0])  # first entry is first size

    for i in xrange(1, len(sizes)):
        prev = sizes[i - 1]
        pred = int(prev * step_ratio) >> 10
        diff = int(sizes[i] - pred)
        res.append(diff)

        if diff < 0 or diff > 127:
            raise Exception('correction does not fit into 8 bits')

    res.append(-1)  # negative denotes end of list
    return res

def generate_probes(count, sizes):
    res = []

    # Generate probe values which are guaranteed to be relatively prime to
    # all generated hash size primes.  These don't have to be primes, but
    # we currently use smallest non-conflicting primes here.

    i = 2
    while len(res) < count:
        if is_prime(i) and (i not in sizes):
            if i > 255:
                raise Exception('probe step does not fit into 8 bits')
            res.append(i)
            i += 1
            continue
        i += 1

    return res

# NB: these must match duk_hobject defines and code
step_ratio = 1177  # approximately (1.15 * (1 << 10))
min_size = 16
max_size = 2**32 - 1

sizes, ratios = generate_sizes(min_size, max_size, step_ratio)
corrections = generate_corrections(sizes, step_ratio)
probes = generate_probes(32, sizes)
print len(sizes)
print 'SIZES: ' + repr(sizes)
print 'RATIOS: ' + repr(ratios)
print 'CORRECTIONS: ' + repr(corrections)
print 'PROBES: ' + repr(probes)

# highest 32-bit prime
i = 2**32
while True:
    i -= 1
    if is_prime(i):
        print 'highest 32-bit prime is: %d (0x%08x)' % (i, i)
        break
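
# Unpacking sketch mirroring generate_corrections() above: a decoder can
# recover each hash size from the previous one using the same fixed-point
# predictor plus the stored one-byte correction ('unpack_sizes' is a
# hypothetical helper, not part of the Duktape sources).
#
#     def unpack_sizes(corrections, step_ratio):
#         sizes = [ corrections[0] ]    # first entry is the first size as-is
#         for diff in corrections[1:]:
#             if diff < 0:              # negative marks end of list
#                 break
#             pred = int(sizes[-1] * step_ratio) >> 10
#             sizes.append(pred + diff)
#         return sizes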
@@ -1,73 +0,0 @@
#!/usr/bin/env python2
#
#  Size report of (stripped) object and source files.
#

import os
import sys

def getsize(fname):
    return os.stat(fname).st_size

def getlines(fname):
    f = None
    try:
        f = open(fname, 'rb')
        lines = f.read().split('\n')
        return len(lines)
    finally:
        if f is not None:
            f.close()
            f = None

def process(srcfile, objfile):
    srcsize = getsize(srcfile)
    srclines = getlines(srcfile)
    srcbpl = float(srcsize) / float(srclines)
    objsize = getsize(objfile)
    objbpl = float(objsize) / float(srclines)

    return objsize, objbpl, srcsize, srclines, srcbpl

def main():
    tot_srcsize = 0
    tot_srclines = 0
    tot_objsize = 0

    tmp = []
    for i in sys.argv[1:]:
        objfile = i
        if i.endswith('.strip'):
            objname = i[:-6]
        else:
            objname = i
        base, ext = os.path.splitext(objname)
        srcfile = base + '.c'

        objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile)
        srcbase = os.path.basename(srcfile)
        objbase = os.path.basename(objname)  # foo.o.strip -> present as foo.o
        tot_srcsize += srcsize
        tot_srclines += srclines
        tot_objsize += objsize
        tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl))

    def mycmp(a, b):
        return cmp(a[5], b[5])

    tmp.sort(cmp=mycmp, reverse=True)  # sort by object size
    fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f'
    for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp:
        print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl))

    print('========================================================================')
    print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines),
                 '', tot_objsize, float(tot_objsize) / float(tot_srclines)))

if __name__ == '__main__':
    # Usage:
    #
    #   $ strip *.o
    #   $ python genobjsizereport.py *.o

    main()
@@ -1,37 +0,0 @@
#!/usr/bin/env python2
#
#  UnicodeData.txt may contain ranges in addition to individual characters.
#  Unpack the ranges into individual characters for the other scripts to use.
#

import os
import sys

def main():
    f_in = open(sys.argv[1], 'rb')
    f_out = open(sys.argv[2], 'wb')
    while True:
        line = f_in.readline()
        if line == '' or line == '\n':
            break
        parts = line.split(';')  # keep newline
        if parts[1].endswith('First>'):
            line2 = f_in.readline()
            parts2 = line2.split(';')
            if not parts2[1].endswith('Last>'):
                raise Exception('cannot parse range')
            cp1 = long(parts[0], 16)
            cp2 = long(parts2[0], 16)

            for i in xrange(cp1, cp2 + 1):  # inclusive
                parts[0] = '%04X' % i
                f_out.write(';'.join(parts))
        else:
            f_out.write(line)

    f_in.close()
    f_out.flush()
    f_out.close()

if __name__ == '__main__':
    main()
@@ -1,56 +0,0 @@
#!/usr/bin/env python2
#
#  Scan Duktape code base for references to built-in strings and built-in
#  objects, i.e. for:
#
#    - Strings which will need DUK_STRIDX_xxx constants and a place in the
#      thr->strs[] array.
#
#    - Objects which will need DUK_BIDX_xxx constants and a place in the
#      thr->builtins[] array.
#

import os
import sys
import re
import json

re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE)
re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE)
re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE)
re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE)

def main():
    str_defs = {}
    obj_defs = {}

    for fn in sys.argv[1:]:
        with open(fn, 'rb') as f:
            d = f.read()
            for m in re.finditer(re_str_stridx, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_str_heap, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_str_hthread, d):
                str_defs[m.group(1)] = True
            for m in re.finditer(re_obj_bidx, d):
                obj_defs[m.group(1)] = True

    str_used = []
    for k in sorted(str_defs.keys()):
        str_used.append('DUK_STRIDX_' + k)

    obj_used = []
    for k in sorted(obj_defs.keys()):
        obj_used.append('DUK_BIDX_' + k)

    doc = {
        'used_stridx_defines': str_used,
        'used_bidx_defines': obj_used,
        'count_used_stridx_defines': len(str_used),
        'count_used_bidx_defines': len(obj_used)
    }
    print(json.dumps(doc, indent=4))

if __name__ == '__main__':
    main()
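
# The printed JSON has the shape of the 'doc' dict built above, e.g.
# (hypothetical values):
#
#     {
#         "used_stridx_defines": ["DUK_STRIDX_LENGTH"],
#         "used_bidx_defines": ["DUK_BIDX_GLOBAL"],
#         "count_used_stridx_defines": 1,
#         "count_used_bidx_defines": 1
#     }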
@ -0,0 +1,257 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Combine a set of a source files into a single C file. |
|||
# |
|||
# Overview of the process: |
|||
# |
|||
# * Parse user supplied C files. Add automatic #undefs at the end |
|||
# of each C file to avoid defined bleeding from one file to another. |
|||
# |
|||
# * Combine the C files in specified order. If sources have ordering |
|||
# dependencies (depends on application), order may matter. |
|||
# |
|||
# * Process #include statements in the combined source, categorizing |
|||
# them either as "internal" (found in specified include path) or |
|||
# "external". Internal includes, unless explicitly excluded, are |
|||
# inlined into the result while extenal includes are left as is. |
|||
# Duplicate #include statements are replaced with a comment. |
|||
# |
|||
# At every step, source and header lines are represented with explicit |
|||
# line objects which keep track of original filename and line. The |
|||
# output contains #line directives, if necessary, to ensure error |
|||
# throwing and other diagnostic info will work in a useful manner when |
|||
# deployed. It's also possible to generate a combined source with no |
|||
# #line directives. |
|||
# |
|||
# Making the process deterministic is important, so that if users have |
|||
# diffs that they apply to the combined source, such diffs would apply |
|||
# for as long as possible. |
|||
# |
|||
# Limitations and notes: |
|||
# |
|||
# * While there are automatic #undef's for #define's introduced in each |
|||
# C file, it's not possible to "undefine" structs, unions, etc. If |
|||
# there are structs/unions/typedefs with conflicting names, these |
|||
# have to be resolved in the source files first. |
|||
# |
|||
# * Because duplicate #include statements are suppressed, currently |
|||
# assumes #include statements are not conditional. |
|||
# |
|||
# * A system header might be #include'd in multiple source files with |
|||
# different feature defines (like _BSD_SOURCE). Because the #include |
|||
# file will only appear once in the resulting source, the first |
|||
# occurrence wins. The result may not work correctly if the feature |
|||
# defines must actually be different between two or more source files. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import json |
|||
import optparse |
|||
|
|||
# Include path for finding include files which are amalgamated. |
|||
include_paths = [] |
|||
|
|||
# Include files specifically excluded from being inlined. |
|||
include_excluded = [] |
|||
|
|||
class File: |
|||
filename_full = None |
|||
filename = None |
|||
lines = None |
|||
|
|||
def __init__(self, filename, lines): |
|||
self.filename = os.path.basename(filename) |
|||
self.filename_full = filename |
|||
self.lines = lines |
|||
|
|||
class Line: |
|||
filename_full = None |
|||
filename = None |
|||
lineno = None |
|||
data = None |
|||
|
|||
def __init__(self, filename, lineno, data): |
|||
self.filename = os.path.basename(filename) |
|||
self.filename_full = filename |
|||
self.lineno = lineno |
|||
self.data = data |
|||
|
|||
def readFile(filename): |
|||
lines = [] |
|||
|
|||
with open(filename, 'rb') as f: |
|||
lineno = 0 |
|||
for line in f: |
|||
lineno += 1 |
|||
if len(line) > 0 and line[-1] == '\n': |
|||
line = line[:-1] |
|||
lines.append(Line(filename, lineno, line)) |
|||
|
|||
return File(filename, lines) |
|||
|
|||
def lookupInclude(incfn): |
|||
re_sep = re.compile(r'/|\\') |
|||
|
|||
inccomp = re.split(re_sep, incfn) # split include path, support / and \ |
|||
|
|||
for path in include_paths: |
|||
fn = apply(os.path.join, [ path ] + inccomp) |
|||
if os.path.exists(fn): |
|||
return fn # Return full path to first match |
|||
|
|||
return None |
|||
|
|||
def addAutomaticUndefs(f): |
|||
defined = {} |
|||
|
|||
re_def = re.compile(r'#define\s+(\w+).*$') |
|||
re_undef = re.compile(r'#undef\s+(\w+).*$') |
|||
|
|||
for line in f.lines: |
|||
m = re_def.match(line.data) |
|||
if m is not None: |
|||
#print('DEFINED: %s' % repr(m.group(1))) |
|||
defined[m.group(1)] = True |
|||
m = re_undef.match(line.data) |
|||
if m is not None: |
|||
# Could just ignore #undef's here: we'd then emit |
|||
# reliable #undef's (though maybe duplicates) at |
|||
# the end. |
|||
#print('UNDEFINED: %s' % repr(m.group(1))) |
|||
if defined.has_key(m.group(1)): |
|||
del defined[m.group(1)] |
|||
|
|||
# Undefine anything that seems to be left defined. This not a 100% |
|||
# process because some #undef's might be conditional which we don't |
|||
# track at the moment. Note that it's safe to #undef something that's |
|||
# not defined. |
|||
|
|||
keys = sorted(defined.keys()) # deterministic order |
|||
if len(keys) > 0: |
|||
#print('STILL DEFINED: %r' % repr(defined.keys())) |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '')) |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */')) |
|||
for k in keys: |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k)) |
|||
|
|||
def createCombined(files, prologue_filename, line_directives): |
|||
res = [] |
|||
line_map = [] # indicate combined source lines where uncombined file/line would change |
|||
metadata = { |
|||
'line_map': line_map |
|||
} |
|||
|
|||
emit_state = [ None, None ] # curr_filename, curr_lineno |
|||
|
|||
def emit(line): |
|||
if isinstance(line, (str, unicode)): |
|||
res.append(line) |
|||
emit_state[1] += 1 |
|||
else: |
|||
if line.filename != emit_state[0] or line.lineno != emit_state[1]: |
|||
if line_directives: |
|||
res.append('#line %d "%s"' % (line.lineno, line.filename)) |
|||
line_map.append({ 'original_file': line.filename, |
|||
'original_line': line.lineno, |
|||
'combined_line': len(res) + 1 }) |
|||
res.append(line.data) |
|||
emit_state[0] = line.filename |
|||
emit_state[1] = line.lineno + 1 |
|||
|
|||
included = {} # headers already included |
|||
|
|||
if prologue_filename is not None: |
|||
with open(prologue_filename, 'rb') as f: |
|||
for line in f.read().split('\n'): |
|||
res.append(line) |
|||
|
|||
re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$') |
|||
|
|||
# Process a file, appending it to the result; the input may be a |
|||
# source or an include file. #include directives are handled |
|||
# recursively. |
|||
def processFile(f): |
|||
#print('Process file: ' + f.filename) |
|||
|
|||
for line in f.lines: |
|||
if not line.data.startswith('#include'): |
|||
emit(line) |
|||
continue |
|||
|
|||
m = re_inc.match(line.data) |
|||
if m is None: |
|||
raise Exception('Couldn\'t match #include line: %s' % repr(line.data)) |
|||
incpath = m.group(2) |
|||
if incpath in include_excluded: |
|||
# Specific include files excluded from the |
|||
# inlining / duplicate suppression process. |
|||
emit(line) # keep as is |
|||
continue |
|||
|
|||
if included.has_key(incpath): |
|||
# We suppress duplicate includes, both internal and |
|||
# external, based on the assumption that includes are |
|||
# not behind #ifdef checks. This is the case for |
|||
# Duktape (except for the include files excluded). |
|||
emit('/* #include %s -> already included */' % incpath) |
|||
continue |
|||
included[incpath] = True |
|||
|
|||
# An include file is considered "internal" and is amalgamated |
|||
# if it is found in the include path provided by the user. |
|||
|
|||
incfile = lookupInclude(incpath) |
|||
if incfile is not None: |
|||
#print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile))) |
|||
emit('/* #include %s */' % incpath) |
|||
processFile(readFile(incfile)) |
|||
else: |
|||
#print('Include considered external: %s' % repr(line.data)) |
|||
emit(line) # keep as is |
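# For example, assuming duk_internal.h is found on the user-provided include
# path, the first '#include "duk_internal.h"' becomes the comment
# '/* #include duk_internal.h */' followed by the inlined header contents,
# any later occurrence becomes '/* #include duk_internal.h -> already included */',
# and an include not found on the path (e.g. <stdio.h>) is emitted as is.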
|||
|
|||
for f in files: |
|||
processFile(f) |
|||
|
|||
return '\n'.join(res) + '\n', metadata |
|||
|
|||
def main(): |
|||
global include_paths, include_excluded |
|||
|
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times') |
|||
parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)') |
|||
parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file') |
|||
parser.add_option('--output-source', dest='output_source', help='Output source filename') |
|||
parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename') |
|||
parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
assert(opts.include_paths is not None) |
|||
include_paths = opts.include_paths # global for easy access |
|||
include_excluded = opts.include_excluded |
|||
assert(opts.output_source) |
|||
assert(opts.output_metadata) |
|||
|
|||
print('Read input files, add automatic #undefs') |
|||
sources = args |
|||
files = [] |
|||
for fn in sources: |
|||
res = readFile(fn) |
|||
#print('Add automatic undefs for: ' + fn) |
|||
addAutomaticUndefs(res) |
|||
files.append(res) |
|||
|
|||
print('Create combined source file from %d source files' % len(files)) |
|||
combined_source, metadata = \ |
|||
createCombined(files, opts.prologue, opts.line_directives) |
|||
with open(opts.output_source, 'wb') as f: |
|||
f.write(combined_source) |
|||
with open(opts.output_metadata, 'wb') as f: |
|||
f.write(json.dumps(metadata, indent=4)) |
|||
|
|||
print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,246 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Helper to create an SPDX license file (http://spdx.org) |
|||
# |
|||
# This must be executed when the dist/ directory is otherwise complete, |
|||
# except for the SPDX license, so that the file lists and such contained |
|||
# in the SPDX license will be correct. |
|||
# |
|||
# The utility outputs RDF/XML to the specified file:
|||
# |
|||
# $ python create_spdx_license.py /tmp/license.spdx |
|||
# |
|||
# Then, validate with SPDXViewer and SPDXTools: |
|||
# |
|||
# $ java -jar SPDXViewer.jar /tmp/license.spdx |
|||
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html
|||
# |
|||
# Finally, copy to dist: |
|||
# |
|||
# $ cp /tmp/license.spdx dist/license.spdx |
|||
# |
|||
# SPDX FAQ indicates there is no standard extension for an SPDX license file |
|||
# but '.spdx' is a common practice. |
|||
# |
|||
# The algorithm to compute a "verification code", implemented in this file, |
|||
# can be verified as follows: |
|||
# |
|||
# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz |
|||
# $ cd /tmp |
|||
# $ tar xvfJ duktape-N.N.N.tar.xz |
|||
# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code |
|||
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/ |
|||
# |
|||
# Compare the resulting verification code manually with the one in license.spdx. |
|||
# |
|||
# Resources: |
|||
# |
|||
# - http://spdx.org/about-spdx/faqs |
|||
# - http://wiki.spdx.org/view/Technical_Team/Best_Practices |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import datetime |
|||
import sha |
|||
import rdflib |
|||
from rdflib import URIRef, BNode, Literal, Namespace |
|||
|
|||
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
|||
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') |
|||
XSD = Namespace('http://www.w3.org/2001/XMLSchema#') |
|||
SPDX = Namespace('http://spdx.org/rdf/terms#') |
|||
DOAP = Namespace('http://usefulinc.com/ns/doap#') |
|||
DUKTAPE = Namespace('http://duktape.org/rdf/terms#') |
|||
|
|||
def checksumFile(g, filename): |
|||
f = open(filename, 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
shasum = sha.sha(d).digest().encode('hex').lower() |
|||
|
|||
csum_node = BNode() |
|||
g.add((csum_node, RDF.type, SPDX.Checksum)) |
|||
g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1)) |
|||
g.add((csum_node, SPDX.checksumValue, Literal(shasum))) |
|||
|
|||
return csum_node |
|||
|
|||
def computePackageVerification(g, dirname, excluded): |
|||
# SPDX 1.2 Section 4.7 |
|||
# The SPDXTools command "GenerateVerificationCode" can be used to |
|||
# check the verification codes created. Note that you must manually |
|||
# remove "license.spdx" from the unpacked dist directory before |
|||
# computing the verification code. |
|||
|
|||
verify_node = BNode() |
|||
|
|||
hashes = [] |
|||
for dirpath, dirnames, filenames in os.walk(dirname): |
|||
for fn in filenames: |
|||
full_fn = os.path.join(dirpath, fn) |
|||
f = open(full_fn, 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
|
|||
if full_fn in excluded: |
|||
#print('excluded in verification: ' + full_fn) |
|||
continue |
|||
#print('included in verification: ' + full_fn) |
|||
|
|||
file_sha1 = sha.sha(d).digest().encode('hex').lower() |
|||
hashes.append(file_sha1) |
|||
|
|||
#print(repr(hashes)) |
|||
hashes.sort() |
|||
#print(repr(hashes)) |
|||
verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower() |
|||
|
|||
for fn in excluded: |
|||
g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn))) |
|||
g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code))) |
|||
|
|||
return verify_node |
|||
|
|||
def fileType(filename): |
|||
ign, ext = os.path.splitext(filename) |
|||
if ext in [ '.c', '.h', '.js' ]: |
|||
return SPDX.fileType_source |
|||
else: |
|||
return SPDX.fileType_other |
|||
|
|||
def getDuktapeVersion(): |
|||
f = open('./src/duktape.h') |
|||
re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$') |
|||
for line in f: |
|||
line = line.strip() |
|||
m = re_ver.match(line) |
|||
if m is None: |
|||
continue |
|||
ver = int(m.group(1)) |
|||
return '%d.%d.%d' % ((ver / 10000) % 100, |
|||
(ver / 100) % 100, |
|||
ver % 100) |
|||
|
|||
raise Exception('could not figure out Duktape version') |
|||
|
|||
def main(): |
|||
outfile = sys.argv[1] |
|||
|
|||
if not (os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript')):
|||
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') |
|||
sys.exit(1) |
|||
os.chdir('dist') |
|||
if not os.path.exists('Makefile.cmdline'): |
|||
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built') |
|||
sys.exit(1) |
|||
|
|||
duktape_version = getDuktapeVersion() |
|||
duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz' |
|||
now = datetime.datetime.utcnow() |
|||
now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second) |
|||
creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime) |
|||
duktape_org = Literal('Organization: duktape.org') |
|||
mit_license = URIRef('http://spdx.org/licenses/MIT') |
|||
duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)') |
|||
|
|||
g = rdflib.Graph() |
|||
|
|||
crea_node = BNode() |
|||
g.add((crea_node, RDF.type, SPDX.CreationInfo)) |
|||
g.add((crea_node, RDFS.comment, Literal(''))) |
|||
g.add((crea_node, SPDX.creator, duktape_org)) |
|||
g.add((crea_node, SPDX.created, creation_date)) |
|||
g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/ |
|||
|
|||
# 'name' should not include a version number (see best practices) |
|||
pkg_node = BNode() |
|||
g.add((pkg_node, RDF.type, SPDX.Package)) |
|||
g.add((pkg_node, SPDX.name, Literal('Duktape'))) |
|||
g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version))) |
|||
g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname))) |
|||
g.add((pkg_node, SPDX.supplier, duktape_org)) |
|||
g.add((pkg_node, SPDX.originator, duktape_org)) |
|||
g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI))) |
|||
g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI))) |
|||
verify_node = computePackageVerification(g, '.', [ './license.spdx' ]) |
|||
g.add((pkg_node, SPDX.packageVerificationCode, verify_node)) |
|||
# SPDX.checksum: omitted because license is inside the package |
|||
g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.'))) |
|||
|
|||
# NOTE: MIT license alone is sufficient for now, because Duktape, Lua, |
|||
# Murmurhash2, and CommonJS (though probably not even relevant for |
|||
# licensing) are all MIT. |
|||
g.add((pkg_node, SPDX.licenseConcluded, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseDeclared, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.'))) |
|||
g.add((pkg_node, SPDX.copyrightText, duktape_copyright)) |
|||
g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter'))) |
|||
g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint'))) |
|||
# hasFile properties added separately below |
|||
|
|||
#reviewed_node = BNode() |
|||
#g.add((reviewed_node, RDF.type, SPDX.Review)) |
|||
#g.add((reviewed_node, SPDX.reviewer, XXX)) |
|||
#g.add((reviewed_node, SPDX.reviewDate, XXX)) |
|||
#g.add((reviewed_node, RDFS.comment, '')) |
|||
|
|||
spdx_doc = BNode() |
|||
g.add((spdx_doc, RDF.type, SPDX.SpdxDocument)) |
|||
g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2'))) |
|||
g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0'))) |
|||
g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version))) |
|||
g.add((spdx_doc, SPDX.creationInfo, crea_node)) |
|||
g.add((spdx_doc, SPDX.describesPackage, pkg_node)) |
|||
# SPDX.hasExtractedLicensingInfo |
|||
# SPDX.reviewed |
|||
# SPDX.referencesFile: added below |
|||
|
|||
for dirpath, dirnames, filenames in os.walk('.'): |
|||
for fn in filenames: |
|||
full_fn = os.path.join(dirpath, fn) |
|||
#print('# file: ' + full_fn) |
|||
|
|||
file_node = BNode() |
|||
g.add((file_node, RDF.type, SPDX.File)) |
|||
g.add((file_node, SPDX.fileName, Literal(full_fn))) |
|||
g.add((file_node, SPDX.fileType, fileType(full_fn))) |
|||
g.add((file_node, SPDX.checksum, checksumFile(g, full_fn))) |
|||
|
|||
# Here we assume that LICENSE.txt provides the actual "in file" |
|||
# licensing information, and everything else is implicitly under |
|||
# MIT license. |
|||
g.add((file_node, SPDX.licenseConcluded, mit_license)) |
|||
if full_fn == './LICENSE.txt': |
|||
g.add((file_node, SPDX.licenseInfoInFile, mit_license)) |
|||
else: |
|||
g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none))) |
|||
|
|||
# SPDX.licenseComments |
|||
g.add((file_node, SPDX.copyrightText, duktape_copyright)) |
|||
# SPDX.noticeText |
|||
# SPDX.artifactOf |
|||
# SPDX.fileDependency |
|||
# SPDX.fileContributor |
|||
|
|||
# XXX: should referencesFile include all files? |
|||
g.add((spdx_doc, SPDX.referencesFile, file_node)) |
|||
|
|||
g.add((pkg_node, SPDX.hasFile, file_node)) |
|||
|
|||
# Serialize into RDF/XML directly. We could also serialize into |
|||
# N-Triples and use external tools (like 'rapper') to get cleaner, |
|||
# abbreviated output. |
|||
|
|||
#print('# Duktape SPDX license file (autogenerated)') |
|||
#print(g.serialize(format='turtle')) |
|||
#print(g.serialize(format='nt')) |
|||
f = open(outfile, 'wb') |
|||
#f.write(g.serialize(format='rdf/xml')) |
|||
f.write(g.serialize(format='xml')) |
|||
f.close() |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,49 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Create an array of C strings with Duktape built-in strings. |
|||
# Useful when using external strings. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import json |
|||
|
|||
def to_c_string(x): |
|||
res = '"' |
|||
term = False |
|||
for i, c in enumerate(x): |
|||
if term: |
|||
term = False |
|||
res += '" "' |
|||
|
|||
o = ord(c) |
|||
if o < 0x20 or o > 0x7e or c in '\'"\\': |
|||
# Terminate C string so that escape doesn't become |
|||
# ambiguous |
|||
res += '\\x%02x' % o |
|||
term = True |
|||
else: |
|||
res += c |
|||
res += '"' |
|||
return res |
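# Illustrative example: to_c_string('foo"bar') yields the C literal
#
#     "foo\x22" "bar"
#
# The string is terminated and restarted after the escape so that a following
# hex digit (here 'b') cannot be parsed as part of the \x escape.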
|||
|
|||
def main(): |
|||
f = open(sys.argv[1], 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
meta = json.loads(d) |
|||
|
|||
print('const char *duk_builtin_strings[] = {') |
|||
|
|||
strlist = meta['builtin_strings_base64'] |
|||
for i in xrange(len(strlist)): |
|||
s = strlist[i] |
|||
if i == len(strlist) - 1: |
|||
print(' %s' % to_c_string(s.decode('base64'))) |
|||
else: |
|||
print(' %s,' % to_c_string(s.decode('base64'))) |
|||
|
|||
print('};') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,259 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Python utilities shared by the build scripts. |
|||
# |
|||
|
|||
import datetime |
|||
import json |
|||
|
|||
class BitEncoder: |
|||
"Bitstream encoder." |
|||
|
|||
_bits = None |
|||
|
|||
def __init__(self): |
|||
self._bits = [] |
|||
|
|||
def bits(self, x, nbits): |
|||
if (x >> nbits) != 0: |
|||
raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits)) |
|||
for shift in xrange(nbits - 1, -1, -1): # nbits - 1, nbits - 2, ..., 0 |
|||
self._bits.append((x >> shift) & 0x01) |
|||
|
|||
def string(self, x): |
|||
for i in xrange(len(x)): |
|||
ch = ord(x[i]) |
|||
for shift in xrange(7, -1, -1): # 7, 6, ..., 0 |
|||
self._bits.append((ch >> shift) & 0x01) |
|||
|
|||
def getNumBits(self): |
|||
"Get current number of encoded bits." |
|||
return len(self._bits) |
|||
|
|||
def getNumBytes(self): |
|||
"Get current number of encoded bytes, rounded up." |
|||
nbits = len(self._bits) |
|||
while (nbits % 8) != 0: |
|||
nbits += 1 |
|||
return nbits / 8 |
|||
|
|||
def getBytes(self): |
|||
"Get current bitstream as a byte sequence, padded with zero bits." |
|||
bytes = [] |
|||
|
|||
for i in xrange(self.getNumBytes()): |
|||
t = 0 |
|||
for j in xrange(8): |
|||
off = i*8 + j |
|||
if off >= len(self._bits): |
|||
t = (t << 1) |
|||
else: |
|||
t = (t << 1) + self._bits[off] |
|||
bytes.append(t) |
|||
|
|||
return bytes |
|||
|
|||
def getByteString(self): |
|||
"Get current bitstream as a string." |
|||
return ''.join([chr(i) for i in self.getBytes()]) |
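# Illustrative usage sketch:
#
#     be = BitEncoder()
#     be.bits(0x5, 4)        # appends bits 0101
#     be.bits(0x3, 4)        # appends bits 0011
#     be.getNumBits()        # -> 8
#     be.getBytes()          # -> [ 0x53 ]
#     be.getByteString()     # -> '\x53'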
|||
|
|||
class GenerateC: |
|||
"Helper for generating C source and header files." |
|||
|
|||
_data = None |
|||
wrap_col = 76 |
|||
|
|||
def __init__(self): |
|||
self._data = [] |
|||
|
|||
def emitRaw(self, text): |
|||
"Emit raw text (without automatic newline)." |
|||
self._data.append(text) |
|||
|
|||
def emitLine(self, text): |
|||
"Emit a raw line (with automatic newline)." |
|||
self._data.append(text + '\n') |
|||
|
|||
def emitHeader(self, autogen_by): |
|||
"Emit file header comments." |
|||
|
|||
# Note: a timestamp would be nice but it breaks incremental building |
|||
self.emitLine('/*') |
|||
self.emitLine(' * Automatically generated by %s, do not edit!' % autogen_by) |
|||
self.emitLine(' */') |
|||
self.emitLine('') |
|||
|
|||
def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True): |
|||
"Emit an array as a C array." |
|||
|
|||
# lenient input |
|||
if isinstance(data, unicode): |
|||
data = data.encode('utf-8') |
|||
if isinstance(data, str): |
|||
tmp = [] |
|||
for i in xrange(len(data)): |
|||
tmp.append(ord(data[i])) |
|||
data = tmp |
|||
|
|||
size_spec = '' |
|||
if size is not None: |
|||
size_spec = '%d' % size |
|||
visib_qual = '' |
|||
if visibility is not None: |
|||
visib_qual = visibility + ' ' |
|||
const_qual = '' |
|||
if const: |
|||
const_qual = 'const ' |
|||
self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec)) |
|||
|
|||
line = '' |
|||
for i in xrange(len(data)): |
|||
if intvalues: |
|||
suffix = '' |
|||
if data[i] < -32768 or data[i] > 32767: |
|||
suffix = 'L' |
|||
t = "%d%s," % (data[i], suffix) |
|||
else: |
|||
t = "(%s)'\\x%02x', " % (typename, data[i]) |
|||
if len(line) + len(t) >= self.wrap_col: |
|||
self.emitLine(line) |
|||
line = t |
|||
else: |
|||
line += t |
|||
if line != '': |
|||
self.emitLine(line) |
|||
self.emitLine('};') |
|||
|
|||
def emitDefine(self, name, value, comment=None): |
|||
"Emit a C define with an optional comment." |
|||
|
|||
# XXX: there is no escaping right now (for comment or value) |
|||
if comment is not None: |
|||
self.emitLine('#define %-60s %-30s /* %s */' % (name, value, comment)) |
|||
else: |
|||
self.emitLine('#define %-60s %s' % (name, value)) |
|||
|
|||
def getString(self): |
|||
"Get the entire file as a string." |
|||
return ''.join(self._data) |
|||
|
|||
def json_encode(x): |
|||
"JSON encode a value." |
|||
try: |
|||
return json.dumps(x) |
|||
except AttributeError: |
|||
pass |
|||
|
|||
# for older library versions |
|||
return json.write(x) |
|||
|
|||
def json_decode(x): |
|||
"JSON decode a value." |
|||
try: |
|||
return json.loads(x) |
|||
except AttributeError: |
|||
pass |
|||
|
|||
# for older library versions |
|||
return json.read(x) |
|||
|
|||
# Compute a byte hash identical to duk_util_hashbytes(). |
|||
DUK__MAGIC_M = 0x5bd1e995 |
|||
DUK__MAGIC_R = 24 |
|||
def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian): |
|||
h = (str_seed ^ nbytes) & 0xffffffff |
|||
|
|||
while nbytes >= 4: |
|||
# 4-byte fetch byte order: |
|||
# - native (endian dependent) if unaligned accesses allowed |
|||
# - little endian if unaligned accesses not allowed |
|||
|
|||
if big_endian: |
|||
k = ord(x[off + 3]) + (ord(x[off + 2]) << 8) + \ |
|||
(ord(x[off + 1]) << 16) + (ord(x[off + 0]) << 24) |
|||
else: |
|||
k = ord(x[off]) + (ord(x[off + 1]) << 8) + \ |
|||
(ord(x[off + 2]) << 16) + (ord(x[off + 3]) << 24) |
|||
|
|||
k = (k * DUK__MAGIC_M) & 0xffffffff |
|||
k = (k ^ (k >> DUK__MAGIC_R)) & 0xffffffff |
|||
k = (k * DUK__MAGIC_M) & 0xffffffff |
|||
h = (h * DUK__MAGIC_M) & 0xffffffff |
|||
h = (h ^ k) & 0xffffffff |
|||
|
|||
off += 4 |
|||
nbytes -= 4 |
|||
|
|||
if nbytes >= 3: |
|||
h = (h ^ (ord(x[off + 2]) << 16)) & 0xffffffff |
|||
if nbytes >= 2: |
|||
h = (h ^ (ord(x[off + 1]) << 8)) & 0xffffffff |
|||
if nbytes >= 1: |
|||
h = (h ^ ord(x[off])) & 0xffffffff |
|||
h = (h * DUK__MAGIC_M) & 0xffffffff |
|||
|
|||
h = (h ^ (h >> 13)) & 0xffffffff |
|||
h = (h * DUK__MAGIC_M) & 0xffffffff |
|||
h = (h ^ (h >> 15)) & 0xffffffff |
|||
|
|||
return h |
|||
|
|||
# Compute a string hash identical to duk_heap_hashstring() when dense |
|||
# hashing is enabled. |
|||
DUK__STRHASH_SHORTSTRING = 4096 |
|||
DUK__STRHASH_MEDIUMSTRING = 256 * 1024 |
|||
DUK__STRHASH_BLOCKSIZE = 256 |
|||
def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False): |
|||
str_seed = (hash_seed ^ len(x)) & 0xffffffff |
|||
|
|||
if len(x) <= DUK__STRHASH_SHORTSTRING: |
|||
res = duk_util_hashbytes(x, 0, len(x), str_seed, big_endian) |
|||
else: |
|||
if len(x) <= DUK__STRHASH_MEDIUMSTRING: |
|||
skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE |
|||
else: |
|||
skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE |
|||
|
|||
res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian) |
|||
off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) / 256 |
|||
|
|||
while off < len(x): |
|||
left = len(x) - off |
|||
now = left |
|||
if now > DUK__STRHASH_BLOCKSIZE: |
|||
now = DUK__STRHASH_BLOCKSIZE |
|||
res = (res ^ duk_util_hashbytes(x, off, now, str_seed, big_endian)) & 0xffffffff
|||
off += skip |
|||
|
|||
if strhash16: |
|||
res &= 0xffff |
|||
|
|||
return res |
|||
|
|||
# Compute a string hash identical to duk_heap_hashstring() when sparse |
|||
# hashing is enabled. |
|||
DUK__STRHASH_SKIP_SHIFT = 5 # XXX: assumes default value |
|||
def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False): |
|||
res = (hash_seed ^ len(x)) & 0xffffffff |
|||
|
|||
step = (len(x) >> DUK__STRHASH_SKIP_SHIFT) + 1 |
|||
off = len(x) |
|||
while off >= step: |
|||
assert(off >= 1) |
|||
res = ((res * 33) + ord(x[off - 1])) & 0xffffffff |
|||
off -= step |
|||
|
|||
if strhash16: |
|||
res &= 0xffff |
|||
|
|||
return res |
|||
|
|||
# Must match src/duk_unicode_support:duk_unicode_unvalidated_utf8_length(). |
|||
def duk_unicode_unvalidated_utf8_length(x): |
|||
assert(isinstance(x, str)) |
|||
clen = 0 |
|||
for c in x: |
|||
t = ord(c) |
|||
if t < 0x80 or t >= 0xc0: # 0x80...0xbf are continuation chars, not counted |
|||
clen += 1 |
|||
return clen |
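# Illustrative usage of the helpers above (hash values are not shown because
# they depend on the seed; the seed below is arbitrary):
#
#     duk_unicode_unvalidated_utf8_length('abc')        # -> 3
#     duk_unicode_unvalidated_utf8_length('\xc3\xa4')   # -> 1 (U+00E4 as UTF-8)
#     h32 = duk_heap_hashstring_dense('abc', 0x12345678)                    # 32-bit hash
#     h16 = duk_heap_hashstring_sparse('abc', 0x12345678, strhash16=True)   # 16-bit hash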
@ -0,0 +1,130 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Utility to dump bytecode into a human readable form. |
|||
# |
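# Example usage (illustrative; the input file is assumed to contain a bytecode
# dump, e.g. produced with duk_dump_function(), stored as ASCII hex):
#
#   $ python dump_bytecode.py --hex-decode /tmp/func.hex
#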
|||
|
|||
import os |
|||
import sys |
|||
import struct |
|||
import optparse |
|||
|
|||
def decode_string(buf, off): |
|||
strlen, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
strdata = buf[off:off+strlen] |
|||
off += strlen |
|||
|
|||
return off, strdata |
|||
|
|||
def sanitize_string(val): |
|||
# Don't try to UTF-8 decode, just escape non-printable ASCII. |
|||
def f(c): |
|||
if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"': |
|||
return '\\x%02x' % ord(c) |
|||
else: |
|||
return c |
|||
return "'" + ''.join(map(f, val)) + "'" |
|||
|
|||
def decode_sanitize_string(buf, off): |
|||
off, val = decode_string(buf, off) |
|||
return off, sanitize_string(val) |
|||
|
|||
def dump_function(buf, off, ind): |
|||
count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12]) |
|||
off += 12 |
|||
print '%sInstructions: %d' % (ind, count_inst) |
|||
print '%sConstants: %d' % (ind, count_const) |
|||
print '%sInner functions: %d' % (ind, count_funcs) |
|||
|
|||
nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12]) |
|||
off += 12 |
|||
print '%sNregs: %d' % (ind, nregs) |
|||
print '%sNargs: %d' % (ind, nargs) |
|||
print '%sStart line number: %d' % (ind, start_line) |
|||
print '%sEnd line number: %d' % (ind, end_line) |
|||
|
|||
compfunc_flags, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags) |
|||
|
|||
for i in xrange(count_inst): |
|||
ins, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s %06d: %08lx' % (ind, i, ins) |
|||
|
|||
print '%sConstants:' % ind |
|||
for i in xrange(count_const): |
|||
const_type, = struct.unpack('B', buf[off:off+1]) |
|||
off += 1 |
|||
|
|||
if const_type == 0x00: |
|||
off, strdata = decode_sanitize_string(buf, off) |
|||
print '%s %06d: %s' % (ind, i, strdata) |
|||
elif const_type == 0x01: |
|||
num, = struct.unpack('>d', buf[off:off+8]) |
|||
off += 8 |
|||
print '%s %06d: %f' % (ind, i, num) |
|||
else: |
|||
raise Exception('invalid constant type: %d' % const_type) |
|||
|
|||
for i in xrange(count_funcs): |
|||
print '%sInner function %d:' % (ind, i) |
|||
off = dump_function(buf, off, ind + ' ') |
|||
|
|||
val, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s.length: %d' % (ind, val) |
|||
off, val = decode_sanitize_string(buf, off) |
|||
print '%s.name: %s' % (ind, val) |
|||
off, val = decode_sanitize_string(buf, off) |
|||
print '%s.fileName: %s' % (ind, val) |
|||
off, val = decode_string(buf, off) # actually a buffer |
|||
print '%s._Pc2line: %s' % (ind, val.encode('hex')) |
|||
|
|||
while True: |
|||
off, name = decode_string(buf, off) |
|||
if name == '': |
|||
break |
|||
name = sanitize_string(name) |
|||
val, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s_Varmap[%s] = %d' % (ind, name, val) |
|||
|
|||
idx = 0 |
|||
while True: |
|||
off, name = decode_string(buf, off) |
|||
if name == '': |
|||
break |
|||
name = sanitize_string(name) |
|||
print '%s_Formals[%d] = %s' % (ind, idx, name) |
|||
idx += 1 |
|||
|
|||
return off |
|||
|
|||
def dump_bytecode(buf, off, ind): |
|||
sig, ver = struct.unpack('BB', buf[off:off+2]) |
|||
off += 2 |
|||
if sig != 0xff: |
|||
raise Exception('invalid signature byte: %d' % sig) |
|||
if ver != 0x00: |
|||
raise Exception('unsupported bytecode version: %d' % ver) |
|||
print '%sBytecode version: 0x%02x' % (ind, ver) |
|||
|
|||
off = dump_function(buf, off, ind + ' ') |
|||
|
|||
return off |
|||
|
|||
def main(): |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
with open(args[0], 'rb') as f: |
|||
d = f.read() |
|||
if opts.hex_decode: |
|||
d = d.strip() |
|||
d = d.decode('hex') |
|||
dump_bytecode(d, 0, '') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,444 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Extract rules for Unicode case conversion, specifically the behavior |
|||
# required by Ecmascript E5 in Sections 15.5.4.16 to 15.5.4.19. The |
|||
# bitstream encoded rules are used for the slow path at run time, so |
|||
# compactness is favored over speed. |
|||
# |
|||
# There is no support for context or locale sensitive rules, as they |
|||
# are handled directly in C code before consulting tables generated |
|||
# here. Ecmascript requires case conversion both with and without |
|||
# locale/language specific rules (e.g. String.prototype.toLowerCase() |
|||
# and String.prototype.toLocaleLowerCase()), so they are best handled |
|||
# in C anyway. |
|||
# |
|||
# Case conversion rules for ASCII are also excluded as they are |
|||
# handled by C fast path. Rules for non-BMP characters (codepoints |
|||
# above U+FFFF) are omitted as they're not required for standard |
|||
# Ecmascript. |
|||
# |
|||
|
|||
import os, sys, math |
|||
import optparse |
|||
import dukutil |
|||
|
|||
class UnicodeData: |
|||
"Read UnicodeData.txt into an internal representation." |
|||
|
|||
def __init__(self, filename): |
|||
self.data = self.read_unicode_data(filename) |
|||
print 'read %d unicode data entries' % len(self.data) |
|||
|
|||
def read_unicode_data(self, filename): |
|||
res = [] |
|||
f = open(filename, 'rb') |
|||
for line in f: |
|||
if line.startswith('#'): |
|||
continue |
|||
line = line.strip() |
|||
if line == '': |
|||
continue |
|||
parts = line.split(';') |
|||
if len(parts) != 15: |
|||
raise Exception('invalid unicode data line') |
|||
res.append(parts) |
|||
f.close() |
|||
|
|||
# Sort based on Unicode codepoint |
|||
def mycmp(a,b): |
|||
return cmp(long(a[0], 16), long(b[0], 16)) |
|||
|
|||
res.sort(cmp=mycmp) |
|||
return res |
|||
|
|||
class SpecialCasing: |
|||
"Read SpecialCasing.txt into an internal representation." |
|||
|
|||
def __init__(self, filename): |
|||
self.data = self.read_special_casing_data(filename) |
|||
print 'read %d special casing entries' % len(self.data) |
|||
|
|||
def read_special_casing_data(self, filename): |
|||
res = [] |
|||
f = open(filename, 'rb') |
|||
for line in f: |
|||
try: |
|||
idx = line.index('#') |
|||
line = line[:idx] |
|||
except ValueError: |
|||
pass |
|||
line = line.strip() |
|||
if line == '': |
|||
continue |
|||
parts = line.split(';') |
|||
parts = [i.strip() for i in parts] |
|||
while len(parts) < 6: |
|||
parts.append('') |
|||
res.append(parts) |
|||
f.close() |
|||
return res |
|||
|
|||
def parse_unicode_sequence(x): |
|||
res = '' |
|||
for i in x.split(' '): |
|||
i = i.strip() |
|||
if i == '': |
|||
continue |
|||
res += unichr(long(i, 16)) |
|||
return res |
|||
|
|||
def get_base_conversion_maps(unicode_data): |
|||
"Create case conversion tables without handling special casing yet." |
|||
|
|||
uc = {} # codepoint (number) -> string |
|||
lc = {} |
|||
tc = {} # titlecase |
|||
|
|||
for x in unicode_data.data: |
|||
c1 = long(x[0], 16) |
|||
|
|||
# just 16-bit support needed |
|||
if c1 >= 0x10000: |
|||
continue |
|||
|
|||
if x[12] != '': |
|||
# field 12: simple uppercase mapping |
|||
c2 = parse_unicode_sequence(x[12]) |
|||
uc[c1] = c2 |
|||
tc[c1] = c2 # titlecase default == uppercase, overridden below if necessary |
|||
if x[13] != '': |
|||
# field 13: simple lowercase mapping |
|||
c2 = parse_unicode_sequence(x[13]) |
|||
lc[c1] = c2 |
|||
if x[14] != '': |
|||
# field 14: simple titlecase mapping |
|||
c2 = parse_unicode_sequence(x[14]) |
|||
tc[c1] = c2 |
|||
|
|||
return uc, lc, tc |
|||
|
|||
def update_special_casings(uc, lc, tc, special_casing): |
|||
"Update case conversion tables with special case conversion rules." |
|||
|
|||
for x in special_casing.data: |
|||
c1 = long(x[0], 16) |
|||
|
|||
if x[4] != '': |
|||
# conditions |
|||
continue |
|||
|
|||
lower = parse_unicode_sequence(x[1]) |
|||
title = parse_unicode_sequence(x[2]) |
|||
upper = parse_unicode_sequence(x[3]) |
|||
|
|||
if len(lower) > 1: |
|||
lc[c1] = lower |
|||
if len(upper) > 1: |
|||
uc[c1] = upper |
|||
if len(title) > 1: |
|||
tc[c1] = title |
|||
|
|||
print 'special case: %d %d %d' % (len(lower), len(upper), len(title)) |
|||
|
|||
def remove_ascii_part(convmap): |
|||
"Remove ASCII case conversion parts (handled by C fast path)." |
|||
|
|||
for i in xrange(128): |
|||
if convmap.has_key(i): |
|||
del convmap[i] |
|||
|
|||
def scan_range_with_skip(convmap, start_idx, skip): |
|||
"Scan for a range of continuous case conversion with a certain 'skip'." |
|||
|
|||
conv_i = start_idx |
|||
if not convmap.has_key(conv_i): |
|||
return None, None, None |
|||
elif len(convmap[conv_i]) > 1: |
|||
return None, None, None |
|||
else: |
|||
conv_o = ord(convmap[conv_i]) |
|||
|
|||
start_i = conv_i |
|||
start_o = conv_o |
|||
|
|||
while True: |
|||
new_i = conv_i + skip |
|||
new_o = conv_o + skip |
|||
|
|||
if not convmap.has_key(new_i): |
|||
break |
|||
if len(convmap[new_i]) > 1: |
|||
break |
|||
if ord(convmap[new_i]) != new_o: |
|||
break |
|||
|
|||
conv_i = new_i |
|||
conv_o = new_o |
|||
|
|||
# [start_i,conv_i] maps to [start_o,conv_o], ignore ranges of 1 char |
|||
count = (conv_i - start_i) / skip + 1 |
|||
if count <= 1: |
|||
return None, None, None |
|||
|
|||
# we have an acceptable range, remove them from the convmap here |
|||
for i in xrange(start_i, conv_i + skip, skip): |
|||
del convmap[i] |
|||
|
|||
return start_i, start_o, count |
|||
|
|||
def find_first_range_with_skip(convmap, skip): |
|||
"Find first range with a certain 'skip' value." |
|||
|
|||
for i in xrange(65536): |
|||
start_i, start_o, count = scan_range_with_skip(convmap, i, skip) |
|||
if start_i is None: |
|||
continue |
|||
return start_i, start_o, count |
|||
|
|||
return None, None, None |
|||
|
|||
def generate_tables(convmap): |
|||
"Generate bit-packed case conversion table for a given conversion map." |
|||
|
|||
# The bitstream encoding is based on manual inspection for whatever |
|||
# regularity the Unicode case conversion rules have. |
|||
# |
|||
# Start with a full description of case conversions which does not |
|||
# cover all codepoints; unmapped codepoints convert to themselves. |
|||
# Scan for range-to-range mappings with a range of skips starting from 1. |
|||
# Whenever a valid range is found, remove it from the map. Finally, |
|||
# output the remaining case conversions (1:1 and 1:n) on a per codepoint |
|||
# basis. |
|||
# |
|||
# This is very slow because we always scan from scratch, but it's the
# most reliable and simple way to scan.
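# As an illustration, uppercase-to-lowercase conversion in Latin Extended-A
# (U+0100 -> U+0101, U+0102 -> U+0103, and so on) can be represented by a
# single skip=2 range entry instead of dozens of individual 1:1 mappings.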
|||
|
|||
ranges = [] # range mappings (2 or more consecutive mappings with a certain skip) |
|||
singles = [] # 1:1 character mappings |
|||
complex = [] # 1:n character mappings |
|||
|
|||
# Ranges with skips |
|||
|
|||
for skip in xrange(1,6+1): # skips 1...6 are useful |
|||
while True: |
|||
start_i, start_o, count = find_first_range_with_skip(convmap, skip) |
|||
if start_i is None: |
|||
break |
|||
print 'skip %d: %d %d %d' % (skip, start_i, start_o, count) |
|||
ranges.append([start_i, start_o, count, skip]) |
|||
|
|||
# 1:1 conversions |
|||
|
|||
k = convmap.keys() |
|||
k.sort() |
|||
for i in k: |
|||
if len(convmap[i]) > 1: |
|||
continue |
|||
singles.append([i, ord(convmap[i])]) # codepoint, codepoint |
|||
del convmap[i] |
|||
|
|||
# There are many mappings to 2-char sequences with the latter char being U+0399.
|||
# These could be handled as a special case, but we don't do that right now. |
|||
# |
|||
# [8064L, u'\u1f08\u0399'] |
|||
# [8065L, u'\u1f09\u0399'] |
|||
# [8066L, u'\u1f0a\u0399'] |
|||
# [8067L, u'\u1f0b\u0399'] |
|||
# [8068L, u'\u1f0c\u0399'] |
|||
# [8069L, u'\u1f0d\u0399'] |
|||
# [8070L, u'\u1f0e\u0399'] |
|||
# [8071L, u'\u1f0f\u0399'] |
|||
# ... |
|||
# |
|||
# tmp = {} |
|||
# k = convmap.keys() |
|||
# k.sort() |
|||
# for i in k: |
|||
# if len(convmap[i]) == 2 and convmap[i][1] == u'\u0399': |
|||
# tmp[i] = convmap[i][0] |
|||
# del convmap[i] |
|||
# print repr(tmp) |
|||
# |
|||
# skip = 1 |
|||
# while True: |
|||
# start_i, start_o, count = find_first_range_with_skip(tmp, skip) |
|||
# if start_i is None: |
|||
# break |
|||
# print 'special399, skip %d: %d %d %d' % (skip, start_i, start_o, count) |
|||
# print len(tmp.keys()) |
|||
# print repr(tmp) |
|||
# XXX: need to put 12 remaining mappings back to convmap... |
|||
|
|||
# 1:n conversions |
|||
|
|||
k = convmap.keys() |
|||
k.sort() |
|||
for i in k: |
|||
complex.append([i, convmap[i]]) # codepoint, string |
|||
del convmap[i] |
|||
|
|||
for t in singles: |
|||
print repr(t) |
|||
|
|||
for t in complex: |
|||
print repr(t) |
|||
|
|||
print 'range mappings: %d' % len(ranges) |
|||
print 'single character mappings: %d' % len(singles) |
|||
print 'complex mappings (1:n): %d' % len(complex) |
|||
print 'remaining (should be zero): %d' % len(convmap.keys()) |
|||
|
|||
# XXX: opportunities for diff encoding skip=3 ranges? |
|||
prev = None |
|||
for t in ranges: |
|||
# range: [start_i, start_o, count, skip] |
|||
if t[3] != 3: |
|||
continue |
|||
if prev is not None: |
|||
print '%d %d' % (t[0] - prev[0], t[1] - prev[1]) |
|||
else: |
|||
print 'start: %d %d' % (t[0], t[1]) |
|||
prev = t |
|||
|
|||
# bit packed encoding |
|||
|
|||
be = dukutil.BitEncoder() |
|||
|
|||
for curr_skip in xrange(1, 7): # 1...6 |
|||
count = 0 |
|||
for r in ranges: |
|||
start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] |
|||
if skip != curr_skip: |
|||
continue |
|||
count += 1 |
|||
be.bits(count, 6) |
|||
print 'encode: skip=%d, count=%d' % (curr_skip, count) |
|||
|
|||
for r in ranges: |
|||
start_i, start_o, r_count, skip = r[0], r[1], r[2], r[3] |
|||
if skip != curr_skip: |
|||
continue |
|||
be.bits(start_i, 16) |
|||
be.bits(start_o, 16) |
|||
be.bits(r_count, 7) |
|||
be.bits(0x3f, 6) # maximum count value = end of skips |
|||
|
|||
count = len(singles) |
|||
be.bits(count, 6) |
|||
for t in singles: |
|||
cp_i, cp_o = t[0], t[1] |
|||
be.bits(cp_i, 16) |
|||
be.bits(cp_o, 16) |
|||
|
|||
count = len(complex) |
|||
be.bits(count, 7) |
|||
for t in complex: |
|||
cp_i, str_o = t[0], t[1] |
|||
be.bits(cp_i, 16) |
|||
be.bits(len(str_o), 2) |
|||
for i in xrange(len(str_o)): |
|||
be.bits(ord(str_o[i]), 16) |
|||
|
|||
return be.getBytes(), be.getNumBits() |
|||
|
|||
def generate_regexp_canonicalize_lookup(convmap): |
|||
res = [] |
|||
|
|||
highest_nonid = -1 |
|||
|
|||
for cp in xrange(65536): |
|||
res_cp = cp # default to as is |
|||
if convmap.has_key(cp): |
|||
tmp = convmap[cp] |
|||
if len(tmp) == 1: |
|||
# Only 1:1 mappings are used here; mappings to multiple codepoints are ignored (res_cp stays as cp)
|||
res_cp = ord(tmp[0]) |
|||
if cp >= 0x80 and res_cp < 0x80: |
|||
res_cp = cp # non-ASCII mapped to ASCII, ignore |
|||
|
|||
if cp != res_cp: |
|||
highest_nonid = cp |
|||
|
|||
res.append(res_cp) |
|||
|
|||
# At the moment this is 65370, which means there's very little |
|||
# gain in assuming 1:1 mapping above a certain BMP codepoint. |
|||
print('HIGHEST NON-ID MAPPING: %d' % highest_nonid) |
|||
return res |
|||
|
|||
def clonedict(x): |
|||
"Shallow clone of input dict." |
|||
res = {} |
|||
for k in x.keys(): |
|||
res[k] = x[k] |
|||
return res |
|||
|
|||
def main(): |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--command', dest='command', default='caseconv_bitpacked') |
|||
parser.add_option('--unicode-data', dest='unicode_data') |
|||
parser.add_option('--special-casing', dest='special_casing') |
|||
parser.add_option('--out-source', dest='out_source') |
|||
parser.add_option('--out-header', dest='out_header') |
|||
parser.add_option('--table-name-lc', dest='table_name_lc', default='caseconv_lc') |
|||
parser.add_option('--table-name-uc', dest='table_name_uc', default='caseconv_uc') |
|||
parser.add_option('--table-name-re-canon-lookup', dest='table_name_re_canon_lookup', default='caseconv_re_canon_lookup') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
unicode_data = UnicodeData(opts.unicode_data) |
|||
special_casing = SpecialCasing(opts.special_casing) |
|||
|
|||
uc, lc, tc = get_base_conversion_maps(unicode_data) |
|||
update_special_casings(uc, lc, tc, special_casing) |
|||
|
|||
if opts.command == 'caseconv_bitpacked': |
|||
# XXX: ASCII and non-BMP filtering could be an option but is now hardcoded |
|||
|
|||
# ascii is handled with 'fast path' so not needed here |
|||
t = clonedict(uc) |
|||
remove_ascii_part(t) |
|||
uc_bytes, uc_nbits = generate_tables(t) |
|||
|
|||
t = clonedict(lc) |
|||
remove_ascii_part(t) |
|||
lc_bytes, lc_nbits = generate_tables(t) |
|||
|
|||
# Generate C source and header files |
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_caseconv.py') |
|||
genc.emitArray(uc_bytes, opts.table_name_uc, size=len(uc_bytes), typename='duk_uint8_t', intvalues=True, const=True) |
|||
genc.emitArray(lc_bytes, opts.table_name_lc, size=len(lc_bytes), typename='duk_uint8_t', intvalues=True, const=True) |
|||
f = open(opts.out_source, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
|
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_caseconv.py') |
|||
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_uc, len(uc_bytes))) |
|||
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name_lc, len(lc_bytes))) |
|||
f = open(opts.out_header, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
elif opts.command == 're_canon_lookup': |
|||
# direct canonicalization lookup for case insensitive regexps, includes ascii part |
|||
t = clonedict(uc) |
|||
re_canon_lookup = generate_regexp_canonicalize_lookup(t) |
|||
|
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_caseconv.py') |
|||
genc.emitArray(re_canon_lookup, opts.table_name_re_canon_lookup, size=len(re_canon_lookup), typename='duk_uint16_t', intvalues=True, const=True) |
|||
f = open(opts.out_source, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
|
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_caseconv.py') |
|||
genc.emitLine('extern const duk_uint16_t %s[%d];' % (opts.table_name_re_canon_lookup, len(re_canon_lookup))) |
|||
f = open(opts.out_header, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
else: |
|||
raise Exception('invalid command: %r' % opts.command) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,382 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Select a set of Unicode characters (based on included/excluded categories |
|||
# etc) and write out a compact bitstream for matching a character against |
|||
# the set at runtime. This is for the slow path, where we're especially |
|||
# concerned with compactness. A C source file with the table is written, |
|||
# together with a matching C header. |
|||
# |
|||
# Unicode categories (such as 'Z') can be used. Two pseudo-categories |
|||
# are also available for exclusion only: ASCII and NONBMP. "ASCII" |
|||
# category excludes ASCII codepoints which is useful because C code |
|||
# typically contains an ASCII fast path so ASCII characters don't need |
|||
# to be considered in the Unicode tables. "NONBMP" excludes codepoints |
|||
# above U+FFFF which is useful because such codepoints don't need to be |
|||
# supported in standard Ecmascript. |
|||
# |
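# Example invocation (output file and table names are illustrative; the
# categories correspond to what an identifier-start table might use):
#
#   $ python extract_chars.py --unicode-data=UnicodeData.txt \
#         --include-categories=Lu,Ll,Lt,Lm,Lo,Nl \
#         --exclude-categories=ASCII,NONBMP \
#         --out-source=duk_unicode_ids.c --out-header=duk_unicode_ids.h \
#         --table-name=duk_unicode_ids_noascii
#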
|||
|
|||
import os, sys, math |
|||
import optparse |
|||
import dukutil |
|||
|
|||
def read_unicode_data(unidata, catsinc, catsexc, filterfunc): |
|||
"Read UnicodeData.txt, including lines matching catsinc unless excluded by catsexc or filterfunc." |
|||
res = [] |
|||
f = open(unidata, 'rb') |
|||
|
|||
def filter_none(cp): |
|||
return True |
|||
if filterfunc is None: |
|||
filterfunc = filter_none |
|||
|
|||
# The Unicode parsing is slow enough to warrant some speedups. |
|||
exclude_cat_exact = {} |
|||
for cat in catsexc: |
|||
exclude_cat_exact[cat] = True |
|||
include_cat_exact = {} |
|||
for cat in catsinc: |
|||
include_cat_exact[cat] = True |
|||
|
|||
for line in f: |
|||
#line = line.strip() |
|||
parts = line.split(';') |
|||
|
|||
codepoint = parts[0] |
|||
if not filterfunc(long(codepoint, 16)): |
|||
continue |
|||
|
|||
category = parts[2] |
|||
if exclude_cat_exact.has_key(category): |
|||
continue # quick reject |
|||
|
|||
rejected = False |
|||
for cat in catsexc: |
|||
if category.startswith(cat) or codepoint == cat: |
|||
rejected = True |
|||
break |
|||
if rejected: |
|||
continue |
|||
|
|||
if include_cat_exact.has_key(category): |
|||
res.append(line) |
|||
continue |
|||
|
|||
accepted = False |
|||
for cat in catsinc: |
|||
if category.startswith(cat) or codepoint == cat: |
|||
accepted = True |
|||
break |
|||
if accepted: |
|||
res.append(line) |
|||
|
|||
f.close() |
|||
|
|||
# Sort based on Unicode codepoint |
|||
def mycmp(a,b): |
|||
t1 = a.split(';') |
|||
t2 = b.split(';') |
|||
n1 = long(t1[0], 16) |
|||
n2 = long(t2[0], 16) |
|||
return cmp(n1, n2) |
|||
|
|||
res.sort(cmp=mycmp) |
|||
|
|||
return res |
|||
|
|||
def scan_ranges(lines): |
|||
"Scan continuous ranges from (filtered) UnicodeData.txt lines." |
|||
ranges = [] |
|||
range_start = None |
|||
prev = None |
|||
|
|||
for line in lines: |
|||
t = line.split(';') |
|||
n = long(t[0], 16) |
|||
if range_start is None: |
|||
range_start = n |
|||
else: |
|||
if n == prev + 1: |
|||
# continue range |
|||
pass |
|||
else: |
|||
ranges.append((range_start, prev)) |
|||
range_start = n |
|||
prev = n |
|||
|
|||
if range_start is not None: |
|||
ranges.append((range_start, prev)) |
|||
|
|||
return ranges |
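# Illustrative example: if the filtered lines cover codepoints U+0041, U+0042,
# U+0043, and U+0050, the returned ranges are [ (0x41, 0x43), (0x50, 0x50) ].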
|||
|
|||
def generate_png(lines, fname): |
|||
"Generate an illustrative PNG of the character set." |
|||
from PIL import Image |
|||
|
|||
m = {} |
|||
for line in lines: |
|||
t = line.split(';') |
|||
n = long(t[0], 16) |
|||
m[n] = 1 |
|||
|
|||
codepoints = 0x10ffff + 1 |
|||
width = int(256) |
|||
height = int(math.ceil(float(codepoints) / float(width))) |
|||
im = Image.new('RGB', (width, height)) |
|||
black = (0,0,0) |
|||
white = (255,255,255) |
|||
for cp in xrange(codepoints): |
|||
y = cp / width |
|||
x = cp % width |
|||
|
|||
if m.has_key(long(cp)): |
|||
im.putpixel((x,y), black) |
|||
else: |
|||
im.putpixel((x,y), white) |
|||
|
|||
im.save(fname) |
|||
|
|||
def generate_match_table1(ranges): |
|||
"Unused match table format." |
|||
|
|||
# This is an earlier match table format which is no longer used. |
|||
# IdentifierStart-UnicodeLetter has 445 ranges and generates a |
|||
# match table of 2289 bytes. |
|||
|
|||
data = [] |
|||
prev_re = None |
|||
|
|||
def genrange(rs, re): |
|||
if (rs > re): |
|||
raise Exception('assumption failed: rs=%d re=%d' % (rs, re)) |
|||
|
|||
while True: |
|||
now = re - rs + 1 |
|||
if now > 255: |
|||
now = 255 |
|||
data.append(now) # range now |
|||
data.append(0) # skip 0 |
|||
rs = rs + now |
|||
else: |
|||
data.append(now) # range now |
|||
break |
|||
|
|||
def genskip(ss, se): |
|||
if (ss > se): |
|||
raise Exception('assumption failed: ss=%d se=%s' % (ss, se)) |
|||
|
|||
while True: |
|||
now = se - ss + 1 |
|||
if now > 255: |
|||
now = 255 |
|||
data.append(now) # skip now |
|||
data.append(0) # range 0 |
|||
ss = ss + now |
|||
else: |
|||
data.append(now) # skip now |
|||
break |
|||
|
|||
for rs, re in ranges: |
|||
if prev_re is not None: |
|||
genskip(prev_re + 1, rs - 1) |
|||
genrange(rs, re) |
|||
prev_re = re |
|||
|
|||
num_entries = len(data) |
|||
|
|||
# header: start of first range |
|||
# num entries |
|||
hdr = [] |
|||
hdr.append(ranges[0][0] >> 8) # XXX: check that not 0x10000 or over |
|||
hdr.append(ranges[0][0] & 0xff)
|||
hdr.append(num_entries >> 8) |
|||
hdr.append(num_entries & 0xff) |
|||
|
|||
return hdr + data |
|||
|
|||
def generate_match_table2(ranges): |
|||
"Unused match table format." |
|||
|
|||
# Another attempt at a match table which is also unused. |
|||
# Total tables for all current classes is now 1472 bytes. |
|||
|
|||
data = [] |
|||
|
|||
def enc(x): |
|||
while True: |
|||
if x < 0x80: |
|||
data.append(x) |
|||
break |
|||
data.append(0x80 + (x & 0x7f)) |
|||
x = x >> 7 |
|||
|
|||
prev_re = 0 |
|||
|
|||
for rs, re in ranges: |
|||
r1 = rs - prev_re # 1 or above (no unjoined ranges) |
|||
r2 = re - rs # 0 or above |
|||
enc(r1) |
|||
enc(r2) |
|||
prev_re = re |
|||
|
|||
enc(0) # end marker |
|||
|
|||
return data |
|||
|
|||
def generate_match_table3(ranges): |
|||
"Current match table format." |
|||
|
|||
# Yet another attempt, similar to generate_match_table2 except |
|||
# in packing format. |
|||
# |
|||
# Total match size now (at time of writing): 1194 bytes. |
|||
# |
|||
# This is the current encoding format used in duk_lexer.c. |
|||
|
|||
be = dukutil.BitEncoder() |
|||
|
|||
freq = [0] * (0x10ffff + 1) # informative |
|||
|
|||
def enc(x): |
|||
freq[x] += 1 |
|||
|
|||
if x <= 0x0e: |
|||
# 4-bit encoding |
|||
be.bits(x, 4) |
|||
return |
|||
x -= 0x0e + 1 |
|||
if x <= 0xfd: |
|||
# 12-bit encoding |
|||
be.bits(0x0f, 4) |
|||
be.bits(x, 8) |
|||
return |
|||
x -= 0xfd + 1 |
|||
if x <= 0xfff: |
|||
# 24-bit encoding |
|||
be.bits(0x0f, 4) |
|||
be.bits(0xfe, 8) |
|||
be.bits(x, 12) |
|||
return |
|||
x -= 0xfff + 1 |
|||
if True: |
|||
# 36-bit encoding |
|||
be.bits(0x0f, 4) |
|||
be.bits(0xff, 8) |
|||
be.bits(x, 24) |
|||
return |
|||
|
|||
raise Exception('cannot encode') |
|||
|
|||
prev_re = 0 |
|||
|
|||
for rs, re in ranges: |
|||
r1 = rs - prev_re # 1 or above (no unjoined ranges) |
|||
r2 = re - rs # 0 or above |
|||
enc(r1) |
|||
enc(r2) |
|||
prev_re = re |
|||
|
|||
enc(0) # end marker |
|||
|
|||
data, nbits = be.getBytes(), be.getNumBits() |
|||
return data, freq |
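# Illustrative encodings produced by enc() above:
#
#     value 5    -> 4 bits:   0101
#     value 20   -> 12 bits:  1111 + 8-bit 0x05   (20 - 15)
#     value 300  -> 24 bits:  1111 + 11111110 + 12-bit 0x01f   (300 - 15 - 254)
#
# so the small skip/range values which dominate the tables cost only 4 bits.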
|||
|
|||
def main(): |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--unicode-data', dest='unicode_data') # UnicodeData.txt |
|||
parser.add_option('--special-casing', dest='special_casing') # SpecialCasing.txt |
|||
parser.add_option('--include-categories', dest='include_categories') |
|||
parser.add_option('--exclude-categories', dest='exclude_categories', default='NONE') |
|||
parser.add_option('--out-source', dest='out_source') |
|||
parser.add_option('--out-header', dest='out_header') |
|||
parser.add_option('--out-png', dest='out_png') |
|||
parser.add_option('--table-name', dest='table_name', default='match_table') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
unidata = opts.unicode_data |
|||
catsinc = [] |
|||
if opts.include_categories != '': |
|||
catsinc = opts.include_categories.split(',') |
|||
catsexc = [] |
|||
if opts.exclude_categories != 'NONE': |
|||
catsexc = opts.exclude_categories.split(',') |
|||
|
|||
print 'CATSEXC: %s' % repr(catsexc) |
|||
print 'CATSINC: %s' % repr(catsinc) |
|||
|
|||
# pseudocategories |
|||
filter_ascii = ('ASCII' in catsexc) |
|||
filter_nonbmp = ('NONBMP' in catsexc) |
|||
|
|||
# Read raw result |
|||
def filter1(x): |
|||
if filter_ascii and x <= 0x7f: |
|||
# exclude ascii |
|||
return False |
|||
if filter_nonbmp and x >= 0x10000: |
|||
# exclude non-bmp |
|||
return False |
|||
return True |
|||
|
|||
print('read unicode data') |
|||
res = read_unicode_data(unidata, catsinc, catsexc, filter1) |
|||
print('done reading unicode data') |
|||
|
|||
# Raw output |
|||
#print('RAW OUTPUT:') |
|||
#print('===========') |
|||
#print('\n'.join(res)) |
|||
|
|||
# Scan ranges |
|||
#print('') |
|||
#print('RANGES:') |
|||
#print('=======') |
|||
ranges = scan_ranges(res) |
|||
#for i in ranges: |
|||
# if i[0] == i[1]: |
|||
# print('0x%04x' % i[0]) |
|||
# else: |
|||
# print('0x%04x ... 0x%04x' % (i[0], i[1])) |
|||
#print('') |
|||
print('%d ranges total' % len(ranges)) |
|||
|
|||
# Generate match table |
|||
#print('') |
|||
#print('MATCH TABLE:') |
|||
#print('============') |
|||
#matchtable1 = generate_match_table1(ranges) |
|||
#matchtable2 = generate_match_table2(ranges) |
|||
matchtable3, freq = generate_match_table3(ranges) |
|||
#print 'match table: %s' % repr(matchtable3) |
|||
print 'match table length: %d bytes' % len(matchtable3) |
|||
print 'encoding freq:' |
|||
for i in xrange(len(freq)): |
|||
if freq[i] == 0: |
|||
continue |
|||
print ' %6d: %d' % (i, freq[i]) |
|||
|
|||
print('') |
|||
print('MATCH C TABLE -> file %s' % repr(opts.out_header)) |
|||
|
|||
# Create C source and header files |
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_chars.py') |
|||
genc.emitArray(matchtable3, opts.table_name, size=len(matchtable3), typename='duk_uint8_t', intvalues=True, const=True) |
|||
if opts.out_source is not None: |
|||
f = open(opts.out_source, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
|
|||
genc = dukutil.GenerateC() |
|||
genc.emitHeader('extract_chars.py') |
|||
genc.emitLine('extern const duk_uint8_t %s[%d];' % (opts.table_name, len(matchtable3))) |
|||
if opts.out_header is not None: |
|||
f = open(opts.out_header, 'wb') |
|||
f.write(genc.getString()) |
|||
f.close() |
|||
|
|||
# Image (for illustrative purposes only) |
|||
if opts.out_png is not None: |
|||
generate_png(res, opts.out_png) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,41 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Extract unique DUK_USE_xxx flags from current code base: |
|||
# |
|||
# $ python extract_unique_options.py ../src/*.c ../src/*.h ../src/*.h.in |
|||
# |
|||
|
|||
import os, sys, re |
|||
|
|||
# DUK_USE_xxx/DUK_OPT_xxx are used as placeholders and not matched |
|||
# (only uppercase allowed) |
|||
re_use = re.compile(r'DUK_USE_[A-Z0-9_]+') |
|||
re_opt = re.compile(r'DUK_OPT_[A-Z0-9_]+') |
|||
|
|||
def main(): |
|||
uses = {} |
|||
opts = {} |
|||
|
|||
for fn in sys.argv[1:]: |
|||
f = open(fn, 'rb') |
|||
for line in f: |
|||
for t in re.findall(re_use, line): |
|||
if t[-1] != '_': # skip e.g. 'DUK_USE_' |
|||
uses[t] = True |
|||
for t in re.findall(re_opt, line): |
|||
if t[-1] != '_': |
|||
opts[t] = True |
|||
f.close() |
|||
|
|||
k = opts.keys() |
|||
k.sort() |
|||
for i in k: |
|||
print(i) |
|||
|
|||
k = uses.keys() |
|||
k.sort() |
|||
for i in k: |
|||
print(i) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,44 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Generate build parameter files based on build information. |
|||
# A C header is generated for C code, and a JSON file for |
|||
# build scripts etc which need to know the build config. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import json |
|||
import optparse |
|||
|
|||
import dukutil |
|||
|
|||
if __name__ == '__main__': |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--version', dest='version') |
|||
parser.add_option('--git-commit', dest='git_commit') |
|||
parser.add_option('--git-describe', dest='git_describe') |
|||
parser.add_option('--git-branch', dest='git_branch') |
|||
parser.add_option('--out-json', dest='out_json') |
|||
parser.add_option('--out-header', dest='out_header') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
t = { |
|||
'version': opts.version, |
|||
'git_commit': opts.git_commit, |
|||
'git_describe': opts.git_describe, |
|||
'git_branch': opts.git_branch, |
|||
} |
|||
|
|||
f = open(opts.out_json, 'wb') |
|||
f.write(dukutil.json_encode(t).encode('ascii')) |
|||
f.close() |
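# The JSON written above looks roughly like the following (values illustrative):
#
#     {"version": "10500", "git_commit": "0123abcd...", "git_describe": "v1.5.0-xx", "git_branch": "master"}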
|||
|
|||
f = open(opts.out_header, 'wb') |
|||
f.write('#ifndef DUK_BUILDPARAMS_H_INCLUDED\n') |
|||
f.write('#define DUK_BUILDPARAMS_H_INCLUDED\n') |
|||
f.write('/* automatically generated by genbuildparams.py, do not edit */\n') |
|||
f.write('\n') |
|||
f.write('/* DUK_VERSION is defined in duktape.h */') |
|||
f.write('\n') |
|||
f.write('#endif /* DUK_BUILDPARAMS_H_INCLUDED */\n') |
|||
f.close() |
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,5 @@ |
|||
import os, sys, json, yaml |
|||
|
|||
if __name__ == '__main__': |
|||
# Use safe_dump() instead of dump() to avoid tags like "!!python/unicode" |
|||
print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False)) |
@ -0,0 +1,32 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Merge debugger YAML metadata files and output a merged JSON metadata file. |
|||
# |
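# Example invocation (the script and YAML file names are illustrative):
#
#   $ python merge_debug_meta.py --output duk_debug_meta.json \
#         --class-names duk_classnames.yaml \
#         --debug-commands duk_debugcommands.yaml \
#         --debug-errors duk_debugerrors.yaml \
#         --opcodes duk_opcodes.yaml
#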
|||
|
|||
import os, sys, json, yaml |
|||
import optparse |
|||
|
|||
if __name__ == '__main__': |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--output', dest='output', default=None, help='output JSON filename') |
|||
parser.add_option('--class-names', dest='class_names', help='YAML metadata for class names') |
|||
parser.add_option('--debug-commands', dest='debug_commands', help='YAML metadata for debug commands') |
|||
parser.add_option('--debug-errors', dest='debug_errors', help='YAML metadata for debug protocol error codes') |
|||
parser.add_option('--opcodes', dest='opcodes', help='YAML metadata for opcodes') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
res = {} |
|||
def merge(fn): |
|||
with open(fn, 'rb') as f: |
|||
doc = yaml.load(f) |
|||
for k in doc.keys(): |
|||
res[k] = doc[k] |
|||
|
|||
merge(opts.class_names) |
|||
merge(opts.debug_commands) |
|||
merge(opts.debug_errors) |
|||
merge(opts.opcodes) |
|||
|
|||
with open(opts.output, 'wb') as f: |
|||
f.write(json.dumps(res, indent=4) + '\n') |
|||
print('Wrote merged debugger metadata to ' + str(opts.output)) |
@ -0,0 +1,854 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Config-and-prepare: create a duk_config.h and combined/separate sources |
|||
# for configuration options specified on the command line. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import shutil |
|||
import glob |
|||
import optparse |
|||
import tarfile |
|||
import json |
|||
import yaml |
|||
import subprocess |
|||
|
|||
# Helpers |
|||
|
|||
def exec_get_stdout(cmd, input=None, default=None, print_stdout=False): |
|||
try: |
|||
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
|||
ret = proc.communicate(input=input) |
|||
if print_stdout: |
|||
sys.stdout.write(ret[0]) |
|||
sys.stdout.flush() |
|||
if proc.returncode != 0: |
|||
sys.stdout.write(ret[1]) # print stderr on error |
|||
sys.stdout.flush() |
|||
if default is not None: |
|||
print('WARNING: command %r failed, returning default' % cmd) |
|||
return default |
|||
raise Exception('command failed, return code %d: %r' % (proc.returncode, cmd)) |
|||
return ret[0] |
|||
except: |
|||
if default is not None: |
|||
print('WARNING: command %r failed, returning default' % cmd) |
|||
return default |
|||
raise |
|||
|
|||
def exec_print_stdout(cmd, input=None): |
|||
ret = exec_get_stdout(cmd, input=input, print_stdout=True) |
|||
|
|||
def mkdir(path): |
|||
os.mkdir(path) |
|||
|
|||
def copy_file(src, dst): |
|||
with open(src, 'rb') as f_in: |
|||
with open(dst, 'wb') as f_out: |
|||
f_out.write(f_in.read()) |
|||
|
|||
def copy_files(filelist, srcdir, dstdir): |
|||
for i in filelist: |
|||
copy_file(os.path.join(srcdir, i), os.path.join(dstdir, i)) |
|||
|
|||
def copy_and_replace(src, dst, rules): |
|||
# Read and write separately to allow in-place replacement |
|||
keys = sorted(rules.keys()) |
|||
res = [] |
|||
with open(src, 'rb') as f_in: |
|||
for line in f_in: |
|||
for k in keys: |
|||
line = line.replace(k, rules[k]) |
|||
res.append(line) |
|||
with open(dst, 'wb') as f_out: |
|||
f_out.write(''.join(res)) |
|||
|
|||
def copy_and_cquote(src, dst): |
|||
with open(src, 'rb') as f_in: |
|||
with open(dst, 'wb') as f_out: |
|||
f_out.write('/*\n') |
|||
for line in f_in: |
|||
line = line.decode('utf-8') |
|||
f_out.write(' * ') |
|||
for c in line: |
|||
if (ord(c) >= 0x20 and ord(c) <= 0x7e) or (c in '\x0a'): |
|||
f_out.write(c.encode('ascii')) |
|||
else: |
|||
f_out.write('\\u%04x' % ord(c)) |
|||
f_out.write(' */\n') |
|||
|
|||
def read_file(src, strip_last_nl=False): |
|||
with open(src, 'rb') as f: |
|||
data = f.read() |
|||
if strip_last_nl and len(data) > 0 and data[-1] == '\n': |
|||
data = data[:-1] |
|||
return data |
|||
|
|||
def delete_matching_files(dirpath, cb): |
|||
for fn in os.listdir(dirpath): |
|||
if os.path.isfile(os.path.join(dirpath, fn)) and cb(fn): |
|||
#print('Deleting %r' % os.path.join(dirpath, fn)) |
|||
os.unlink(os.path.join(dirpath, fn)) |
|||
|
|||
def create_targz(dstfile, filelist): |
|||
# https://docs.python.org/2/library/tarfile.html#examples |
|||
|
|||
def _add(tf, fn): # recursive add |
|||
#print('Adding to tar: ' + fn) |
|||
if os.path.isdir(fn): |
|||
for i in sorted(os.listdir(fn)): |
|||
_add(tf, os.path.join(fn, i)) |
|||
elif os.path.isfile(fn): |
|||
tf.add(fn) |
|||
else: |
|||
raise Exception('invalid file: %r' % fn) |
|||
|
|||
with tarfile.open(dstfile, 'w:gz') as tf: |
|||
for fn in filelist: |
|||
_add(tf, fn) |
|||
|
|||
def cstring(x): |
|||
return '"' + x + '"' # good enough for now |
|||
|
|||
# DUK_VERSION is grepped from duk_api_public.h.in: it is needed for the |
|||
# public API and we want to avoid defining it in two places. |
|||
def get_duk_version(apiheader_filename): |
|||
r = re.compile(r'^#define\s+DUK_VERSION\s+(.*?)L?\s*$') |
|||
with open(apiheader_filename, 'rb') as f: |
|||
for line in f: |
|||
m = r.match(line) |
|||
if m is not None: |
|||
duk_version = int(m.group(1)) |
|||
duk_major = duk_version / 10000 |
|||
duk_minor = (duk_version % 10000) / 100 |
|||
duk_patch = duk_version % 100 |
|||
duk_version_formatted = '%d.%d.%d' % (duk_major, duk_minor, duk_patch) |
|||
return duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted |
|||
|
|||
raise Exception('cannot figure out duktape version') |
|||
|
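|||
# Illustrative only (not used by the build): how a numeric DUK_VERSION such |
|||
# as 10500 decomposes into the formatted version string; the sample value is |
|||
# an assumption chosen for demonstration. |
|||
def _demo_format_version(duk_version=10500): |
|||
    duk_major = duk_version / 10000          # -> 1 |
|||
    duk_minor = (duk_version % 10000) / 100  # -> 5 |
|||
    duk_patch = duk_version % 100            # -> 0 |
|||
    return '%d.%d.%d' % (duk_major, duk_minor, duk_patch)  # -> '1.5.0' |
|||
 |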
|||
# Python module check and friendly errors |
|||
|
|||
def check_python_modules(): |
|||
# make_dist.py doesn't need yaml but other dist utils will; check for it and |
|||
# warn if it is missing. |
|||
failed = False |
|||
|
|||
def _warning(module, aptPackage, pipPackage): |
|||
sys.stderr.write('\n') |
|||
sys.stderr.write('*** NOTE: Could not "import %s" needed for dist. Install it using e.g.:\n' % module) |
|||
sys.stderr.write('\n') |
|||
sys.stderr.write(' # Linux\n') |
|||
sys.stderr.write(' $ sudo apt-get install %s\n' % aptPackage) |
|||
sys.stderr.write('\n') |
|||
sys.stderr.write(' # Windows\n') |
|||
sys.stderr.write(' > pip install %s\n' % pipPackage) |
|||
|
|||
try: |
|||
import yaml |
|||
except ImportError: |
|||
_warning('yaml', 'python-yaml', 'PyYAML') |
|||
failed = True |
|||
|
|||
if failed: |
|||
sys.stderr.write('\n') |
|||
raise Exception('Missing some required Python modules') |
|||
|
|||
check_python_modules() |
|||
|
|||
# Option parsing |
|||
|
|||
def main(): |
|||
parser = optparse.OptionParser() |
|||
|
|||
# Forced options from multiple sources are gathered into a shared list |
|||
# so that the override order remains the same as on the command line. |
|||
force_options_yaml = [] |
|||
def add_force_option_yaml(option, opt, value, parser): |
|||
# XXX: check that YAML parses |
|||
force_options_yaml.append(value) |
|||
def add_force_option_file(option, opt, value, parser): |
|||
# XXX: check that YAML parses |
|||
with open(value, 'rb') as f: |
|||
force_options_yaml.append(f.read()) |
|||
def add_force_option_define(option, opt, value, parser): |
|||
tmp = value.split('=') |
|||
if len(tmp) == 1: |
|||
doc = { tmp[0]: True } |
|||
elif len(tmp) == 2: |
|||
doc = { tmp[0]: tmp[1] } |
|||
else: |
|||
raise Exception('invalid option value: %r' % value) |
|||
force_options_yaml.append(yaml.safe_dump(doc)) |
|||
def add_force_option_undefine(option, opt, value, parser): |
|||
tmp = value.split('=') |
|||
if len(tmp) == 1: |
|||
doc = { tmp[0]: False } |
|||
else: |
|||
raise Exception('invalid option value: %r' % value) |
|||
force_options_yaml.append(yaml.safe_dump(doc)) |
|||
|
|||
fixup_header_lines = [] |
|||
def add_fixup_header_line(option, opt, value, parser): |
|||
fixup_header_lines.append(value) |
|||
def add_fixup_header_file(option, opt, value, parser): |
|||
with open(value, 'rb') as f: |
|||
for line in f: |
|||
if line[-1] == '\n': |
|||
line = line[:-1] |
|||
fixup_header_lines.append(line) |
|||
|
|||
# Options for config-and-prepare tool itself. |
|||
parser.add_option('--source-directory', dest='source_directory', default=None, help='Directory with raw input sources (src-input/)') |
|||
parser.add_option('--output-directory', dest='output_directory', default=None, help='Directory for output files, must already exist') |
|||
parser.add_option('--duk-build-meta', dest='duk_build_meta', default=None, help='duk_build_meta.json for git commit info etc') |
|||
parser.add_option('--git-commit', dest='git_commit', default=None, help='Force git commit hash') |
|||
parser.add_option('--git-describe', dest='git_describe', default=None, help='Force git describe') |
|||
parser.add_option('--git-branch', dest='git_branch', default=None, help='Force git branch name') |
|||
|
|||
# Options forwarded to genbuiltins.py. |
|||
parser.add_option('--rom-support', dest='rom_support', action='store_true', help='Add support for ROM strings/objects (increases duktape.c size considerably)') |
|||
parser.add_option('--rom-auto-lightfunc', dest='rom_auto_lightfunc', action='store_true', default=False, help='Convert ROM built-in function properties into lightfuncs automatically whenever possible') |
|||
parser.add_option('--user-builtin-metadata', dest='user_builtin_metadata', action='append', default=[], help='User strings and objects to add, YAML format (can be repeated for multiple overrides)') |
|||
|
|||
# Options forwarded to genconfig.py. |
|||
parser.add_option('--config-metadata', dest='config_metadata', default=None, help='metadata directory or metadata tar.gz file') |
|||
parser.add_option('--platform', dest='platform', default=None, help='platform (default is autodetect)') |
|||
parser.add_option('--compiler', dest='compiler', default=None, help='compiler (default is autodetect)') |
|||
parser.add_option('--architecture', dest='architecture', default=None, help='architecture (default is autodetect)') |
|||
parser.add_option('--c99-types-only', dest='c99_types_only', action='store_true', default=False, help='assume C99 types, no legacy type detection') |
|||
parser.add_option('--dll', dest='dll', action='store_true', default=False, help='dll build of Duktape, affects symbol visibility macros especially on Windows') |
|||
parser.add_option('--support-feature-options', dest='support_feature_options', action='store_true', default=False, help='support DUK_OPT_xxx feature options in duk_config.h') |
|||
parser.add_option('--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help='emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)') |
|||
parser.add_option('--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help='emit preprocessor checks for config option consistency (DUK_OPT_xxx)') |
|||
parser.add_option('--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers') |
|||
parser.add_option('--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers') |
|||
parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers') |
|||
parser.add_option('--add-active-defines-macro', dest='add_active_defines_macro', action='store_true', default=False, help='add DUK_ACTIVE_DEFINES macro, for development only') |
|||
parser.add_option('--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"') |
|||
parser.add_option('-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help='synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"') |
|||
parser.add_option('--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"') |
|||
parser.add_option('-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"') |
|||
parser.add_option('--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help='force option(s) using inline YAML (e.g. --option-yaml "DUK_USE_DEEP_C_STACK: true")') |
|||
parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides') |
|||
parser.add_option('--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help='C header snippet file(s) to be appended to generated header, useful for manual option fixups') |
|||
parser.add_option('--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help='C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")') |
|||
parser.add_option('--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues') |
|||
parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate') |
|||
|
|||
(opts, args) = parser.parse_args() |
|||
|
|||
assert(opts.source_directory) |
|||
srcdir = opts.source_directory |
|||
assert(opts.output_directory) |
|||
outdir = opts.output_directory |
|||
|
|||
# Figure out directories, git info, etc |
|||
|
|||
entry_pwd = os.getcwd() |
|||
|
|||
duk_build_meta = None |
|||
if opts.duk_build_meta is not None: |
|||
with open(opts.duk_build_meta, 'rb') as f: |
|||
duk_build_meta = json.loads(f.read()) |
|||
|
|||
duk_version, duk_major, duk_minor, duk_patch, duk_version_formatted = \ |
|||
get_duk_version(os.path.join(srcdir, 'duk_api_public.h.in')) |
|||
|
|||
git_commit = None |
|||
git_branch = None |
|||
git_describe = None |
|||
|
|||
if duk_build_meta is not None: |
|||
git_commit = duk_build_meta['git_commit'] |
|||
git_branch = duk_build_meta['git_branch'] |
|||
git_describe = duk_build_meta['git_describe'] |
|||
else: |
|||
print('No --duk-build-meta, git commit information will be determined automatically') |
|||
|
|||
if opts.git_commit is not None: |
|||
git_commit = opts.git_commit |
|||
if opts.git_describe is not None: |
|||
git_describe = opts.git_describe |
|||
if opts.git_branch is not None: |
|||
git_branch = opts.git_branch |
|||
|
|||
if git_commit is None: |
|||
git_commit = exec_get_stdout([ 'git', 'rev-parse', 'HEAD' ], default='external').strip() |
|||
if git_describe is None: |
|||
git_describe = exec_get_stdout([ 'git', 'describe', '--always', '--dirty' ], default='external').strip() |
|||
if git_branch is None: |
|||
git_branch = exec_get_stdout([ 'git', 'rev-parse', '--abbrev-ref', 'HEAD' ], default='external').strip() |
|||
|
|||
git_commit = str(git_commit) |
|||
git_describe = str(git_describe) |
|||
git_branch = str(git_branch) |
|||
|
|||
git_commit_cstring = cstring(git_commit) |
|||
git_describe_cstring = cstring(git_describe) |
|||
git_branch_cstring = cstring(git_branch) |
|||
|
|||
print('Config-and-prepare for Duktape version %s, commit %s, describe %s, branch %s' % \ |
|||
(duk_version_formatted, git_commit, git_describe, git_branch)) |
|||
|
|||
# For now, create the src/, src-noline/, and src-separate/ structure into the |
|||
# output directory. Later on the output directory should get the specific |
|||
# variant output directly. |
|||
mkdir(os.path.join(outdir, 'src')) |
|||
mkdir(os.path.join(outdir, 'src-noline')) |
|||
mkdir(os.path.join(outdir, 'src-separate')) |
|||
|
|||
# Separate sources are mostly copied as is at present. |
|||
copy_files([ |
|||
'duk_alloc_default.c', |
|||
'duk_api_internal.h', |
|||
'duk_api_stack.c', |
|||
'duk_api_heap.c', |
|||
'duk_api_buffer.c', |
|||
'duk_api_call.c', |
|||
'duk_api_codec.c', |
|||
'duk_api_compile.c', |
|||
'duk_api_bytecode.c', |
|||
'duk_api_memory.c', |
|||
'duk_api_object.c', |
|||
'duk_api_string.c', |
|||
'duk_api_time.c', |
|||
'duk_api_debug.c', |
|||
'duk_bi_array.c', |
|||
'duk_bi_boolean.c', |
|||
'duk_bi_buffer.c', |
|||
'duk_bi_date.c', |
|||
'duk_bi_date_unix.c', |
|||
'duk_bi_date_windows.c', |
|||
'duk_bi_duktape.c', |
|||
'duk_bi_error.c', |
|||
'duk_bi_function.c', |
|||
'duk_bi_global.c', |
|||
'duk_bi_json.c', |
|||
'duk_bi_math.c', |
|||
'duk_bi_number.c', |
|||
'duk_bi_object.c', |
|||
'duk_bi_pointer.c', |
|||
'duk_bi_protos.h', |
|||
'duk_bi_regexp.c', |
|||
'duk_bi_string.c', |
|||
'duk_bi_proxy.c', |
|||
'duk_bi_thread.c', |
|||
'duk_bi_thrower.c', |
|||
'duk_debug_fixedbuffer.c', |
|||
'duk_debug.h', |
|||
'duk_debug_macros.c', |
|||
'duk_debug_vsnprintf.c', |
|||
'duk_error_augment.c', |
|||
'duk_error.h', |
|||
'duk_error_longjmp.c', |
|||
'duk_error_macros.c', |
|||
'duk_error_misc.c', |
|||
'duk_error_throw.c', |
|||
'duk_forwdecl.h', |
|||
'duk_harray.h', |
|||
'duk_hbuffer_alloc.c', |
|||
'duk_hbuffer.h', |
|||
'duk_hbuffer_ops.c', |
|||
'duk_hcompfunc.h', |
|||
'duk_heap_alloc.c', |
|||
'duk_heap.h', |
|||
'duk_heap_hashstring.c', |
|||
'duk_heaphdr.h', |
|||
'duk_heap_markandsweep.c', |
|||
'duk_heap_memory.c', |
|||
'duk_heap_misc.c', |
|||
'duk_heap_refcount.c', |
|||
'duk_heap_stringcache.c', |
|||
'duk_heap_stringtable.c', |
|||
'duk_hnatfunc.h', |
|||
'duk_hobject_alloc.c', |
|||
'duk_hobject_class.c', |
|||
'duk_hobject_enum.c', |
|||
'duk_hobject_finalizer.c', |
|||
'duk_hobject.h', |
|||
'duk_hobject_misc.c', |
|||
'duk_hobject_pc2line.c', |
|||
'duk_hobject_props.c', |
|||
'duk_hstring.h', |
|||
'duk_hstring_misc.c', |
|||
'duk_hthread_alloc.c', |
|||
'duk_hthread_builtins.c', |
|||
'duk_hthread.h', |
|||
'duk_hthread_misc.c', |
|||
'duk_hthread_stacks.c', |
|||
'duk_hbufobj.h', |
|||
'duk_hbufobj_misc.c', |
|||
'duk_debugger.c', |
|||
'duk_debugger.h', |
|||
'duk_internal.h', |
|||
'duk_jmpbuf.h', |
|||
'duk_exception.h', |
|||
'duk_js_bytecode.h', |
|||
'duk_js_call.c', |
|||
'duk_js_compiler.c', |
|||
'duk_js_compiler.h', |
|||
'duk_js_executor.c', |
|||
'duk_js.h', |
|||
'duk_json.h', |
|||
'duk_js_ops.c', |
|||
'duk_js_var.c', |
|||
'duk_lexer.c', |
|||
'duk_lexer.h', |
|||
'duk_numconv.c', |
|||
'duk_numconv.h', |
|||
'duk_regexp_compiler.c', |
|||
'duk_regexp_executor.c', |
|||
'duk_regexp.h', |
|||
'duk_tval.c', |
|||
'duk_tval.h', |
|||
'duk_unicode.h', |
|||
'duk_unicode_support.c', |
|||
'duk_unicode_tables.c', |
|||
'duk_util_bitdecoder.c', |
|||
'duk_util_bitencoder.c', |
|||
'duk_util.h', |
|||
'duk_util_hashbytes.c', |
|||
'duk_util_hashprime.c', |
|||
'duk_util_misc.c', |
|||
'duk_util_tinyrandom.c', |
|||
'duk_util_bufwriter.c', |
|||
'duk_selftest.c', |
|||
'duk_selftest.h', |
|||
'duk_strings.h', |
|||
'duk_replacements.c', |
|||
'duk_replacements.h' |
|||
], srcdir, os.path.join(outdir, 'src-separate')) |
|||
|
|||
# Build temp versions of LICENSE.txt and AUTHORS.rst for embedding into |
|||
# autogenerated C/H files. |
|||
|
|||
# XXX: use a proper temp directory |
|||
|
|||
copy_and_cquote('LICENSE.txt', os.path.join(outdir, 'LICENSE.txt.tmp')) |
|||
copy_and_cquote('AUTHORS.rst', os.path.join(outdir, 'AUTHORS.rst.tmp')) |
|||
|
|||
# Create a duk_config.h. |
|||
# XXX: might be easier to invoke genconfig directly |
|||
def forward_genconfig_options(): |
|||
res = [] |
|||
res += [ '--metadata', os.path.abspath(opts.config_metadata) ] # rename option, --config-metadata => --metadata |
|||
if opts.platform is not None: |
|||
res += [ '--platform', opts.platform ] |
|||
if opts.compiler is not None: |
|||
res += [ '--compiler', opts.compiler ] |
|||
if opts.architecture is not None: |
|||
res += [ '--architecture', opts.architecture ] |
|||
if opts.c99_types_only: |
|||
res += [ '--c99-types-only' ] |
|||
if opts.dll: |
|||
res += [ '--dll' ] |
|||
if opts.support_feature_options: |
|||
res += [ '--support-feature-options' ] |
|||
if opts.emit_legacy_feature_check: |
|||
res += [ '--emit-legacy-feature-check' ] |
|||
if opts.emit_config_sanity_check: |
|||
res += [ '--emit-config-sanity-check' ] |
|||
if opts.omit_removed_config_options: |
|||
res += [ '--omit-removed-config-options' ] |
|||
if opts.omit_deprecated_config_options: |
|||
res += [ '--omit-deprecated-config-options' ] |
|||
if opts.omit_unused_config_options: |
|||
res += [ '--omit-unused-config-options' ] |
|||
if opts.add_active_defines_macro: |
|||
res += [ '--add-active-defines-macro' ] |
|||
for i in force_options_yaml: |
|||
res += [ '--option-yaml', i ] |
|||
for i in fixup_header_lines: |
|||
res += [ '--fixup-line', i ] |
|||
if not opts.sanity_strict: |
|||
res += [ '--sanity-warning' ] |
|||
if opts.use_cpp_warning: |
|||
res += [ '--use-cpp-warning' ] |
|||
return res |
|||
|
|||
cmd = [ |
|||
sys.executable, os.path.join('tools', 'genconfig.py'), |
|||
'--output', os.path.join(outdir, 'duk_config.h.tmp'), |
|||
'--git-commit', git_commit, '--git-describe', git_describe, '--git-branch', git_branch |
|||
] |
|||
cmd += forward_genconfig_options() |
|||
cmd += [ |
|||
'duk-config-header' |
|||
] |
|||
print(repr(cmd)) |
|||
exec_print_stdout(cmd) |
|||
|
|||
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src', 'duk_config.h')) |
|||
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-noline', 'duk_config.h')) |
|||
copy_file(os.path.join(outdir, 'duk_config.h.tmp'), os.path.join(outdir, 'src-separate', 'duk_config.h')) |
|||
|
|||
# Build duktape.h from parts, with some git-related replacements. |
|||
# The only difference between single and separate file duktape.h |
|||
# is the internal DUK_SINGLE_FILE define. |
|||
# |
|||
# Newline after 'i \': |
|||
# http://stackoverflow.com/questions/25631989/sed-insert-line-command-osx |
|||
copy_and_replace(os.path.join(srcdir, 'duktape.h.in'), os.path.join(outdir, 'src', 'duktape.h'), { |
|||
'@DUK_SINGLE_FILE@': '#define DUK_SINGLE_FILE', |
|||
'@LICENSE_TXT@': read_file(os.path.join(outdir, 'LICENSE.txt.tmp'), strip_last_nl=True), |
|||
'@AUTHORS_RST@': read_file(os.path.join(outdir, 'AUTHORS.rst.tmp'), strip_last_nl=True), |
|||
'@DUK_API_PUBLIC_H@': read_file(os.path.join(srcdir, 'duk_api_public.h.in'), strip_last_nl=True), |
|||
'@DUK_DBLUNION_H@': read_file(os.path.join(srcdir, 'duk_dblunion.h.in'), strip_last_nl=True), |
|||
'@DUK_VERSION_FORMATTED@': duk_version_formatted, |
|||
'@GIT_COMMIT@': git_commit, |
|||
'@GIT_COMMIT_CSTRING@': git_commit_cstring, |
|||
'@GIT_DESCRIBE@': git_describe, |
|||
'@GIT_DESCRIBE_CSTRING@': git_describe_cstring, |
|||
'@GIT_BRANCH@': git_branch, |
|||
'@GIT_BRANCH_CSTRING@': git_branch_cstring |
|||
}) |
|||
# Replace (rather than delete) the line so that line numbers match between the two variant headers. |
|||
copy_and_replace(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-separate', 'duktape.h'), { |
|||
'#define DUK_SINGLE_FILE': '#undef DUK_SINGLE_FILE' |
|||
}) |
|||
copy_file(os.path.join(outdir, 'src', 'duktape.h'), os.path.join(outdir, 'src-noline', 'duktape.h')) |
|||
|
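|||
# Minimal sketch (for illustration only, not executed by the build): the same |
|||
# kind of placeholder substitution copy_and_replace() performs above, applied |
|||
# to a single hypothetical input line. |
|||
def _demo_single_file_define(line='@DUK_SINGLE_FILE@'): |
|||
    combined = line.replace('@DUK_SINGLE_FILE@', '#define DUK_SINGLE_FILE') |
|||
    separate = combined.replace('#define DUK_SINGLE_FILE', '#undef DUK_SINGLE_FILE') |
|||
    return combined, separate  # same line count in both header variants |
|||
 |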
|||
# Autogenerated strings and built-in files |
|||
# |
|||
# There are currently no profile specific variants of strings/builtins, but |
|||
# this will probably change when functions are added/removed based on profile. |
|||
|
|||
# XXX: nuke this util, it's pointless |
|||
exec_print_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'genbuildparams.py'), |
|||
'--version=' + str(duk_version), |
|||
'--git-commit=' + git_commit, |
|||
'--git-describe=' + git_describe, |
|||
'--git-branch=' + git_branch, |
|||
'--out-json=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'), |
|||
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_buildparams.h.tmp') |
|||
]) |
|||
|
|||
res = exec_get_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'scan_used_stridx_bidx.py') |
|||
] + glob.glob(os.path.join(srcdir, '*.c')) \ |
|||
+ glob.glob(os.path.join(srcdir, '*.h')) \ |
|||
+ glob.glob(os.path.join(srcdir, '*.h.in')) |
|||
) |
|||
with open(os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'), 'wb') as f: |
|||
f.write(res) |
|||
|
|||
gb_opts = [] |
|||
gb_opts.append('--ram-support') # enable by default |
|||
if opts.rom_support: |
|||
# ROM string/object support is not enabled by default because |
|||
# it increases the generated duktape.c considerably. |
|||
print('Enabling --rom-support for genbuiltins.py') |
|||
gb_opts.append('--rom-support') |
|||
if opts.rom_auto_lightfunc: |
|||
print('Enabling --rom-auto-lightfunc for genbuiltins.py') |
|||
gb_opts.append('--rom-auto-lightfunc') |
|||
for fn in opts.user_builtin_metadata: |
|||
print('Forwarding --user-builtin-metadata %s' % fn) |
|||
gb_opts.append('--user-builtin-metadata') |
|||
gb_opts.append(fn) |
|||
exec_print_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'genbuiltins.py'), |
|||
'--buildinfo=' + os.path.join(outdir, 'src-separate', 'buildparams.json.tmp'), |
|||
'--used-stridx-metadata=' + os.path.join(outdir, 'duk_used_stridx_bidx_defs.json.tmp'), |
|||
'--strings-metadata=' + os.path.join(srcdir, 'strings.yaml'), |
|||
'--objects-metadata=' + os.path.join(srcdir, 'builtins.yaml'), |
|||
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_builtins.h'), |
|||
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_builtins.c'), |
|||
'--out-metadata-json=' + os.path.join(outdir, 'duk_build_meta.json') |
|||
] + gb_opts) |
|||
|
|||
# Autogenerated Unicode files |
|||
# |
|||
# Note: not all of the generated headers are used. For instance, the |
|||
# match table for "WhiteSpace-Z" is not used, because a custom piece |
|||
# of code handles that particular match. |
|||
# |
|||
# UnicodeData.txt contains ranges expressed like this: |
|||
# |
|||
# 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; |
|||
# 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; |
|||
# |
|||
# These are currently decoded into individual characters as a prestep. |
|||
# |
|||
# For IDPART: |
|||
# UnicodeCombiningMark -> categories Mn, Mc |
|||
# UnicodeDigit -> categories Nd |
|||
# UnicodeConnectorPunctuation -> categories Pc |
|||
|
|||
# Whitespace (unused now) |
|||
WHITESPACE_INCL='Zs' # USP = Any other Unicode space separator |
|||
WHITESPACE_EXCL='NONE' |
|||
|
|||
# Unicode letter (unused now) |
|||
LETTER_INCL='Lu,Ll,Lt,Lm,Lo' |
|||
LETTER_EXCL='NONE' |
|||
LETTER_NOA_INCL='Lu,Ll,Lt,Lm,Lo' |
|||
LETTER_NOA_EXCL='ASCII' |
|||
LETTER_NOABMP_INCL=LETTER_NOA_INCL |
|||
LETTER_NOABMP_EXCL='ASCII,NONBMP' |
|||
|
|||
# Identifier start |
|||
# E5 Section 7.6 |
|||
IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' |
|||
IDSTART_EXCL='NONE' |
|||
IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' |
|||
IDSTART_NOA_EXCL='ASCII' |
|||
IDSTART_NOABMP_INCL=IDSTART_NOA_INCL |
|||
IDSTART_NOABMP_EXCL='ASCII,NONBMP' |
|||
|
|||
# Identifier start - Letter: allows matching the (rarely needed) 'Letter' |
|||
# production space-efficiently with the help of IdentifierStart. The |
|||
# 'Letter' production is only needed for case conversion of the Greek |
|||
# final sigma. |
|||
IDSTART_MINUS_LETTER_INCL=IDSTART_NOA_INCL |
|||
IDSTART_MINUS_LETTER_EXCL='Lu,Ll,Lt,Lm,Lo' |
|||
IDSTART_MINUS_LETTER_NOA_INCL=IDSTART_NOA_INCL |
|||
IDSTART_MINUS_LETTER_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII' |
|||
IDSTART_MINUS_LETTER_NOABMP_INCL=IDSTART_NOA_INCL |
|||
IDSTART_MINUS_LETTER_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,ASCII,NONBMP' |
|||
|
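|||
# Conceptual sketch (category names only, toy sets): extract_chars.py keeps |
|||
# code points whose category is in the include list and not in the exclude |
|||
# list, so Letter can be recovered as IdentifierStart minus the |
|||
# "IdentifierStart minus Letter" table. |
|||
def _demo_idstart_minus_letter(): |
|||
    idstart = set([ 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', '0024', '005F' ]) |
|||
    letter = set([ 'Lu', 'Ll', 'Lt', 'Lm', 'Lo' ]) |
|||
    idstart_minus_letter = idstart - letter |
|||
    return (idstart - idstart_minus_letter) == letter  # True |
|||
 |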
|||
# Identifier start - Identifier part |
|||
# E5 Section 7.6: IdentifierPart, but remove IdentifierStart (already above) |
|||
IDPART_MINUS_IDSTART_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' |
|||
IDPART_MINUS_IDSTART_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F' |
|||
IDPART_MINUS_IDSTART_NOA_INCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,Mn,Mc,Nd,Pc,200C,200D' |
|||
IDPART_MINUS_IDSTART_NOA_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII' |
|||
IDPART_MINUS_IDSTART_NOABMP_INCL=IDPART_MINUS_IDSTART_NOA_INCL |
|||
IDPART_MINUS_IDSTART_NOABMP_EXCL='Lu,Ll,Lt,Lm,Lo,Nl,0024,005F,ASCII,NONBMP' |
|||
|
|||
print('Expand UnicodeData.txt ranges') |
|||
|
|||
exec_print_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'prepare_unicode_data.py'), |
|||
os.path.join(srcdir, 'UnicodeData.txt'), |
|||
os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp') |
|||
]) |
|||
|
|||
def extract_chars(incl, excl, suffix): |
|||
#print('- extract_chars: %s %s %s' % (incl, excl, suffix)) |
|||
res = exec_get_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'extract_chars.py'), |
|||
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), |
|||
'--include-categories=' + incl, |
|||
'--exclude-categories=' + excl, |
|||
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.c.tmp' % suffix), |
|||
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_%s.h.tmp' % suffix), |
|||
'--table-name=' + 'duk_unicode_%s' % suffix |
|||
]) |
|||
with open(os.path.join(outdir, 'src-separate', suffix + '.txt'), 'wb') as f: |
|||
f.write(res) |
|||
|
|||
def extract_caseconv(): |
|||
#print('- extract_caseconv case conversion') |
|||
res = exec_get_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'extract_caseconv.py'), |
|||
'--command=caseconv_bitpacked', |
|||
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), |
|||
'--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'), |
|||
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'), |
|||
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'), |
|||
'--table-name-lc=duk_unicode_caseconv_lc', |
|||
'--table-name-uc=duk_unicode_caseconv_uc' |
|||
]) |
|||
with open(os.path.join(outdir, 'src-separate', 'caseconv.txt'), 'wb') as f: |
|||
f.write(res) |
|||
|
|||
#print('- extract_caseconv canon lookup') |
|||
res = exec_get_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'extract_caseconv.py'), |
|||
'--command=re_canon_lookup', |
|||
'--unicode-data=' + os.path.join(outdir, 'src-separate', 'UnicodeData-expanded.tmp'), |
|||
'--special-casing=' + os.path.join(srcdir, 'SpecialCasing.txt'), |
|||
'--out-source=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'), |
|||
'--out-header=' + os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'), |
|||
'--table-name-re-canon-lookup=duk_unicode_re_canon_lookup' |
|||
]) |
|||
with open(os.path.join(outdir, 'src-separate', 'caseconv_re_canon_lookup.txt'), 'wb') as f: |
|||
f.write(res) |
|||
|
|||
print('Create Unicode tables for codepoint classes') |
|||
extract_chars(WHITESPACE_INCL, WHITESPACE_EXCL, 'ws') |
|||
extract_chars(LETTER_INCL, LETTER_EXCL, 'let') |
|||
extract_chars(LETTER_NOA_INCL, LETTER_NOA_EXCL, 'let_noa') |
|||
extract_chars(LETTER_NOABMP_INCL, LETTER_NOABMP_EXCL, 'let_noabmp') |
|||
extract_chars(IDSTART_INCL, IDSTART_EXCL, 'ids') |
|||
extract_chars(IDSTART_NOA_INCL, IDSTART_NOA_EXCL, 'ids_noa') |
|||
extract_chars(IDSTART_NOABMP_INCL, IDSTART_NOABMP_EXCL, 'ids_noabmp') |
|||
extract_chars(IDSTART_MINUS_LETTER_INCL, IDSTART_MINUS_LETTER_EXCL, 'ids_m_let') |
|||
extract_chars(IDSTART_MINUS_LETTER_NOA_INCL, IDSTART_MINUS_LETTER_NOA_EXCL, 'ids_m_let_noa') |
|||
extract_chars(IDSTART_MINUS_LETTER_NOABMP_INCL, IDSTART_MINUS_LETTER_NOABMP_EXCL, 'ids_m_let_noabmp') |
|||
extract_chars(IDPART_MINUS_IDSTART_INCL, IDPART_MINUS_IDSTART_EXCL, 'idp_m_ids') |
|||
extract_chars(IDPART_MINUS_IDSTART_NOA_INCL, IDPART_MINUS_IDSTART_NOA_EXCL, 'idp_m_ids_noa') |
|||
extract_chars(IDPART_MINUS_IDSTART_NOABMP_INCL, IDPART_MINUS_IDSTART_NOABMP_EXCL, 'idp_m_ids_noabmp') |
|||
|
|||
print('Create Unicode tables for case conversion') |
|||
extract_caseconv() |
|||
|
|||
print('Combine sources and clean up') |
|||
|
|||
# Inject autogenerated files into source and header files so that they are |
|||
# usable (for all profiles and define cases) directly. |
|||
# |
|||
# The injection points use a standard C preprocessor #include syntax |
|||
# (earlier these were actual includes). |
|||
|
|||
copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode.h'), os.path.join(outdir, 'src-separate', 'duk_unicode.h'), { |
|||
'#include "duk_unicode_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_m_let_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_m_let_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_idp_m_ids_noa.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_idp_m_ids_noabmp.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_caseconv.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.h.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_re_canon_lookup.h"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.h.tmp'), strip_last_nl=True) |
|||
}) |
|||
|
|||
copy_and_replace(os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), os.path.join(outdir, 'src-separate', 'duk_unicode_tables.c'), { |
|||
'#include "duk_unicode_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noa.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_noabmp.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_m_let_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noa.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_ids_m_let_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_ids_m_let_noabmp.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_idp_m_ids_noa.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noa.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_idp_m_ids_noabmp.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_idp_m_ids_noabmp.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_caseconv.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_caseconv.c.tmp'), strip_last_nl=True), |
|||
'#include "duk_unicode_re_canon_lookup.c"': read_file(os.path.join(outdir, 'src-separate', 'duk_unicode_re_canon_lookup.c.tmp'), strip_last_nl=True) |
|||
}) |
|||
|
|||
# Clean up some temporary files |
|||
|
|||
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[-4:] == '.tmp') |
|||
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x in [ |
|||
'ws.txt', |
|||
'let.txt', 'let_noa.txt', 'let_noabmp.txt', |
|||
'ids.txt', 'ids_noa.txt', 'ids_noabmp.txt', |
|||
'ids_m_let.txt', 'ids_m_let_noa.txt', 'ids_m_let_noabmp.txt', |
|||
'idp_m_ids.txt', 'idp_m_ids_noa.txt', 'idp_m_ids_noabmp.txt' |
|||
]) |
|||
delete_matching_files(os.path.join(outdir, 'src-separate'), lambda x: x[0:8] == 'caseconv' and x[-4:] == '.txt') |
|||
|
|||
# Create a combined source file, duktape.c, into a separate combined source |
|||
# directory. This allows user to just include "duktape.c", "duktape.h", and |
|||
# "duk_config.h" into a project and maximizes inlining and size optimization |
|||
# opportunities even with older compilers. Because some projects include |
|||
# these files into their repository, the result should be deterministic and |
|||
# diffable. Also, it must retain __FILE__/__LINE__ behavior through |
|||
# preprocessor directives. Whitespace and comments can be stripped as long |
|||
# as the other requirements are met. For some users it's preferable *not* |
|||
# to use #line directives in the combined source, so a separate variant is |
|||
# created for that, see: https://github.com/svaarala/duktape/pull/363. |
|||
|
|||
def create_source_prologue(license_file, authors_file): |
|||
res = [] |
|||
|
|||
# Because duktape.c/duktape.h/duk_config.h are often distributed or |
|||
# included in project sources as is, add a license reminder and |
|||
# Duktape version information to the duktape.c header (duktape.h |
|||
# already contains them). |
|||
|
|||
duk_major = duk_version / 10000 |
|||
duk_minor = duk_version / 100 % 100 |
|||
duk_patch = duk_version % 100 |
|||
res.append('/*') |
|||
res.append(' * Single source autogenerated distributable for Duktape %d.%d.%d.' % (duk_major, duk_minor, duk_patch)) |
|||
res.append(' *') |
|||
res.append(' * Git commit %s (%s).' % (git_commit, git_describe)) |
|||
res.append(' * Git branch %s.' % git_branch) |
|||
res.append(' *') |
|||
res.append(' * See Duktape AUTHORS.rst and LICENSE.txt for copyright and') |
|||
res.append(' * licensing information.') |
|||
res.append(' */') |
|||
res.append('') |
|||
|
|||
# Add LICENSE.txt and AUTHORS.rst to combined source so that they're automatically |
|||
# included and are up-to-date. |
|||
|
|||
res.append('/* LICENSE.txt */') |
|||
with open(license_file, 'rb') as f: |
|||
for line in f: |
|||
res.append(line.strip()) |
|||
res.append('') |
|||
res.append('/* AUTHORS.rst */') |
|||
with open(authors_file, 'rb') as f: |
|||
for line in f: |
|||
res.append(line.strip()) |
|||
|
|||
return '\n'.join(res) + '\n' |
|||
|
|||
def select_combined_sources(): |
|||
# These files must appear before the alphabetically sorted |
|||
# ones so that static variables get defined before they're |
|||
# used. We can't forward declare them because that would |
|||
# cause C++ issues (see GH-63). When changing, verify by |
|||
# compiling with g++. |
|||
handpick = [ |
|||
'duk_replacements.c', |
|||
'duk_debug_macros.c', |
|||
'duk_builtins.c', |
|||
'duk_error_macros.c', |
|||
'duk_unicode_support.c', |
|||
'duk_util_misc.c', |
|||
'duk_util_hashprime.c', |
|||
'duk_hobject_class.c' |
|||
] |
|||
|
|||
files = [] |
|||
for fn in handpick: |
|||
files.append(fn) |
|||
|
|||
for fn in sorted(os.listdir(os.path.join(outdir, 'src-separate'))): |
|||
f_ext = os.path.splitext(fn)[1] |
|||
if f_ext not in [ '.c' ]: |
|||
continue |
|||
if fn in files: |
|||
continue |
|||
files.append(fn) |
|||
|
|||
res = map(lambda x: os.path.join(outdir, 'src-separate', x), files) |
|||
#print(repr(files)) |
|||
#print(repr(res)) |
|||
return res |
|||
|
|||
with open(os.path.join(outdir, 'prologue.tmp'), 'wb') as f: |
|||
f.write(create_source_prologue(os.path.join(outdir, 'LICENSE.txt.tmp'), os.path.join(outdir, 'AUTHORS.rst.tmp'))) |
|||
|
|||
exec_print_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'combine_src.py'), |
|||
'--include-path', os.path.join(outdir, 'src-separate'), |
|||
'--include-exclude', 'duk_config.h', # don't inline |
|||
'--include-exclude', 'duktape.h', # don't inline |
|||
'--prologue', os.path.join(outdir, 'prologue.tmp'), |
|||
'--output-source', os.path.join(outdir, 'src', 'duktape.c'), |
|||
'--output-metadata', os.path.join(outdir, 'src', 'metadata.json'), |
|||
'--line-directives' |
|||
] + select_combined_sources()) |
|||
|
|||
exec_print_stdout([ |
|||
sys.executable, |
|||
os.path.join('tools', 'combine_src.py'), |
|||
'--include-path', os.path.join(outdir, 'src-separate'), |
|||
'--include-exclude', 'duk_config.h', # don't inline |
|||
'--include-exclude', 'duktape.h', # don't inline |
|||
'--prologue', os.path.join(outdir, 'prologue.tmp'), |
|||
'--output-source', os.path.join(outdir, 'src-noline', 'duktape.c'), |
|||
'--output-metadata', os.path.join(outdir, 'src-noline', 'metadata.json') |
|||
] + select_combined_sources()) |
|||
|
|||
# Clean up remaining temp files |
|||
delete_matching_files(outdir, lambda x: x[-4:] == '.tmp') |
|||
|
|||
print('Config-and-prepare finished successfully') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,37 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# UnicodeData.txt may contain ranges in addition to individual characters. |
|||
# Unpack the ranges into individual characters for the other scripts to use. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
|
|||
def main(): |
|||
f_in = open(sys.argv[1], 'rb') |
|||
f_out = open(sys.argv[2], 'wb') |
|||
while True: |
|||
line = f_in.readline() |
|||
if line == '' or line == '\n': |
|||
break |
|||
parts = line.split(';') # keep newline |
|||
if parts[1].endswith('First>'): |
|||
line2 = f_in.readline() |
|||
parts2 = line2.split(';') |
|||
if not parts2[1].endswith('Last>'): |
|||
raise Exception('cannot parse range') |
|||
cp1 = long(parts[0], 16) |
|||
cp2 = long(parts2[0], 16) |
|||
|
|||
suffix = ';'.join(parts[1:]) |
|||
for i in xrange(cp1, cp2 + 1): # inclusive |
|||
f_out.write('%04X;%s' % (i, suffix)) |
|||
else: |
|||
f_out.write(line) |
|||
|
|||
f_in.close() |
|||
f_out.flush() |
|||
f_out.close() |
|||
|
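|||
# Tiny illustration (the two-line range below is made up): the same expansion |
|||
# applied to a range 0041..0043. Real UnicodeData.txt ranges such as the CJK |
|||
# block behave the same way, and every expanded line carries the descriptive |
|||
# fields of the '<..., First>' line. |
|||
def _demo_expand_range(): |
|||
    first = '0041;<Demo, First>;Lu;0;L;;;;;N;;;;;\n' |
|||
    last = '0043;<Demo, Last>;Lu;0;L;;;;;N;;;;;\n' |
|||
    cp1 = long(first.split(';')[0], 16) |
|||
    cp2 = long(last.split(';')[0], 16) |
|||
    suffix = ';'.join(first.split(';')[1:]) |
|||
    return [ '%04X;%s' % (i, suffix) for i in xrange(cp1, cp2 + 1) ] |
|||
 |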
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,26 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Resolve a line number in the combined source into an uncombined file/line |
|||
# using a dist/src/metadata.json file. |
|||
# |
|||
# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345 |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import json |
|||
|
|||
def main(): |
|||
with open(sys.argv[1], 'rb') as f: |
|||
metadata = json.loads(f.read()) |
|||
lineno = int(sys.argv[2]) |
|||
|
|||
for e in reversed(metadata['line_map']): |
|||
if lineno >= e['combined_line']: |
|||
orig_lineno = e['original_line'] + (lineno - e['combined_line']) |
|||
print('%s:%d -> %s:%d' % ('duktape.c', lineno, |
|||
e['original_file'], orig_lineno)) |
|||
break |
|||
|
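|||
# Worked example with hypothetical metadata: given a line_map like the one |
|||
# combine_src.py emits, combined line 120 falls into the first entry and |
|||
# resolves to duk_foo.c line 21. |
|||
def _demo_resolve(): |
|||
    line_map = [ |
|||
        { 'original_file': 'duk_foo.c', 'original_line': 1, 'combined_line': 100 }, |
|||
        { 'original_file': 'duk_bar.c', 'original_line': 1, 'combined_line': 200 } |
|||
    ] |
|||
    lineno = 120 |
|||
    for e in reversed(line_map): |
|||
        if lineno >= e['combined_line']: |
|||
            return '%s:%d' % (e['original_file'], e['original_line'] + (lineno - e['combined_line'])) |
|||
 |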
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,135 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Scan potential external strings from Ecmascript and C files. |
|||
# |
|||
# Very simplistic example with a lot of limitations: |
|||
# |
|||
# - Doesn't handle multiple variables in a variable declaration |
|||
# |
|||
# - Only extracts strings from C files, these may correspond to |
|||
# Duktape/C bindings (but in many cases don't) |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import json |
|||
|
|||
strmap = {} |
|||
|
|||
# Ecmascript function declaration |
|||
re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE) |
|||
|
|||
# Ecmascript variable declaration |
|||
# XXX: doesn't handle multiple variables |
|||
re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE) |
|||
|
|||
# Ecmascript variable assignment |
|||
re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE) |
|||
|
|||
# Ecmascript dotted property reference (also matches numbers like |
|||
# '4.0', which are separately rejected below) |
|||
re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE) |
|||
re_digits = re.compile(r'^\d+$', re.UNICODE) |
|||
|
|||
# Ecmascript or C string literal |
|||
re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE) |
|||
re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE) |
|||
|
|||
def strDecode(x): |
|||
# Need to decode hex, unicode, and other escapes. Python syntax |
|||
# is close enough to C and Ecmascript so use eval for now. |
|||
|
|||
try: |
|||
return eval('u' + x) # interpret as unicode string |
|||
except: |
|||
sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n') |
|||
return None |
|||
|
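|||
# Small illustration (the sample line is made up): how re_strlit_dquot and |
|||
# strDecode() cooperate on one line of C source; the escape sequence is |
|||
# decoded so the result is the actual string value. |
|||
def _demo_scan_line(line='duk_push_string(ctx, "hello\\nworld");'): |
|||
    out = [] |
|||
    for m in re_strlit_dquot.finditer(line): |
|||
        s = strDecode(m.group(1)) |
|||
        if s is not None: |
|||
            out.append(s) |
|||
    return out  # [ u'hello\nworld' ] |
|||
 |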
|||
def scan(f, fn): |
|||
global strmap |
|||
|
|||
# Scan rules depend on file type |
|||
if fn[-2:] == '.c': |
|||
use_funcname = False |
|||
use_vardecl = False |
|||
use_varassign = False |
|||
use_propref = False |
|||
use_strlit_dquot = True |
|||
use_strlit_squot = False |
|||
else: |
|||
use_funcname = True |
|||
use_vardecl = True |
|||
use_varassign = True |
|||
use_propref = True |
|||
use_strlit_dquot = True |
|||
use_strlit_squot = True |
|||
|
|||
for line in f: |
|||
# Assume input data is UTF-8 |
|||
line = line.decode('utf-8') |
|||
|
|||
if use_funcname: |
|||
for m in re_funcname.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_vardecl: |
|||
for m in re_vardecl.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_varassign: |
|||
for m in re_varassign.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_propref: |
|||
for m in re_propref.finditer(line): |
|||
parts = m.group(1).split('.') |
|||
if re_digits.match(parts[0]) is not None: |
|||
# Probably a number ('4.0' or such) |
|||
pass |
|||
else: |
|||
for part in parts: |
|||
strmap[part] = True |
|||
|
|||
if use_strlit_dquot: |
|||
for m in re_strlit_dquot.finditer(line): |
|||
s = strDecode(m.group(1)) |
|||
if s is not None: |
|||
strmap[s] = True |
|||
|
|||
if use_strlit_squot: |
|||
for m in re_strlit_squot.finditer(line): |
|||
s = strDecode(m.group(1)) |
|||
if s is not None: |
|||
strmap[s] = True |
|||
|
|||
def main(): |
|||
for fn in sys.argv[1:]: |
|||
f = open(fn, 'rb') |
|||
scan(f, fn) |
|||
f.close() |
|||
|
|||
strs = [] |
|||
strs_base64 = [] |
|||
doc = { |
|||
# Strings as Unicode strings |
|||
'scanned_strings': strs, |
|||
|
|||
# Strings as base64-encoded UTF-8 data, which should be ready |
|||
# to be used in C code (Duktape internal string representation |
|||
# is UTF-8) |
|||
'scanned_strings_base64': strs_base64 |
|||
} |
|||
k = strmap.keys() |
|||
k.sort() |
|||
for s in k: |
|||
strs.append(s) |
|||
t = s.encode('utf-8').encode('base64') |
|||
if len(t) > 0 and t[-1] == '\n': |
|||
t = t[0:-1] |
|||
strs_base64.append(t) |
|||
|
|||
print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,56 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Scan Duktape code base for references to built-in strings and built-in |
|||
# objects, i.e. for: |
|||
# |
|||
# - Strings which will need DUK_STRIDX_xxx constants and a place in the |
|||
# thr->strs[] array. |
|||
# |
|||
# - Objects which will need DUK_BIDX_xxx constants and a place in the |
|||
# thr->builtins[] array. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import json |
|||
|
|||
re_str_stridx = re.compile(r'DUK_STRIDX_(\w+)', re.MULTILINE) |
|||
re_str_heap = re.compile(r'DUK_HEAP_STRING_(\w+)', re.MULTILINE) |
|||
re_str_hthread = re.compile(r'DUK_HTHREAD_STRING_(\w+)', re.MULTILINE) |
|||
re_obj_bidx = re.compile(r'DUK_BIDX_(\w+)', re.MULTILINE) |
|||
|
|||
def main(): |
|||
str_defs = {} |
|||
obj_defs = {} |
|||
|
|||
for fn in sys.argv[1:]: |
|||
with open(fn, 'rb') as f: |
|||
d = f.read() |
|||
for m in re.finditer(re_str_stridx, d): |
|||
str_defs[m.group(1)] = True |
|||
for m in re.finditer(re_str_heap, d): |
|||
str_defs[m.group(1)] = True |
|||
for m in re.finditer(re_str_hthread, d): |
|||
str_defs[m.group(1)] = True |
|||
for m in re.finditer(re_obj_bidx, d): |
|||
obj_defs[m.group(1)] = True |
|||
|
|||
str_used = [] |
|||
for k in sorted(str_defs.keys()): |
|||
str_used.append('DUK_STRIDX_' + k) |
|||
|
|||
obj_used = [] |
|||
for k in sorted(obj_defs.keys()): |
|||
obj_used.append('DUK_BIDX_' + k) |
|||
|
|||
doc = { |
|||
'used_stridx_defines': str_used, |
|||
'used_bidx_defines': obj_used, |
|||
'count_used_stridx_defines': len(str_used), |
|||
'count_used_bidx_defines': len(obj_used) |
|||
} |
|||
print(json.dumps(doc, indent=4)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,4 +1,4 @@ |
|||
import os, sys, json, yaml |
|||
|
|||
if __name__ == '__main__': |
|||
print(json.dumps(yaml.load(sys.stdin))) |
|||
print(json.dumps(yaml.load(sys.stdin))) |
@ -1,257 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Combine a set of a source files into a single C file. |
|||
# |
|||
# Overview of the process: |
|||
# |
|||
# * Parse user supplied C files. Add automatic #undefs at the end |
|||
# of each C file to avoid defines bleeding from one file to another. |
|||
# |
|||
# * Combine the C files in specified order. If sources have ordering |
|||
# dependencies (depends on application), order may matter. |
|||
# |
|||
# * Process #include statements in the combined source, categorizing |
|||
# them either as "internal" (found in specified include path) or |
|||
# "external". Internal includes, unless explicitly excluded, are |
|||
# inlined into the result while extenal includes are left as is. |
|||
# Duplicate #include statements are replaced with a comment. |
|||
# |
|||
# At every step, source and header lines are represented with explicit |
|||
# line objects which keep track of original filename and line. The |
|||
# output contains #line directives, if necessary, to ensure error |
|||
# throwing and other diagnostic info will work in a useful manner when |
|||
# deployed. It's also possible to generate a combined source with no |
|||
# #line directives. |
|||
# |
|||
# Making the process deterministic is important, so that if users have |
|||
# diffs that they apply to the combined source, such diffs would apply |
|||
# for as long as possible. |
|||
# |
|||
# Limitations and notes: |
|||
# |
|||
# * While there are automatic #undef's for #define's introduced in each |
|||
# C file, it's not possible to "undefine" structs, unions, etc. If |
|||
# there are structs/unions/typedefs with conflicting names, these |
|||
# have to be resolved in the source files first. |
|||
# |
|||
# * Because duplicate #include statements are suppressed, the process |
|||
# currently assumes that #include statements are not conditional. |
|||
# |
|||
# * A system header might be #include'd in multiple source files with |
|||
# different feature defines (like _BSD_SOURCE). Because the #include |
|||
# file will only appear once in the resulting source, the first |
|||
# occurrence wins. The result may not work correctly if the feature |
|||
# defines must actually be different between two or more source files. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import json |
|||
import optparse |
|||
|
|||
# Include path for finding include files which are amalgamated. |
|||
include_paths = [] |
|||
|
|||
# Include files specifically excluded from being inlined. |
|||
include_excluded = [] |
|||
|
|||
class File: |
|||
filename_full = None |
|||
filename = None |
|||
lines = None |
|||
|
|||
def __init__(self, filename, lines): |
|||
self.filename = os.path.basename(filename) |
|||
self.filename_full = filename |
|||
self.lines = lines |
|||
|
|||
class Line: |
|||
filename_full = None |
|||
filename = None |
|||
lineno = None |
|||
data = None |
|||
|
|||
def __init__(self, filename, lineno, data): |
|||
self.filename = os.path.basename(filename) |
|||
self.filename_full = filename |
|||
self.lineno = lineno |
|||
self.data = data |
|||
|
|||
def readFile(filename): |
|||
lines = [] |
|||
|
|||
with open(filename, 'rb') as f: |
|||
lineno = 0 |
|||
for line in f: |
|||
lineno += 1 |
|||
if len(line) > 0 and line[-1] == '\n': |
|||
line = line[:-1] |
|||
lines.append(Line(filename, lineno, line)) |
|||
|
|||
return File(filename, lines) |
|||
|
|||
def lookupInclude(incfn): |
|||
re_sep = re.compile(r'/|\\') |
|||
|
|||
inccomp = re.split(re_sep, incfn) # split include path, support / and \ |
|||
|
|||
for path in include_paths: |
|||
fn = apply(os.path.join, [ path ] + inccomp) |
|||
if os.path.exists(fn): |
|||
return fn # Return full path to first match |
|||
|
|||
return None |
|||
|
|||
def addAutomaticUndefs(f): |
|||
defined = {} |
|||
|
|||
re_def = re.compile(r'#define\s+(\w+).*$') |
|||
re_undef = re.compile(r'#undef\s+(\w+).*$') |
|||
|
|||
for line in f.lines: |
|||
m = re_def.match(line.data) |
|||
if m is not None: |
|||
#print('DEFINED: %s' % repr(m.group(1))) |
|||
defined[m.group(1)] = True |
|||
m = re_undef.match(line.data) |
|||
if m is not None: |
|||
# Could just ignore #undef's here: we'd then emit |
|||
# reliable #undef's (though maybe duplicates) at |
|||
# the end. |
|||
#print('UNDEFINED: %s' % repr(m.group(1))) |
|||
if defined.has_key(m.group(1)): |
|||
del defined[m.group(1)] |
|||
|
|||
# Undefine anything that seems to be left defined. This is not a 100% |
|||
# process because some #undef's might be conditional which we don't |
|||
# track at the moment. Note that it's safe to #undef something that's |
|||
# not defined. |
|||
|
|||
keys = sorted(defined.keys()) # deterministic order |
|||
if len(keys) > 0: |
|||
#print('STILL DEFINED: %r' % repr(defined.keys())) |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '')) |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */')) |
|||
for k in keys: |
|||
f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k)) |
|||
|
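|||
# Illustrative example (the input file is made up): a source file which |
|||
# #defines DUK__DEMO_FOO but never #undefs it gets an automatic trailing |
|||
# #undef appended, so the define cannot bleed into the next combined file. |
|||
def _demo_automatic_undefs(): |
|||
    f = File('demo.c', [ Line('demo.c', 1, '#define DUK__DEMO_FOO 1') ]) |
|||
    addAutomaticUndefs(f) |
|||
    return [ line.data for line in f.lines ] |
|||
    # -> ['#define DUK__DEMO_FOO 1', '', '/* automatic undefs */', '#undef DUK__DEMO_FOO'] |
|||
 |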
|||
def createCombined(files, prologue_filename, line_directives): |
|||
res = [] |
|||
line_map = [] # indicate combined source lines where uncombined file/line would change |
|||
metadata = { |
|||
'line_map': line_map |
|||
} |
|||
|
|||
emit_state = [ None, None ] # curr_filename, curr_lineno |
|||
|
|||
def emit(line): |
|||
if isinstance(line, (str, unicode)): |
|||
res.append(line) |
|||
emit_state[1] += 1 |
|||
else: |
|||
if line.filename != emit_state[0] or line.lineno != emit_state[1]: |
|||
if line_directives: |
|||
res.append('#line %d "%s"' % (line.lineno, line.filename)) |
|||
line_map.append({ 'original_file': line.filename, |
|||
'original_line': line.lineno, |
|||
'combined_line': len(res) + 1 }) |
|||
res.append(line.data) |
|||
emit_state[0] = line.filename |
|||
emit_state[1] = line.lineno + 1 |
|||
|
|||
included = {} # headers already included |
|||
|
|||
if prologue_filename is not None: |
|||
with open(prologue_filename, 'rb') as f: |
|||
for line in f.read().split('\n'): |
|||
res.append(line) |
|||
|
|||
re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$') |
|||
|
|||
# Process a file, appending it to the result; the input may be a |
|||
# source or an include file. #include directives are handled |
|||
# recursively. |
|||
def processFile(f): |
|||
#print('Process file: ' + f.filename) |
|||
|
|||
for line in f.lines: |
|||
if not line.data.startswith('#include'): |
|||
emit(line) |
|||
continue |
|||
|
|||
m = re_inc.match(line.data) |
|||
if m is None: |
|||
raise Exception('Couldn\'t match #include line: %s' % repr(line.data)) |
|||
incpath = m.group(2) |
|||
if incpath in include_excluded: |
|||
# Specific include files excluded from the |
|||
# inlining / duplicate suppression process. |
|||
emit(line) # keep as is |
|||
continue |
|||
|
|||
if included.has_key(incpath): |
|||
# We suppress duplicate includes, both internal and |
|||
# external, based on the assumption that includes are |
|||
# not behind #ifdef checks. This is the case for |
|||
# Duktape (except for the include files excluded). |
|||
emit('/* #include %s -> already included */' % incpath) |
|||
continue |
|||
included[incpath] = True |
|||
|
|||
# An include file is considered "internal" and is amalgamated |
|||
# if it is found in the include path provided by the user. |
|||
|
|||
incfile = lookupInclude(incpath) |
|||
if incfile is not None: |
|||
#print('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile))) |
|||
emit('/* #include %s */' % incpath) |
|||
processFile(readFile(incfile)) |
|||
else: |
|||
#print('Include considered external: %s' % repr(line.data)) |
|||
emit(line) # keep as is |
|||
|
|||
for f in files: |
|||
processFile(f) |
|||
|
|||
return '\n'.join(res) + '\n', metadata |
|||
|
|||
def main(): |
|||
global include_paths, include_excluded |
|||
|
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times') |
|||
parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)') |
|||
parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file') |
|||
parser.add_option('--output-source', dest='output_source', help='Output source filename') |
|||
parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename') |
|||
parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
assert(opts.include_paths is not None) |
|||
include_paths = opts.include_paths # global for easy access |
|||
include_excluded = opts.include_excluded |
|||
assert(opts.output_source) |
|||
assert(opts.output_metadata) |
|||
|
|||
print('Read input files, add automatic #undefs') |
|||
sources = args |
|||
files = [] |
|||
for fn in sources: |
|||
res = readFile(fn) |
|||
#print('Add automatic undefs for: ' + fn) |
|||
addAutomaticUndefs(res) |
|||
files.append(res) |
|||
|
|||
print('Create combined source file from %d source files' % len(files)) |
|||
combined_source, metadata = \ |
|||
createCombined(files, opts.prologue, opts.line_directives) |
|||
with open(opts.output_source, 'wb') as f: |
|||
f.write(combined_source) |
|||
with open(opts.output_metadata, 'wb') as f: |
|||
f.write(json.dumps(metadata, indent=4)) |
|||
|
|||
print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,246 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Helper to create an SPDX license file (http://spdx.org) |
|||
# |
|||
# This must be executed when the dist/ directory is otherwise complete, |
|||
# except for the SPDX license, so that the file lists and such contained |
|||
# in the SPDX license will be correct. |
|||
# |
|||
# The utility outputs RDF/XML to the specified file: |
|||
# |
|||
# $ python create_spdx_license.py /tmp/license.spdx |
|||
# |
|||
# Then, validate with SPDXViewer and SPDXTools: |
|||
# |
|||
# $ java -jar SPDXViewer.jar /tmp/license.spdx |
|||
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar RdfToHtml /tmp/license.spdx /tmp/license.html |
|||
# |
|||
# Finally, copy to dist: |
|||
# |
|||
# $ cp /tmp/license.spdx dist/license.spdx |
|||
# |
|||
# SPDX FAQ indicates there is no standard extension for an SPDX license file |
|||
# but '.spdx' is a common practice. |
|||
# |
|||
# The algorithm to compute a "verification code", implemented in this file, |
|||
# can be verified as follows: |
|||
# |
|||
# # build dist tar.xz, copy to /tmp/duktape-N.N.N.tar.xz |
|||
# $ cd /tmp |
|||
# $ tar xvfJ duktape-N.N.N.tar.xz |
|||
# $ rm duktape-N.N.N/license.spdx # remove file excluded from verification code |
|||
# $ java -jar spdx-tools-1.2.5-jar-with-dependencies.jar GenerateVerificationCode /tmp/duktape-N.N.N/ |
|||
# |
|||
# Compare the resulting verification code manually with the one in license.spdx. |
|||
# |
|||
# Resources: |
|||
# |
|||
# - http://spdx.org/about-spdx/faqs |
|||
# - http://wiki.spdx.org/view/Technical_Team/Best_Practices |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import datetime |
|||
import sha |
|||
import rdflib |
|||
from rdflib import URIRef, BNode, Literal, Namespace |
|||
|
|||
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
|||
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') |
|||
XSD = Namespace('http://www.w3.org/2001/XMLSchema#') |
|||
SPDX = Namespace('http://spdx.org/rdf/terms#') |
|||
DOAP = Namespace('http://usefulinc.com/ns/doap#') |
|||
DUKTAPE = Namespace('http://duktape.org/rdf/terms#') |
|||
|
|||
def checksumFile(g, filename): |
|||
f = open(filename, 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
shasum = sha.sha(d).digest().encode('hex').lower() |
|||
|
|||
csum_node = BNode() |
|||
g.add((csum_node, RDF.type, SPDX.Checksum)) |
|||
g.add((csum_node, SPDX.algorithm, SPDX.checksumAlgorithm_sha1)) |
|||
g.add((csum_node, SPDX.checksumValue, Literal(shasum))) |
|||
|
|||
return csum_node |
|||
|
|||
def computePackageVerification(g, dirname, excluded): |
|||
# SPDX 1.2 Section 4.7 |
|||
# The SPDXTools command "GenerateVerificationCode" can be used to |
|||
# check the verification codes created. Note that you must manually |
|||
# remove "license.spdx" from the unpacked dist directory before |
|||
# computing the verification code. |
|||
|
|||
verify_node = BNode() |
|||
|
|||
hashes = [] |
|||
for dirpath, dirnames, filenames in os.walk(dirname): |
|||
for fn in filenames: |
|||
full_fn = os.path.join(dirpath, fn) |
|||
f = open(full_fn, 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
|
|||
if full_fn in excluded: |
|||
#print('excluded in verification: ' + full_fn) |
|||
continue |
|||
#print('included in verification: ' + full_fn) |
|||
|
|||
file_sha1 = sha.sha(d).digest().encode('hex').lower() |
|||
hashes.append(file_sha1) |
|||
|
|||
#print(repr(hashes)) |
|||
hashes.sort() |
|||
#print(repr(hashes)) |
|||
verify_code = sha.sha(''.join(hashes)).digest().encode('hex').lower() |
|||
|
|||
for fn in excluded: |
|||
g.add((verify_node, SPDX.packageVerificationCodeExcludedFile, Literal(fn))) |
|||
g.add((verify_node, SPDX.packageVerificationCodeValue, Literal(verify_code))) |
|||
|
|||
return verify_node |
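|||
|
|||
# In short, the verification code computed above is sha1 over the |
|||
# concatenation of the sorted per-file sha1 hex digests; e.g. for two |
|||
# hypothetical files with digests 'aaaa...' and 'bbbb...' it would be |
|||
# sha1('aaaa...' + 'bbbb...'). |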
|||
|
|||
def fileType(filename): |
|||
ign, ext = os.path.splitext(filename) |
|||
if ext in [ '.c', '.h', '.js' ]: |
|||
return SPDX.fileType_source |
|||
else: |
|||
return SPDX.fileType_other |
|||
|
|||
def getDuktapeVersion(): |
|||
f = open('./src/duktape.h') |
|||
re_ver = re.compile(r'^#define\s+DUK_VERSION\s+(\d+)L$') |
|||
for line in f: |
|||
line = line.strip() |
|||
m = re_ver.match(line) |
|||
if m is None: |
|||
continue |
|||
ver = int(m.group(1)) |
|||
return '%d.%d.%d' % ((ver / 10000) % 100, |
|||
(ver / 100) % 100, |
|||
ver % 100) |
|||
|
|||
raise Exception('could not figure out Duktape version') |
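|||
|
|||
# Example: DUK_VERSION packs the version as major * 10000 + minor * 100 + patch, |
|||
# so a value of 10500 decodes to '1.5.0' with the arithmetic above. |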
|||
|
|||
def main(): |
|||
outfile = sys.argv[1] |
|||
|
|||
if not (os.path.exists('CONTRIBUTING.md') and os.path.exists('tests/ecmascript')): |
|||
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n') |
|||
sys.exit(1) |
|||
os.chdir('dist') |
|||
if not os.path.exists('Makefile.cmdline'): |
|||
sys.stderr.write('Invalid CWD, must be in Duktape root with dist/ built\n') |
|||
sys.exit(1) |
|||
|
|||
duktape_version = getDuktapeVersion() |
|||
duktape_pkgname = 'duktape-' + duktape_version + '.tar.xz' |
|||
now = datetime.datetime.utcnow() |
|||
now = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute, now.second) |
|||
creation_date = Literal(now.isoformat() + 'Z', datatype=XSD.dateTime) |
|||
duktape_org = Literal('Organization: duktape.org') |
|||
mit_license = URIRef('http://spdx.org/licenses/MIT') |
|||
duktape_copyright = Literal('Copyright 2013-2016 Duktape authors (see AUTHORS.rst in the Duktape distributable)') |
|||
|
|||
g = rdflib.Graph() |
|||
|
|||
crea_node = BNode() |
|||
g.add((crea_node, RDF.type, SPDX.CreationInfo)) |
|||
g.add((crea_node, RDFS.comment, Literal(''))) |
|||
g.add((crea_node, SPDX.creator, duktape_org)) |
|||
g.add((crea_node, SPDX.created, creation_date)) |
|||
g.add((crea_node, SPDX.licenseListVersion, Literal('1.20'))) # http://spdx.org/licenses/ |
|||
|
|||
# 'name' should not include a version number (see best practices) |
|||
pkg_node = BNode() |
|||
g.add((pkg_node, RDF.type, SPDX.Package)) |
|||
g.add((pkg_node, SPDX.name, Literal('Duktape'))) |
|||
g.add((pkg_node, SPDX.versionInfo, Literal(duktape_version))) |
|||
g.add((pkg_node, SPDX.packageFileName, Literal(duktape_pkgname))) |
|||
g.add((pkg_node, SPDX.supplier, duktape_org)) |
|||
g.add((pkg_node, SPDX.originator, duktape_org)) |
|||
g.add((pkg_node, SPDX.downloadLocation, Literal('http://duktape.org/' + duktape_pkgname, datatype=XSD.anyURI))) |
|||
g.add((pkg_node, SPDX.homePage, Literal('http://duktape.org/', datatype=XSD.anyURI))) |
|||
verify_node = computePackageVerification(g, '.', [ './license.spdx' ]) |
|||
g.add((pkg_node, SPDX.packageVerificationCode, verify_node)) |
|||
# SPDX.checksum: omitted because license is inside the package |
|||
g.add((pkg_node, SPDX.sourceInfo, Literal('Official duktape.org release built from GitHub repo https://github.com/svaarala/duktape.'))) |
|||
|
|||
# NOTE: MIT license alone is sufficient for now, because Duktape, Lua, |
|||
# Murmurhash2, and CommonJS (though probably not even relevant for |
|||
# licensing) are all MIT. |
|||
g.add((pkg_node, SPDX.licenseConcluded, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseInfoFromFiles, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseDeclared, mit_license)) |
|||
g.add((pkg_node, SPDX.licenseComments, Literal('Duktape is copyrighted by its authors and licensed under the MIT license. MurmurHash2 is used internally, it is also under the MIT license. Duktape module loader is based on the CommonJS module loading specification (without sharing any code), CommonJS is under the MIT license.'))) |
|||
g.add((pkg_node, SPDX.copyrightText, duktape_copyright)) |
|||
g.add((pkg_node, SPDX.summary, Literal('Duktape Ecmascript interpreter'))) |
|||
g.add((pkg_node, SPDX.description, Literal('Duktape is an embeddable Javascript engine, with a focus on portability and compact footprint'))) |
|||
# hasFile properties added separately below |
|||
|
|||
#reviewed_node = BNode() |
|||
#g.add((reviewed_node, RDF.type, SPDX.Review)) |
|||
#g.add((reviewed_node, SPDX.reviewer, XXX)) |
|||
#g.add((reviewed_node, SPDX.reviewDate, XXX)) |
|||
#g.add((reviewed_node, RDFS.comment, '')) |
|||
|
|||
spdx_doc = BNode() |
|||
g.add((spdx_doc, RDF.type, SPDX.SpdxDocument)) |
|||
g.add((spdx_doc, SPDX.specVersion, Literal('SPDX-1.2'))) |
|||
g.add((spdx_doc, SPDX.dataLicense, URIRef('http://spdx.org/licenses/CC0-1.0'))) |
|||
g.add((spdx_doc, RDFS.comment, Literal('SPDX license for Duktape ' + duktape_version))) |
|||
g.add((spdx_doc, SPDX.creationInfo, crea_node)) |
|||
g.add((spdx_doc, SPDX.describesPackage, pkg_node)) |
|||
# SPDX.hasExtractedLicensingInfo |
|||
# SPDX.reviewed |
|||
# SPDX.referencesFile: added below |
|||
|
|||
for dirpath, dirnames, filenames in os.walk('.'): |
|||
for fn in filenames: |
|||
full_fn = os.path.join(dirpath, fn) |
|||
#print('# file: ' + full_fn) |
|||
|
|||
file_node = BNode() |
|||
g.add((file_node, RDF.type, SPDX.File)) |
|||
g.add((file_node, SPDX.fileName, Literal(full_fn))) |
|||
g.add((file_node, SPDX.fileType, fileType(full_fn))) |
|||
g.add((file_node, SPDX.checksum, checksumFile(g, full_fn))) |
|||
|
|||
# Here we assume that LICENSE.txt provides the actual "in file" |
|||
# licensing information, and everything else is implicitly under |
|||
# MIT license. |
|||
g.add((file_node, SPDX.licenseConcluded, mit_license)) |
|||
if full_fn == './LICENSE.txt': |
|||
g.add((file_node, SPDX.licenseInfoInFile, mit_license)) |
|||
else: |
|||
g.add((file_node, SPDX.licenseInfoInFile, URIRef(SPDX.none))) |
|||
|
|||
# SPDX.licenseComments |
|||
g.add((file_node, SPDX.copyrightText, duktape_copyright)) |
|||
# SPDX.noticeText |
|||
# SPDX.artifactOf |
|||
# SPDX.fileDependency |
|||
# SPDX.fileContributor |
|||
|
|||
# XXX: should referencesFile include all files? |
|||
g.add((spdx_doc, SPDX.referencesFile, file_node)) |
|||
|
|||
g.add((pkg_node, SPDX.hasFile, file_node)) |
|||
|
|||
# Serialize into RDF/XML directly. We could also serialize into |
|||
# N-Triples and use external tools (like 'rapper') to get cleaner, |
|||
# abbreviated output. |
|||
|
|||
#print('# Duktape SPDX license file (autogenerated)') |
|||
#print(g.serialize(format='turtle')) |
|||
#print(g.serialize(format='nt')) |
|||
f = open(outfile, 'wb') |
|||
#f.write(g.serialize(format='rdf/xml')) |
|||
f.write(g.serialize(format='xml')) |
|||
f.close() |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,30 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Throwaway utility to dump Ditz issues for grooming. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import yaml |
|||
|
|||
def main(): |
|||
def issueConstructor(loader, node): |
|||
return node |
|||
|
|||
yaml.add_constructor('!ditz.rubyforge.org,2008-03-06/issue', issueConstructor) |
|||
|
|||
for fn in os.listdir(sys.argv[1]): |
|||
if fn[0:6] != 'issue-': |
|||
continue |
|||
with open(os.path.join(sys.argv[1], fn), 'rb') as f: |
|||
doc = yaml.load(f) |
|||
tmp = {} |
|||
for k,v in doc.value: |
|||
tmp[k.value] = v.value |
|||
if tmp.get('status', '') != ':closed': |
|||
print('*** ' + fn) |
|||
print(tmp.get('title', u'NOTITLE').encode('utf-8') + '\n') |
|||
print(tmp.get('desc', u'').encode('utf-8') + '\n') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,49 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Create an array of C strings with Duktape built-in strings. |
|||
# Useful when using external strings. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import json |
|||
|
|||
def to_c_string(x): |
|||
res = '"' |
|||
term = False |
|||
for i, c in enumerate(x): |
|||
if term: |
|||
term = False |
|||
res += '" "' |
|||
|
|||
o = ord(c) |
|||
if o < 0x20 or o > 0x7e or c in '\'"\\': |
|||
# Terminate C string so that escape doesn't become |
|||
# ambiguous |
|||
res += '\\x%02x' % o |
|||
term = True |
|||
else: |
|||
res += c |
|||
res += '"' |
|||
return res |
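|||
|
|||
# For example, a hypothetical input 'a\x01b' is emitted as "a\x01" "b": the |
|||
# literal is split after the hex escape so that a following printable |
|||
# character cannot be absorbed into the escape sequence. |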
|||
|
|||
def main(): |
|||
f = open(sys.argv[1], 'rb') |
|||
d = f.read() |
|||
f.close() |
|||
meta = json.loads(d) |
|||
|
|||
print('const char *duk_builtin_strings[] = {') |
|||
|
|||
strlist = meta['builtin_strings_base64'] |
|||
for i in xrange(len(strlist)): |
|||
s = strlist[i] |
|||
if i == len(strlist) - 1: |
|||
print(' %s' % to_c_string(s.decode('base64'))) |
|||
else: |
|||
print(' %s,' % to_c_string(s.decode('base64'))) |
|||
|
|||
print('};') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,130 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Utility to dump bytecode into a human readable form. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import struct |
|||
import optparse |
|||
|
|||
def decode_string(buf, off): |
|||
strlen, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
strdata = buf[off:off+strlen] |
|||
off += strlen |
|||
|
|||
return off, strdata |
|||
|
|||
def sanitize_string(val): |
|||
# Don't try to UTF-8 decode, just escape non-printable ASCII. |
|||
def f(c): |
|||
if ord(c) < 0x20 or ord(c) > 0x7e or c in '\'"': |
|||
return '\\x%02x' % ord(c) |
|||
else: |
|||
return c |
|||
return "'" + ''.join(map(f, val)) + "'" |
|||
|
|||
def decode_sanitize_string(buf, off): |
|||
off, val = decode_string(buf, off) |
|||
return off, sanitize_string(val) |
|||
|
|||
def dump_function(buf, off, ind): |
|||
count_inst, count_const, count_funcs = struct.unpack('>LLL', buf[off:off+12]) |
|||
off += 12 |
|||
print '%sInstructions: %d' % (ind, count_inst) |
|||
print '%sConstants: %d' % (ind, count_const) |
|||
print '%sInner functions: %d' % (ind, count_funcs) |
|||
|
|||
nregs, nargs, start_line, end_line = struct.unpack('>HHLL', buf[off:off+12]) |
|||
off += 12 |
|||
print '%sNregs: %d' % (ind, nregs) |
|||
print '%sNargs: %d' % (ind, nargs) |
|||
print '%sStart line number: %d' % (ind, start_line) |
|||
print '%sEnd line number: %d' % (ind, end_line) |
|||
|
|||
compfunc_flags, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%sduk_hcompiledfunction flags: 0x%08x' % (ind, compfunc_flags) |
|||
|
|||
for i in xrange(count_inst): |
|||
ins, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s %06d: %08lx' % (ind, i, ins) |
|||
|
|||
print '%sConstants:' % ind |
|||
for i in xrange(count_const): |
|||
const_type, = struct.unpack('B', buf[off:off+1]) |
|||
off += 1 |
|||
|
|||
if const_type == 0x00: |
|||
off, strdata = decode_sanitize_string(buf, off) |
|||
print '%s %06d: %s' % (ind, i, strdata) |
|||
elif const_type == 0x01: |
|||
num, = struct.unpack('>d', buf[off:off+8]) |
|||
off += 8 |
|||
print '%s %06d: %f' % (ind, i, num) |
|||
else: |
|||
raise Exception('invalid constant type: %d' % const_type) |
|||
|
|||
for i in xrange(count_funcs): |
|||
print '%sInner function %d:' % (ind, i) |
|||
off = dump_function(buf, off, ind + ' ') |
|||
|
|||
val, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s.length: %d' % (ind, val) |
|||
off, val = decode_sanitize_string(buf, off) |
|||
print '%s.name: %s' % (ind, val) |
|||
off, val = decode_sanitize_string(buf, off) |
|||
print '%s.fileName: %s' % (ind, val) |
|||
off, val = decode_string(buf, off) # actually a buffer |
|||
print '%s._Pc2line: %s' % (ind, val.encode('hex')) |
|||
|
|||
while True: |
|||
off, name = decode_string(buf, off) |
|||
if name == '': |
|||
break |
|||
name = sanitize_string(name) |
|||
val, = struct.unpack('>L', buf[off:off+4]) |
|||
off += 4 |
|||
print '%s_Varmap[%s] = %d' % (ind, name, val) |
|||
|
|||
idx = 0 |
|||
while True: |
|||
off, name = decode_string(buf, off) |
|||
if name == '': |
|||
break |
|||
name = sanitize_string(name) |
|||
print '%s_Formals[%d] = %s' % (ind, idx, name) |
|||
idx += 1 |
|||
|
|||
return off |
|||
|
|||
def dump_bytecode(buf, off, ind): |
|||
sig, ver = struct.unpack('BB', buf[off:off+2]) |
|||
off += 2 |
|||
if sig != 0xff: |
|||
raise Exception('invalid signature byte: %d' % sig) |
|||
if ver != 0x00: |
|||
raise Exception('unsupported bytecode version: %d' % ver) |
|||
print '%sBytecode version: 0x%02x' % (ind, ver) |
|||
|
|||
off = dump_function(buf, off, ind + ' ') |
|||
|
|||
return off |
|||
|
|||
def main(): |
|||
parser = optparse.OptionParser() |
|||
parser.add_option('--hex-decode', dest='hex_decode', default=False, action='store_true', help='Input file is ASCII hex encoded, decode before dump') |
|||
(opts, args) = parser.parse_args() |
|||
|
|||
with open(args[0], 'rb') as f: |
|||
d = f.read() |
|||
if opts.hex_decode: |
|||
d = d.strip() |
|||
d = d.decode('hex') |
|||
dump_bytecode(d, 0, '') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,57 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Generate equivalent year table needed by duk_bi_date.c. Based on: |
|||
# |
|||
# http://code.google.com/p/v8/source/browse/trunk/src/date.h#146 |
|||
# |
|||
|
|||
import datetime |
|||
import pytz |
|||
|
|||
def isleapyear(year): |
|||
if (year % 4) != 0: |
|||
return False |
|||
if (year % 100) != 0: |
|||
return True |
|||
if (year % 400) != 0: |
|||
return False |
|||
return True |
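|||
|
|||
# Examples of the Gregorian rule above: 2016 and 2000 are leap years |
|||
# (2000 is divisible by 400), while 1900 is not (divisible by 100 but not 400). |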
|||
|
|||
def eqyear(weekday, isleap): |
|||
# weekday: 0=Sunday, 1=Monday, ... |
|||
|
|||
if isleap: |
|||
recent_year = 1956 |
|||
else: |
|||
recent_year = 1967 |
|||
recent_year += (weekday * 12) % 28 |
|||
year = 2008 + (recent_year + 3 * 28 - 2008) % 28 |
|||
|
|||
# some assertions |
|||
# |
|||
# Note that Ecmascript internal weekday (0=Sunday) matches neither |
|||
# Python weekday() (0=Monday) nor isoweekday() (1=Monday, 7=Sunday). |
|||
# Python isoweekday() % 7 matches the Ecmascript weekday. |
|||
# https://docs.python.org/2/library/datetime.html#datetime.date.isoweekday |
|||
|
|||
dt = datetime.datetime(year, 1, 1, 0, 0, 0, 0, pytz.UTC) # Jan 1 00:00:00.000 UTC |
|||
#print(weekday, isleap, year, dt.isoweekday(), isleapyear(year)) |
|||
#print(repr(dt)) |
|||
#print(dt.isoformat()) |
|||
|
|||
if isleap != isleapyear(year): |
|||
raise Exception('internal error: equivalent year does not have same leap-year-ness') |
|||
pass |
|||
|
|||
if weekday != dt.isoweekday() % 7: |
|||
raise Exception('internal error: equivalent year does not begin with the same weekday') |
|||
pass |
|||
|
|||
return year |
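|||
|
|||
# Worked example: eqyear(0, False) gives recent_year 1967 and |
|||
# year = 2008 + (1967 + 84 - 2008) % 28 = 2023; Jan 1, 2023 is indeed a Sunday |
|||
# and 2023 is not a leap year, so the assertions above hold. |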
|||
|
|||
def main(): |
|||
for i in xrange(14): |
|||
print(eqyear(i % 7, i >= 7)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,164 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Generate a size report from a Duktape library / executable. |
|||
# Write out useful information about function sizes in a variety |
|||
# of forms. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import subprocess |
|||
|
|||
#000000000040d200 <duk_to_hstring>: |
|||
# 40d200: 55 push %rbp |
|||
# 40d201: 89 f5 mov %esi,%ebp |
|||
|
|||
re_funcstart = re.compile(r'^[0-9a-fA-F]+\s<(.*?)>:$') |
|||
re_codeline = re.compile(r'^\s*([0-9a-fA-F]+):\s+((?:[0-9a-fA-F][0-9a-fA-F] )*[0-9a-fA-F][0-9a-fA-F])\s+(.*?)\s*$') |
|||
|
|||
def objdump(filename): |
|||
proc = subprocess.Popen(['objdump', '-D', filename], stdout=subprocess.PIPE) |
|||
curr_func = None |
|||
func_start = None |
|||
func_end = None |
|||
ret = {} |
|||
|
|||
def storeFunc(): |
|||
if curr_func is None or func_start is None or func_end is None: |
|||
return |
|||
ret[curr_func] = { |
|||
'name': curr_func, |
|||
'start': func_start, |
|||
'end': func_end, # exclusive |
|||
'length': func_end - func_start |
|||
} |
|||
|
|||
for line in proc.stdout: |
|||
line = line.strip() |
|||
|
|||
m = re_funcstart.match(line) |
|||
if m is not None: |
|||
if curr_func is not None: |
|||
storeFunc() |
|||
curr_func = m.group(1) |
|||
func_start = None |
|||
func_end = None |
|||
|
|||
m = re_codeline.match(line) |
|||
if m is not None: |
|||
func_addr = long(m.group(1), 16) |
|||
func_bytes = m.group(2) |
|||
func_nbytes = len(func_bytes.split(' ')) |
|||
func_instr = m.group(3) |
|||
if func_start is None: |
|||
func_start = func_addr |
|||
func_end = func_addr + func_nbytes |
|||
|
|||
storeFunc() |
|||
|
|||
return ret |
|||
|
|||
def filterFuncs(funcs): |
|||
todo = [] # avoid mutation while iterating |
|||
|
|||
def accept(fun): |
|||
n = fun['name'] |
|||
|
|||
if n in [ '.comment', |
|||
'.dynstr', |
|||
'.dynsym', |
|||
'.eh_frame_hdr', |
|||
'.interp', |
|||
'.rela.dyn', |
|||
'.rela.plt', |
|||
'_DYNAMIC', |
|||
'_GLOBAL_OFFSET_TABLE_', |
|||
'_IO_stdin_used', |
|||
'__CTOR_LIST__', |
|||
'__DTOR_LIST__', |
|||
'_fini', |
|||
'_init', |
|||
'_start', |
|||
'' ]: |
|||
return False |
|||
|
|||
for pfx in [ '.debug', '.gnu', '.note', |
|||
'__FRAME_', '__' ]: |
|||
if n.startswith(pfx): |
|||
return False |
|||
|
|||
return True |
|||
|
|||
for k in funcs.keys(): |
|||
if not accept(funcs[k]): |
|||
todo.append(k) |
|||
|
|||
for k in todo: |
|||
del funcs[k] |
|||
|
|||
def main(): |
|||
funcs = objdump(sys.argv[1]) |
|||
filterFuncs(funcs) |
|||
|
|||
funcs_keys = funcs.keys() |
|||
funcs_keys.sort() |
|||
combined_size_all = 0 |
|||
combined_size_duk = 0 |
|||
for k in funcs_keys: |
|||
fun = funcs[k] |
|||
combined_size_all += fun['length'] |
|||
if fun['name'].startswith('duk_'): |
|||
combined_size_duk += fun['length'] |
|||
|
|||
f = sys.stdout |
|||
f.write('<html>') |
|||
f.write('<head>') |
|||
f.write('<title>Size dump for %s</title>' % sys.argv[1]) |
|||
f.write("""\ |
|||
<style type="text/css"> |
|||
tr:nth-child(2n) { |
|||
background: #eeeeee; |
|||
} |
|||
tr:nth-child(2n+1) { |
|||
background: #dddddd; |
|||
} |
|||
</style> |
|||
""") |
|||
f.write('</head>') |
|||
f.write('<body>') |
|||
|
|||
f.write('<h1>Summary</h1>') |
|||
f.write('<table>') |
|||
f.write('<tr><td>Entries</td><td>%d</td></tr>' % len(funcs_keys)) |
|||
f.write('<tr><td>Combined size (all)</td><td>%d</td></tr>' % combined_size_all) |
|||
f.write('<tr><td>Combined size (duk_*)</td><td>%d</td></tr>' % combined_size_duk) |
|||
f.write('</table>') |
|||
|
|||
f.write('<h1>Sorted by function name</h1>') |
|||
f.write('<table>') |
|||
f.write('<tr><th>Name</th><th>Bytes</th></tr>') |
|||
funcs_keys = funcs.keys() |
|||
funcs_keys.sort() |
|||
for k in funcs_keys: |
|||
fun = funcs[k] |
|||
f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length'])) |
|||
f.write('</table>') |
|||
|
|||
f.write('<h1>Sorted by size</h1>') |
|||
f.write('<table>') |
|||
f.write('<tr><th>Name</th><th>Bytes</th></tr>') |
|||
funcs_keys = funcs.keys() |
|||
def cmpSize(a,b): |
|||
return cmp(funcs[a]['length'], funcs[b]['length']) |
|||
funcs_keys.sort(cmp=cmpSize) |
|||
for k in funcs_keys: |
|||
fun = funcs[k] |
|||
f.write('<tr><td>%s</td><td>%d</td></tr>' % (fun['name'], fun['length'])) |
|||
f.write('</table>') |
|||
|
|||
f.write('</body>') |
|||
f.write('</html>') |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -0,0 +1,124 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Find a sequence of duk_hobject hash sizes which have a desired 'ratio' |
|||
# and are primes. Prime hash sizes ensure that all probe sequence values |
|||
# (less than hash size) are relatively prime to hash size, i.e. cover the |
|||
# entire hash. Prime data is packed into about 1 byte/prime using a |
|||
# prediction-correction model. |
|||
# |
|||
# Also generates a set of probe steps which are relatively prime to every |
|||
# hash size. |
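|||
# |
|||
# For example, with a hypothetical hash size of 17 and probe step 3, the |
|||
# sequence i, i+3, i+6, ... (mod 17) visits all 17 slots before repeating, |
|||
# because gcd(3, 17) = 1. |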
|||
|
|||
import sys |
|||
import math |
|||
|
|||
def is_prime(n): |
|||
if n == 0: |
|||
return False |
|||
if n == 1 or n == 2: |
|||
return True |
|||
|
|||
n_limit = int(math.ceil(float(n) ** 0.5)) + 1 |
|||
n_limit += 100 # paranoia |
|||
if n_limit >= n: |
|||
n_limit = n - 1 |
|||
for i in xrange(2,n_limit + 1): |
|||
if (n % i) == 0: |
|||
return False |
|||
return True |
|||
|
|||
def next_prime(n): |
|||
while True: |
|||
n += 1 |
|||
if is_prime(n): |
|||
return n |
|||
|
|||
def generate_sizes(min_size, max_size, step_ratio): |
|||
"Generate a set of hash sizes following a nice ratio." |
|||
|
|||
sizes = [] |
|||
ratios = [] |
|||
curr = next_prime(min_size) |
|||
next = curr |
|||
sizes.append(curr) |
|||
|
|||
step_ratio = float(step_ratio) / 1024 |
|||
|
|||
while True: |
|||
if next > max_size: |
|||
break |
|||
ratio = float(next) / float(curr) |
|||
if ratio < step_ratio: |
|||
next = next_prime(next) |
|||
continue |
|||
sys.stdout.write('.'); sys.stdout.flush() |
|||
sizes.append(next) |
|||
ratios.append(ratio) |
|||
curr = next |
|||
next = next_prime(int(next * step_ratio)) |
|||
|
|||
sys.stdout.write('\n'); sys.stdout.flush() |
|||
return sizes, ratios |
|||
|
|||
def generate_corrections(sizes, step_ratio): |
|||
"Generate a set of correction from a ratio-based predictor." |
|||
|
|||
# Generate a correction list for the size list, assuming steps follow a certain |
|||
# ratio; this allows us to pack the size list into one byte per size. |
|||
|
|||
res = [] |
|||
|
|||
res.append(sizes[0]) # first entry is first size |
|||
|
|||
for i in xrange(1, len(sizes)): |
|||
prev = sizes[i - 1] |
|||
pred = int(prev * step_ratio) >> 10 |
|||
diff = int(sizes[i] - pred) |
|||
res.append(diff) |
|||
|
|||
if diff < 0 or diff > 127: |
|||
raise Exception('correction does not fit into 8 bits') |
|||
|
|||
res.append(-1) # negative denotes end of list |
|||
return res |
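|||
|
|||
# Decoding sketch (mirror of the predictor above): a consumer could |
|||
# reconstruct the size list from the packed corrections roughly as: |
|||
# |
|||
#   def decode_sizes(corrections, step_ratio): |
|||
#       sizes = [corrections[0]]    # first entry is the first size verbatim |
|||
#       for c in corrections[1:]: |
|||
#           if c < 0:               # negative entry terminates the list |
|||
#               break |
|||
#           sizes.append(((sizes[-1] * step_ratio) >> 10) + c) |
|||
#       return sizes |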
|||
|
|||
def generate_probes(count, sizes): |
|||
res = [] |
|||
|
|||
# Generate probe values which are guaranteed to be relatively prime to |
|||
# all generated hash size primes. These don't have to be primes, but |
|||
# we currently use the smallest non-conflicting primes here. |
|||
|
|||
i = 2 |
|||
while len(res) < count: |
|||
if is_prime(i) and (i not in sizes): |
|||
if i > 255: |
|||
raise Exception('probe step does not fit into 8 bits') |
|||
res.append(i) |
|||
i += 1 |
|||
continue |
|||
i += 1 |
|||
|
|||
return res |
|||
|
|||
# NB: these must match duk_hobject defines and code |
|||
step_ratio = 1177 # approximately (1.15 * (1 << 10)) |
|||
min_size = 16 |
|||
max_size = 2**32 - 1 |
|||
|
|||
sizes, ratios = generate_sizes(min_size, max_size, step_ratio) |
|||
corrections = generate_corrections(sizes, step_ratio) |
|||
probes = generate_probes(32, sizes) |
|||
print len(sizes) |
|||
print 'SIZES: ' + repr(sizes) |
|||
print 'RATIOS: ' + repr(ratios) |
|||
print 'CORRECTIONS: ' + repr(corrections) |
|||
print 'PROBES: ' + repr(probes) |
|||
|
|||
# highest 32-bit prime |
|||
i = 2**32 |
|||
while True: |
|||
i -= 1 |
|||
if is_prime(i): |
|||
print 'highest 32-bit prime is: %d (0x%08x)' % (i, i) |
|||
break |
@ -0,0 +1,73 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Size report of (stripped) object and source files. |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
|
|||
def getsize(fname): |
|||
return os.stat(fname).st_size |
|||
|
|||
def getlines(fname): |
|||
f = None |
|||
try: |
|||
f = open(fname, 'rb') |
|||
lines = f.read().split('\n') |
|||
return len(lines) |
|||
finally: |
|||
if f is not None: |
|||
f.close() |
|||
f = None |
|||
|
|||
def process(srcfile, objfile): |
|||
srcsize = getsize(srcfile) |
|||
srclines = getlines(srcfile) |
|||
srcbpl = float(srcsize) / float(srclines) |
|||
objsize = getsize(objfile) |
|||
objbpl = float(objsize) / float(srclines) |
|||
|
|||
return objsize, objbpl, srcsize, srclines, srcbpl |
|||
|
|||
def main(): |
|||
tot_srcsize = 0 |
|||
tot_srclines = 0 |
|||
tot_objsize = 0 |
|||
|
|||
tmp = [] |
|||
for i in sys.argv[1:]: |
|||
objfile = i |
|||
if i.endswith('.strip'): |
|||
objname = i[:-6] |
|||
else: |
|||
objname = i |
|||
base, ext = os.path.splitext(objname) |
|||
srcfile = base + '.c' |
|||
|
|||
objsize, objbpl, srcsize, srclines, srcbpl = process(srcfile, objfile) |
|||
srcbase = os.path.basename(srcfile) |
|||
objbase = os.path.basename(objname) # foo.o.strip -> present as foo.o |
|||
tot_srcsize += srcsize |
|||
tot_srclines += srclines |
|||
tot_objsize += objsize |
|||
tmp.append((srcbase, srcsize, srclines, srcbpl, objbase, objsize, objbpl)) |
|||
|
|||
def mycmp(a,b): |
|||
return cmp(a[5], b[5]) |
|||
|
|||
tmp.sort(cmp=mycmp, reverse=True) # sort by object size |
|||
fmt = '%-20s size=%-7d lines=%-6d bpl=%-6.3f --> %-20s size=%-7d bpl=%-6.3f' |
|||
for srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl in tmp: |
|||
print(fmt % (srcfile, srcsize, srclines, srcbpl, objfile, objsize, objbpl)) |
|||
|
|||
print('========================================================================') |
|||
print(fmt % ('TOTAL', tot_srcsize, tot_srclines, float(tot_srcsize) / float(tot_srclines), |
|||
'', tot_objsize, float(tot_objsize) / float(tot_srclines))) |
|||
|
|||
if __name__ == '__main__': |
|||
# Usage: |
|||
# |
|||
# $ strip *.o |
|||
# $ python genobjsizereport.py *.o |
|||
|
|||
main() |
@ -1,5 +0,0 @@ |
|||
import os, sys, json, yaml |
|||
|
|||
if __name__ == '__main__': |
|||
# Use safe_dump() instead of dump() to avoid tags like "!!python/unicode" |
|||
print(yaml.safe_dump(json.load(sys.stdin), default_flow_style=False)) |
File diff suppressed because it is too large
@ -0,0 +1,33 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# RDF graph diff, useful for diffing SPDX license for release checklist. |
|||
# |
|||
# Based on: |
|||
# |
|||
# - https://www.w3.org/2001/sw/wiki/How_to_diff_RDF |
|||
# - https://github.com/RDFLib/rdflib/blob/master/rdflib/compare.py |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
|
|||
def main(): |
|||
from rdflib import Graph |
|||
from rdflib.compare import to_isomorphic, graph_diff |
|||
|
|||
with open(sys.argv[1]) as f: |
|||
d1 = f.read() |
|||
with open(sys.argv[2]) as f: |
|||
d2 = f.read() |
|||
|
|||
print('Loading graph 1 from ' + sys.argv[1]) |
|||
g1 = Graph().parse(format='n3', data=d1) |
|||
|
|||
print('Loading graph 2 from ' + sys.argv[2]) |
|||
g2 = Graph().parse(format='n3', data=d2) |
|||
|
|||
iso1 = to_isomorphic(g1) |
|||
iso2 = to_isomorphic(g2) |
|||
|
|||
# Print the triples unique to each graph (the actual diff). |
|||
in_both, in_first, in_second = graph_diff(iso1, iso2) |
|||
|
|||
print('Triples only in ' + sys.argv[1] + ':') |
|||
for t in sorted(in_first): |
|||
print(t) |
|||
|
|||
print('Triples only in ' + sys.argv[2] + ':') |
|||
for t in sorted(in_second): |
|||
print(t) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,26 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Resolve a line number in the combined source into an uncombined file/line |
|||
# using a dist/src/metadata.json file. |
|||
# |
|||
# Usage: $ python resolve_combined_lineno.py dist/src/metadata.json 12345 |
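|||
# |
|||
# Each line_map entry in metadata.json records where a run of combined lines |
|||
# came from; e.g. a hypothetical entry { "combined_line": 12000, |
|||
# "original_file": "duk_api_stack.c", "original_line": 50 } resolves combined |
|||
# line 12345 to duk_api_stack.c:395 (= 50 + (12345 - 12000)). |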
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import json |
|||
|
|||
def main(): |
|||
with open(sys.argv[1], 'rb') as f: |
|||
metadata = json.loads(f.read()) |
|||
lineno = int(sys.argv[2]) |
|||
|
|||
for e in reversed(metadata['line_map']): |
|||
if lineno >= e['combined_line']: |
|||
orig_lineno = e['original_line'] + (lineno - e['combined_line']) |
|||
print('%s:%d -> %s:%d' % ('duktape.c', lineno, |
|||
e['original_file'], orig_lineno)) |
|||
break |
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,135 +0,0 @@ |
|||
#!/usr/bin/env python2 |
|||
# |
|||
# Scan potential external strings from Ecmascript and C files. |
|||
# |
|||
# Very simplistic example with a lot of limitations: |
|||
# |
|||
# - Doesn't handle multiple variables in a variable declaration |
|||
# |
|||
# - Only extracts strings from C files, these may correspond to |
|||
# Duktape/C bindings (but in many cases don't) |
|||
# |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
import json |
|||
|
|||
strmap = {} |
|||
|
|||
# Ecmascript function declaration |
|||
re_funcname = re.compile(r'function\s+(\w+)', re.UNICODE) |
|||
|
|||
# Ecmascript variable declaration |
|||
# XXX: doesn't handle multiple variables |
|||
re_vardecl = re.compile(r'var\s+(\w+)', re.UNICODE) |
|||
|
|||
# Ecmascript variable assignment |
|||
re_varassign = re.compile(r'(\w+)\s*=\s*', re.UNICODE) |
|||
|
|||
# Ecmascript dotted property reference (also matches numbers like |
|||
# '4.0', which are separately rejected below) |
|||
re_propref = re.compile(r'(\w+(?:\.\w+)+)', re.UNICODE) |
|||
re_digits = re.compile(r'^\d+$', re.UNICODE) |
|||
|
|||
# Ecmascript or C string literal |
|||
re_strlit_dquot = re.compile(r'("(?:\\"|\\\\|[^"])*")', re.UNICODE) |
|||
re_strlit_squot = re.compile(r'(\'(?:\\\'|\\\\|[^\'])*\')', re.UNICODE) |
|||
|
|||
def strDecode(x): |
|||
# Need to decode hex, unicode, and other escapes. Python syntax |
|||
# is close enough to C and Ecmascript, so we use eval for now. |
|||
|
|||
try: |
|||
return eval('u' + x) # interpret as unicode string |
|||
except: |
|||
sys.stderr.write('Failed to parse: ' + repr(x) + ', ignoring\n') |
|||
return None |
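|||
|
|||
# For example, a hypothetical scanned literal "\u00e4" (including its quotes) |
|||
# is evaluated as u"\u00e4" and decodes to the single character u'\xe4'. |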
|||
|
|||
def scan(f, fn): |
|||
global strmap |
|||
|
|||
# Scan rules depend on file type |
|||
if fn[-2:] == '.c': |
|||
use_funcname = False |
|||
use_vardecl = False |
|||
use_varassign = False |
|||
use_propref = False |
|||
use_strlit_dquot = True |
|||
use_strlit_squot = False |
|||
else: |
|||
use_funcname = True |
|||
use_vardecl = True |
|||
use_varassign = True |
|||
use_propref = True |
|||
use_strlit_dquot = True |
|||
use_strlit_squot = True |
|||
|
|||
for line in f: |
|||
# Assume input data is UTF-8 |
|||
line = line.decode('utf-8') |
|||
|
|||
if use_funcname: |
|||
for m in re_funcname.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_vardecl: |
|||
for m in re_vardecl.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_varassign: |
|||
for m in re_varassign.finditer(line): |
|||
strmap[m.group(1)] = True |
|||
|
|||
if use_propref: |
|||
for m in re_propref.finditer(line): |
|||
parts = m.group(1).split('.') |
|||
if re_digits.match(parts[0]) is not None: |
|||
# Probably a number ('4.0' or such) |
|||
pass |
|||
else: |
|||
for part in parts: |
|||
strmap[part] = True |
|||
|
|||
if use_strlit_dquot: |
|||
for m in re_strlit_dquot.finditer(line): |
|||
s = strDecode(m.group(1)) |
|||
if s is not None: |
|||
strmap[s] = True |
|||
|
|||
if use_strlit_squot: |
|||
for m in re_strlit_squot.finditer(line): |
|||
s = strDecode(m.group(1)) |
|||
if s is not None: |
|||
strmap[s] = True |
|||
|
|||
def main(): |
|||
for fn in sys.argv[1:]: |
|||
f = open(fn, 'rb') |
|||
scan(f, fn) |
|||
f.close() |
|||
|
|||
strs = [] |
|||
strs_base64 = [] |
|||
doc = { |
|||
# Strings as Unicode strings |
|||
'scanned_strings': strs, |
|||
|
|||
# Strings as base64-encoded UTF-8 data, which should be ready |
|||
# to be used in C code (Duktape internal string representation |
|||
# is UTF-8) |
|||
'scanned_strings_base64': strs_base64 |
|||
} |
|||
k = strmap.keys() |
|||
k.sort() |
|||
for s in k: |
|||
strs.append(s) |
|||
t = s.encode('utf-8').encode('base64') |
|||
if len(t) > 0 and t[-1] == '\n': |
|||
t = t[0:-1] |
|||
strs_base64.append(t) |
|||
|
|||
print(json.dumps(doc, indent=4, ensure_ascii=True, sort_keys=True)) |
|||
|
|||
if __name__ == '__main__': |
|||
main() |