#!/usr/bin/env python2
#
#  Build Duktape website.  Must be run with cwd in the website/ directory.
#

import os
import sys
import traceback
import time
import datetime
import shutil
import re
import tempfile
import atexit
import hashlib
import md5  # deprecated; hashlib is used below, import kept in case other code in this file still references it
import json
import yaml

from bs4 import BeautifulSoup, Tag

colorize = True
fancy_stack = True
remove_fixme = True
testcase_refs = False
list_tags = False
floating_list_tags = True
fancy_releaselog = True

dt_now = datetime.datetime.utcnow()

def readFile(x):
    # Return the raw byte contents of file 'x'.
    with open(x, 'rb') as f:
        return f.read()

def htmlEscape(x):
    # Escape text for embedding into HTML: printable ASCII (except the
    # markup characters &, <, >) passes through unchanged; everything else
    # becomes a hexadecimal numeric character reference.
    res = ''
    esc = '&<>'
    for c in x:
        if ord(c) >= 0x20 and ord(c) <= 0x7e and c not in esc:
            res += c
        else:
            # Bug fix: the '&#x' entity prefix was missing, so e.g. '<'
            # was emitted as '003c;' instead of the valid '&#x003c;'.
            res += '&#x%04x;' % ord(c)
    return res

def getAutodeleteTempname():
    # Return a temporary filename which is deleted automatically at
    # interpreter exit.  mkstemp() creates the file immediately which
    # avoids the mktemp() name-reuse race.
    fd, tmp = tempfile.mkstemp(suffix='duktape-website')
    os.close(fd)
    def f():
        # File may already be gone (or never rewritten); best effort only.
        if os.path.exists(tmp):
            os.remove(tmp)
    atexit.register(f)
    return tmp

# also escapes text automatically
def sourceHighlight(x, sourceLang):
    # Colorize source text 'x' using the external 'source-highlight' tool;
    # 'sourceLang' selects the highlighting language.
    tmp1 = getAutodeleteTempname()
    tmp2 = getAutodeleteTempname()
    with open(tmp1, 'wb') as f:
        f.write(x)

    # XXX: os.system() with interpolated filenames is fragile; the names
    # here are build-controlled temp files, but subprocess would be safer.
    os.system('source-highlight -s %s -c highlight.css --no-doc <"%s" >"%s"' % \
              (sourceLang, tmp1, tmp2))

    with open(tmp2, 'rb') as f:
        return f.read()

def rst2Html(filename):
    # Convert an RST file to HTML using the external 'rst2html' tool.
    tmp1 = getAutodeleteTempname()
    # XXX: safer execution (see note in sourceHighlight).
    os.system('rst2html "%s" >"%s"' % \
              (filename, tmp1))
    with open(tmp1, 'rb') as f:
        return f.read()

def getFileMd5(filename):
    # Return the hex MD5 digest of the file contents, or None if the
    # file does not exist.
    if not os.path.exists(filename):
        return None
    with open(filename, 'rb') as f:
        d = f.read()
    # hashlib replaces the deprecated 'md5' module; hexdigest() produces
    # exactly the same string as the old digest().encode('hex').
    return hashlib.md5(d).hexdigest()

def stripNewline(x):
    # Drop a single trailing newline, if present.
    if len(x) > 0 and x[-1] == '\n':
        return x[:-1]
    return x

def splitNewlineNoLastEmpty(x):
    # Split on newlines, dropping the final empty element caused by a
    # trailing newline (if any).
    assert(x is not None)
    res = x.split('\n')
    if len(res) > 0 and res[-1] == '':
        res = res[:-1]
    return res

def validateAndParseHtml(data):
    # first parse as xml to get errors out
    ign_soup = BeautifulSoup(data, 'xml')

    # then parse as lenient html, no xml tags etc
    soup = BeautifulSoup(data)
    return soup

# Matches '[ stack ]', '[ stack ] -> [ stack ]', with an optional trailing
# comment; used by renderFancyStack().  (The compile() value was split onto
# the next physical line by whitespace mangling; assignment completed here.)
re_stack_line = re.compile(r'^(\[[^\x5d]+\])(?:\s+->\s+(\[[^\x5d]+\]))?(?:\s+(.*?))?\s*$')
re.compile(r'^(\[[^\x5d]+\])(?:\s+->\s+(\[[^\x5d]+\]))?(?:\s+(.*?))?\s*$') def renderFancyStack(inp_line): # Support various notations here: # # [ a b c ] # [ a b c ] -> [ d e f ] # [ a b c ] -> [ d e f ] (if foo) # m = re_stack_line.match(inp_line) #print(inp_line) assert(m is not None) stacks = [ m.group(1) ] if m.group(2) is not None: stacks.append(m.group(2)) res = [] res.append('
') res.append('') for i in p: res.append(htmlEscape(i)) res.append('') res.append('
' + \ '%s' % htmlEscape(line) + \ '') res.append('
(No effect on value stack.)
') res.append('', assume it is raw HTML; otherwise # assume it is a single paragraph (with no markup) and generate # paragraph tags, escaping into HTML raw_html = False for i in p: if '
' in i: raw_html = True if raw_html: for i in p: res.append(i) else: res.append('
') for i in p: res.append(htmlEscape(i)) res.append('
') res.append('') for i in p: res.append(htmlEscape(i)) res.append('') res.append('
None.
') res.append('') p = doc['tags'] for idx, val in enumerate(p): if idx > 0: res.append(' ') res.append(htmlEscape(val)) res.append('
') res.append('..., get rid of new_elem = BeautifulSoup(colorized).tt # XXX: parse just a fragment - how? new_elem.name = 'pre' new_elem['class'] = cssClass if origTitle is not None: # Preserve title (hover tool tip) new_elem['title'] = origTitle elem.replace_with(new_elem) def transformFancyStacks(soup): for elem in soup.select('pre.stack'): input_str = elem.string if len(input_str) > 0 and input_str[0] == '\n': # hack for leading empty line input_str = input_str[1:] new_elem = BeautifulSoup(renderFancyStack(input_str)).div # XXX: fragment? elem.replace_with(new_elem) def transformRemoveClass(soup, cssClass): for elem in soup.select('.' + cssClass): elem.extract() def transformReadIncludes(soup, includeDirs): for elem in soup.select('*'): if not elem.has_key('include'): continue filename = elem['include'] del elem['include'] d = None for incdir in includeDirs: fn = os.path.join(incdir, filename) if os.path.exists(fn): f = open(fn, 'rb') d = f.read() f.close() break if d is None: raise Exception('cannot find include file: ' + repr(filename)) if filename.endswith('.html'): new_elem = BeautifulSoup(d).div elem.replace_with(new_elem) else: elem.string = d def transformVersionNumber(soup, verstr): for elem in soup.select('.duktape-version'): elem.replaceWith(verstr) def transformCurrentDate(soup): curr_date = '%04d-%02d-%02d' % (dt_now.year, dt_now.month, dt_now.day) for elem in soup.select('.current-date'): elem.replaceWith(curr_date) def transformAddHrBeforeH1(soup): for elem in soup.select('h1'): elem.insert_before(soup.new_tag('hr')) # Add automatic anchors so that a basename from an element with an explicit # ID is appended with dotted number(s). Note that headings do not actually # nest in the document, so this is now based on document order traversal and # keeping track of counts of headings at different levels, and the active # explicit IDs at each level. 
def transformAddAutoAnchorsNumbered(soup):
    # Walk the document in order and record generated ids for headings.
    # A generated id is the closest active explicit id (by heading level)
    # followed by dotted per-level counters, e.g. 'intro.2.1', falling
    # back to a 'doc' prefix ('doc.1.2') when no explicit id is on the
    # path.
    counters = [ 0, 0, 0, 0, 0, 0 ]                     # h1..h6 counts
    explicit = [ None, None, None, None, None, None ]   # active explicit ids
    level_of = { 'h1': 0, 'h2': 1, 'h3': 2, 'h4': 3, 'h5': 4, 'h6': 5 }
    pending = []

    def _visit(node):
        lvl = level_of.get(node.name, None)
        if lvl is None:
            return

        # Entering a heading: bump its own counter and reset all deeper
        # levels (their numbering restarts under this heading).
        counters[lvl] += 1
        for j in xrange(lvl + 1, 6):
            counters[j] = 0

        if node.has_key('id'):
            # An explicit id becomes the anchor base for this level.
            explicit[lvl] = node['id']
            return

        # No explicit id at this level; clear any stale one.
        explicit[lvl] = None

        # Build the automatic id: climb from the current level towards h1,
        # collecting counters until an explicit id (or the synthetic 'doc'
        # root) terminates the path.
        segs = []
        for j in xrange(lvl, -1, -1):
            if explicit[j] is not None:
                segs.append(explicit[j])
                break
            segs.append(str(counters[j]))
            if j == 0:
                segs.append('doc')  # no explicit id anywhere on the path
        segs.reverse()

        # Avoid mutating the tree mid-traversal; apply ids afterwards.
        # (Recording also keeps the option open of adding explicit anchor
        # elements instead of / in addition to 'id' attributes.)
        pending.append((node, '.'.join(segs)))

    def _walk(node):
        if not isinstance(node, Tag):
            return
        _visit(node)
        for child in node.children:
            _walk(child)

    _walk(soup.select('body')[0])

    for node, auto_id in pending:
        node['id'] = auto_id

# Add automatic anchors where section headings are used to autogenerate
# suitable names.  This does not work very well: there are many subsections
# with the name "Example" or "Limitations", for instance.  Prepending the
# parent name (or rather names of all the parents) would create very long
# names.
def transformAddAutoAnchorsNamed(soup): hdr_tags = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ] ids = {} def findAutoName(txt): # simple name sanitation, not very well thought out; goal is to get # nice web-like anchor names from whatever titles are present txt = txt.strip().lower() if len(txt) > 1 and txt[0] == '.': txt = txt[1:] # leading dot convention for API section names txt = txt.replace('c++', 'cpp') txt = txt.replace('. ', ' ') # e.g. 'vs.' -> 'vs' txt = txt.replace(', ', ' ') # e.g. 'foo, bar' -> 'foo bar' txt = txt.replace(' ', '_') res = '' for i,c in enumerate(txt): if (ord(c) >= ord('a') and ord(c) <= ord('z')) or \ (ord(c) >= ord('A') and ord(c) <= ord('Z')) or \ (ord(c) >= ord('0') and ord(c) <= ord('9') and i > 0) or \ c in '_': res += c elif c in '()[]{}?\'"': pass # eat else: res += '_' return res for elem in soup.select('*'): if not elem.has_key('id'): continue e_id = elem['id'] if ids.has_key(e_id): print('WARNING: duplicate id %s' % e_id) ids[e_id] = True # add automatic anchors for every other heading, with priority in # naming for higher level sections (e.g. 
h2 over h3) for hdr in hdr_tags: for elem in soup.select(hdr): if elem.has_key('id'): continue # already has an id anchor e_name = elem.text a_name = findAutoName(e_name) if ids.has_key(a_name): print('WARNING: cannot generate automatic anchor name for %s (already exists)' % e_name) continue ids[a_name] = True elem['id'] = a_name def transformAddHeadingLinks(soup): hdr_tags = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ] changes = [] for elem in soup.select('*'): if elem.name not in hdr_tags or not elem.has_key('id'): continue new_elem = soup.new_tag('a') new_elem['href'] = '#' + elem['id'] new_elem['class'] = 'sectionlink' new_elem.string = u'\u00a7' # section sign # avoid mutation while iterating changes.append((elem, new_elem)) for elem, new_elem in changes: if elem.has_key('class'): elem['class'].append('sectiontitle') else: elem['class'] = 'sectiontitle' elem.append(' ') elem.append(new_elem) def setNavSelected(soup, pagename): # pagename must match