You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

257 lines
8.6 KiB

#!/usr/bin/env python2
#
# Combine a set of a source files into a single C file.
#
# Overview of the process:
#
# * Parse user supplied C files. Add automatic #undefs at the end
# of each C file to avoid defines bleeding from one file to another.
#
# * Combine the C files in specified order. If sources have ordering
# dependencies (depends on application), order may matter.
#
# * Process #include statements in the combined source, categorizing
# them either as "internal" (found in specified include path) or
# "external". Internal includes, unless explicitly excluded, are
# inlined into the result while external includes are left as is.
# Duplicate #include statements are replaced with a comment.
#
# At every step, source and header lines are represented with explicit
# line objects which keep track of original filename and line. The
# output contains #line directives, if necessary, to ensure error
# throwing and other diagnostic info will work in a useful manner when
# deployed. It's also possible to generate a combined source with no
# #line directives.
#
# Making the process deterministic is important, so that if users have
# diffs that they apply to the combined source, such diffs would apply
# for as long as possible.
#
# Limitations and notes:
#
# * While there are automatic #undef's for #define's introduced in each
# C file, it's not possible to "undefine" structs, unions, etc. If
# there are structs/unions/typedefs with conflicting names, these
# have to be resolved in the source files first.
#
# * Because duplicate #include statements are suppressed, currently
# assumes #include statements are not conditional.
#
# * A system header might be #include'd in multiple source files with
# different feature defines (like _BSD_SOURCE). Because the #include
# file will only appear once in the resulting source, the first
# occurrence wins. The result may not work correctly if the feature
# defines must actually be different between two or more source files.
#
import io
import json
import optparse
import os
import re
import sys
# Include path for finding include files which are amalgamated ("internal"
# includes); populated from --include-path options in main().
include_paths = []
# Include files specifically excluded from being inlined, even when found
# on the include path; populated from --include-exclude options in main().
include_excluded = []
class File:
    """A source or header file: a list of Line objects plus its name."""

    filename_full = None  # path exactly as supplied by the caller
    filename = None       # basename component only
    lines = None          # list of Line objects

    def __init__(self, filename, lines):
        self.filename_full = filename
        self.filename = os.path.basename(filename)
        self.lines = lines
class Line:
    """One source line tagged with the file and line number it came from."""

    filename_full = None  # path exactly as supplied by the caller
    filename = None       # basename component only
    lineno = None         # 1-based line number in the original file
    data = None           # line contents without the trailing newline

    def __init__(self, filename, lineno, data):
        self.filename_full = filename
        self.filename = os.path.basename(filename)
        self.lineno = lineno
        self.data = data
def readFile(filename):
    """Read a file and return a File of per-line Line objects.

    Only the trailing '\\n' is stripped from each line; newline='' disables
    newline translation so that any '\\r' characters are preserved exactly,
    matching the original binary-mode read.  (The original 'rb' read breaks
    on Python 3, where bytes cannot be compared against the str '\\n'.)
    """
    lines = []
    with io.open(filename, 'r', newline='') as f:
        for lineno, line in enumerate(f, 1):
            if line.endswith('\n'):
                line = line[:-1]
            lines.append(Line(filename, lineno, line))
    return File(filename, lines)
def lookupInclude(incfn):
    """Return the full path of include file 'incfn' on the include path.

    The include reference may use '/' or '\\' as a separator.  Returns the
    first match found in 'include_paths' (in order), or None if the include
    is not found (i.e. it is "external").

    Note: the original used apply(), which was removed in Python 3;
    argument unpacking with os.path.join(path, *inccomp) is equivalent.
    """
    inccomp = re.split(r'/|\\', incfn)  # split include path, support / and \
    for path in include_paths:
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # return full path to first match
    return None
def addAutomaticUndefs(f):
    """Append '#undef' lines to file 'f' for every macro it leaves defined.

    Prevents #defines in one combined file from bleeding into the next.
    This is not a 100% process because some #undefs might be conditional,
    which we don't track; however, it is always safe to #undef a name that
    is not defined.

    Note: the original used dict.has_key(), removed in Python 3; a set
    expresses the "currently defined names" bookkeeping directly.
    """
    defined = set()
    re_def = re.compile(r'#define\s+(\w+).*$')
    re_undef = re.compile(r'#undef\s+(\w+).*$')
    for line in f.lines:
        m = re_def.match(line.data)
        if m is not None:
            defined.add(m.group(1))
        m = re_undef.match(line.data)
        if m is not None:
            # An explicit #undef removes the name.  We could also ignore
            # these and emit reliable (though maybe duplicate) #undefs at
            # the end.
            defined.discard(m.group(1))
    # Emit in sorted order so output is deterministic.
    keys = sorted(defined)
    if keys:
        f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
        f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
        for k in keys:
            f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))
def createCombined(files, prologue_filename, line_directives):
    """Combine 'files' into a single source, returning (source, metadata).

    Arguments:
        files: list of File objects, combined in the given order.
        prologue_filename: optional file whose contents are prepended
            verbatim (not tracked in the line map).
        line_directives: if True, emit #line directives so diagnostics
            refer back to the original files.

    The metadata dict contains a 'line_map' list recording, at each point
    where the original file/line changes, the corresponding combined line.

    Python 3 fixes relative to the original: 'unicode' no longer exists
    (plain str suffices for the internally emitted strings), dict.has_key()
    was removed, and the prologue must be read in text mode (bytes have no
    str-based split).
    """
    res = []
    line_map = []  # combined source lines where uncombined file/line changes
    metadata = {
        'line_map': line_map
    }

    # Mutable cell so the nested emit() can update it (no 'nonlocal' in
    # Python 2): [ current filename, current lineno ].
    emit_state = [None, None]

    def emit(line):
        # 'line' is either a plain string or a Line object.
        if isinstance(line, str):
            # NOTE(review): emit_state[1] is None until the first Line is
            # emitted; a string emitted before any Line would fail here —
            # same as the original, assumed not to happen in practice.
            res.append(line)
            emit_state[1] += 1
        else:
            if line.filename != emit_state[0] or line.lineno != emit_state[1]:
                if line_directives:
                    res.append('#line %d "%s"' % (line.lineno, line.filename))
                line_map.append({ 'original_file': line.filename,
                                  'original_line': line.lineno,
                                  'combined_line': len(res) + 1 })
            res.append(line.data)
            emit_state[0] = line.filename
            emit_state[1] = line.lineno + 1

    included = {}  # headers already included

    if prologue_filename is not None:
        # Text mode so the split below operates on str (Python 3 safe).
        with open(prologue_filename, 'r') as f:
            for line in f.read().split('\n'):
                res.append(line)

    re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')

    # Process a file, appending it to the result; the input may be a
    # source or an include file.  #include directives are handled
    # recursively.
    def processFile(f):
        for line in f.lines:
            if not line.data.startswith('#include'):
                emit(line)
                continue

            m = re_inc.match(line.data)
            if m is None:
                raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
            incpath = m.group(2)
            if incpath in include_excluded:
                # Specific include files excluded from the
                # inlining / duplicate suppression process.
                emit(line)  # keep as is
                continue

            if incpath in included:
                # We suppress duplicate includes, both internal and
                # external, based on the assumption that includes are
                # not behind #ifdef checks.  This is the case for
                # Duktape (except for the include files excluded).
                emit('/* #include %s -> already included */' % incpath)
                continue
            included[incpath] = True

            # An include file is considered "internal" and is amalgamated
            # if it is found in the include path provided by the user.
            incfile = lookupInclude(incpath)
            if incfile is not None:
                emit('/* #include %s */' % incpath)
                processFile(readFile(incfile))
            else:
                emit(line)  # keep as is

    for f in files:
        processFile(f)

    return '\n'.join(res) + '\n', metadata
def main():
    """Command line entry point: parse options, read and combine sources,
    write the combined source and metadata files.

    Python 3 fix: the combined source is a str, so the outputs must be
    opened in text mode ('wb' + str raises TypeError on Python 3).  Input
    validation uses parser.error() instead of assert, which is stripped
    under -O.
    """
    global include_paths, include_excluded

    parser = optparse.OptionParser()
    parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
    parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
    parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
    parser.add_option('--output-source', dest='output_source', help='Output source filename')
    parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
    parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
    (opts, args) = parser.parse_args()

    if opts.include_paths is None:
        parser.error('missing --include-path')
    include_paths = opts.include_paths  # global for easy access
    include_excluded = opts.include_excluded
    if not opts.output_source:
        parser.error('missing --output-source')
    if not opts.output_metadata:
        parser.error('missing --output-metadata')

    print('Read input files, add automatic #undefs')
    sources = args
    files = []
    for fn in sources:
        res = readFile(fn)
        addAutomaticUndefs(res)
        files.append(res)

    print('Create combined source file from %d source files' % len(files))
    combined_source, metadata = \
        createCombined(files, opts.prologue, opts.line_directives)

    with open(opts.output_source, 'w') as f:
        f.write(combined_source)
    with open(opts.output_metadata, 'w') as f:
        f.write(json.dumps(metadata, indent=4))

    print('Wrote %d bytes to %s' % (len(combined_source), opts.output_source))

if __name__ == '__main__':
    main()