#!/usr/bin/env python2
#
#  Combine a set of source files into a single C file.
#
#  Overview of the process:
#
#    * Parse user supplied C files.  Add automatic #undefs at the end
#      of each C file to avoid defines bleeding from one file to another.
#
#    * Combine the C files in specified order.  If sources have ordering
#      dependencies (depends on application), order may matter.
#
#    * Process #include statements in the combined source, categorizing
#      them either as "internal" (found in specified include path) or
#      "external".  Internal includes, unless explicitly excluded, are
#      inlined into the result while external includes are left as is.
#      Duplicate #include statements are replaced with a comment.
#
#  At every step, source and header lines are represented with explicit
#  line objects which keep track of original filename and line.  The
#  output contains #line directives, if necessary, to ensure error
#  throwing and other diagnostic info will work in a useful manner when
#  deployed.  It's also possible to generate a combined source with no
#  #line directives.
#
#  Making the process deterministic is important, so that if users have
#  diffs that they apply to the combined source, such diffs would apply
#  for as long as possible.
#
#  Limitations and notes:
#
#    * While there are automatic #undef's for #define's introduced in each
#      C file, it's not possible to "undefine" structs, unions, etc.  If
#      there are structs/unions/typedefs with conflicting names, these
#      have to be resolved in the source files first.
#
#    * Because duplicate #include statements are suppressed, currently
#      assumes #include statements are not conditional.
#
#    * A system header might be #include'd in multiple source files with
#      different feature defines (like _BSD_SOURCE).  Because the #include
#      file will only appear once in the resulting source, the first
#      occurrence wins.  The result may not work correctly if the feature
#      defines must actually be different between two or more source files.
#

import os
import sys
import re
import json
import optparse

# Include path for finding include files which are amalgamated.
include_paths = []

# Include files specifically excluded from being inlined.
include_excluded = []

# Patterns are compiled once at import time instead of on every call.
_RE_PATH_SEP = re.compile(r'/|\\')
_RE_DEFINE = re.compile(r'#define\s+(\w+).*$')
_RE_UNDEF = re.compile(r'#undef\s+(\w+).*$')
_RE_INCLUDE = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')


def _readText(filename):
    """Return the contents of 'filename' as a native 'str'.

    The file is read in binary mode so no newline translation happens.
    On Python 2 the bytes are returned untouched.  On Python 3 they are
    decoded as UTF-8 with 'surrogateescape' so that arbitrary bytes
    survive a read/write round trip unchanged.
    """
    with open(filename, 'rb') as f:
        data = f.read()
    if not isinstance(data, str):
        data = data.decode('utf-8', 'surrogateescape')
    return data


def _toBytes(text):
    """Return 'text' as bytes suitable for writing in binary mode.

    Inverse of the decoding done by _readText(); a no-op for values
    which already are byte strings (always the case on Python 2).
    """
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8', 'surrogateescape')


class File:
    """A parsed input file: its name and a list of Line objects."""

    filename_full = None
    filename = None
    lines = None

    def __init__(self, filename, lines):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lines = lines


class Line:
    """One line of input together with its original file and line number."""

    filename_full = None
    filename = None
    lineno = None
    data = None

    def __init__(self, filename, lineno, data):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lineno = lineno
        self.data = data


def readFile(filename):
    """Read 'filename' and return it as a File of Line objects.

    Lines are split on '\\n' only and the terminator is stripped; line
    numbers start from 1.  A missing newline at the end of the file is
    accepted.
    """
    parts = _readText(filename).split('\n')
    if parts[-1] == '':
        # Either the file ended with a newline or it was empty; in both
        # cases the trailing empty piece is not a line of its own.
        parts.pop()
    lines = [Line(filename, idx + 1, data) for idx, data in enumerate(parts)]
    return File(filename, lines)


def lookupInclude(incfn):
    """Look up include file 'incfn' in the global include path.

    Both '/' and '\\' are accepted as separators in 'incfn'.  Returns the
    full path of the first match, or None if the file is not found in
    any of the include directories.
    """
    inccomp = _RE_PATH_SEP.split(incfn)  # split include path, support / and \
    for path in include_paths:
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # Return full path to first match
    return None


def addAutomaticUndefs(f):
    """Append #undef lines to File 'f' for defines it leaves defined.

    Only #define / #undef directives starting at the very beginning of
    a line are recognized.  The File is modified in place.
    """
    defined = {}

    for line in f.lines:
        m = _RE_DEFINE.match(line.data)
        if m is not None:
            defined[m.group(1)] = True
        m = _RE_UNDEF.match(line.data)
        if m is not None:
            # Could just ignore #undef's here: we'd then emit
            # reliable #undef's (though maybe duplicates) at
            # the end.
            defined.pop(m.group(1), None)

    # Undefine anything that seems to be left defined.  This is not a
    # 100% process because some #undef's might be conditional which we
    # don't track at the moment.  Note that it's safe to #undef something
    # that's not defined.

    keys = sorted(defined)  # deterministic order
    if keys:
        trailer = ['', '/* automatic undefs */']
        trailer.extend('#undef %s' % k for k in keys)
        for data in trailer:
            # Use the full path so the new lines carry the same
            # filename_full as the lines read from the file itself.
            f.lines.append(Line(f.filename_full, len(f.lines) + 1, data))


def createCombined(files, prologue_filename, line_directives):
    """Combine 'files' (File objects) into a single source text.

    Arguments:
      files             -- File objects, processed in the given order
      prologue_filename -- file whose contents are placed first, or None
      line_directives   -- if true, emit #line directives whenever the
                           original file/line position jumps

    Returns a tuple (combined_source, metadata) where metadata is a dict
    with a 'line_map' list describing each position jump.

    Raises Exception if a line starting with '#include' cannot be parsed.
    """
    res = []
    line_map = []  # indicate combined source lines where uncombined file/line would change
    metadata = {
        'line_map': line_map
    }

    emit_state = [None, None]  # curr_filename, curr_lineno

    def emit(line):
        if not isinstance(line, Line):
            # Plain string: a comment standing in for an #include line,
            # so it consumes one line of the current original file.
            res.append(line)
            if emit_state[1] is not None:
                # Nothing to advance before the first Line is emitted;
                # without this check an internal #include on the very
                # first input line would crash on 'None += 1'.
                emit_state[1] += 1
            return

        if line.filename != emit_state[0] or line.lineno != emit_state[1]:
            if line_directives:
                res.append('#line %d "%s"' % (line.lineno, line.filename))
            line_map.append({
                'original_file': line.filename,
                'original_line': line.lineno,
                'combined_line': len(res) + 1
            })
        res.append(line.data)
        emit_state[0] = line.filename
        emit_state[1] = line.lineno + 1

    included = set()  # headers already included

    if prologue_filename is not None:
        res.extend(_readText(prologue_filename).split('\n'))

    # Process a file, appending it to the result; the input may be a
    # source or an include file.  #include directives are handled
    # recursively.
    def processFile(f):
        for line in f.lines:
            if not line.data.startswith('#include'):
                emit(line)
                continue

            m = _RE_INCLUDE.match(line.data)
            if m is None:
                raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
            incpath = m.group(2)
            if incpath in include_excluded:
                # Specific include files excluded from the
                # inlining / duplicate suppression process.
                emit(line)  # keep as is
                continue

            if incpath in included:
                # We suppress duplicate includes, both internal and
                # external, based on the assumption that includes are
                # not behind #ifdef checks.  This is the case for
                # Duktape (except for the include files excluded).
                emit('/* #include %s -> already included */' % incpath)
                continue
            included.add(incpath)

            # An include file is considered "internal" and is amalgamated
            # if it is found in the include path provided by the user.
            incfile = lookupInclude(incpath)
            if incfile is not None:
                emit('/* #include %s */' % incpath)
                processFile(readFile(incfile))
            else:
                emit(line)  # keep as is

    for f in files:
        processFile(f)

    return '\n'.join(res) + '\n', metadata


def main():
    """Command line entry point: parse options, combine, write outputs."""
    global include_paths, include_excluded

    parser = optparse.OptionParser()
    parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
    parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
    parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
    parser.add_option('--output-source', dest='output_source', help='Output source filename')
    parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
    parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
    (opts, args) = parser.parse_args()

    # Explicit checks instead of assert, which is stripped under -O.
    if not opts.output_source:
        parser.error('--output-source is required')
    if not opts.output_metadata:
        parser.error('--output-metadata is required')

    include_paths = opts.include_paths  # global for easy access
    include_excluded = opts.include_excluded

    print('Read input files, add automatic #undefs')
    sources = args
    files = []
    for fn in sources:
        res = readFile(fn)
        addAutomaticUndefs(res)
        files.append(res)

    print('Create combined source file from %d source files' % len(files))
    combined_source, metadata = \
        createCombined(files, opts.prologue, opts.line_directives)
    source_bytes = _toBytes(combined_source)
    with open(opts.output_source, 'wb') as f:
        f.write(source_bytes)
    with open(opts.output_metadata, 'wb') as f:
        f.write(_toBytes(json.dumps(metadata, indent=4)))

    print('Wrote %d bytes to %s' % (len(source_bytes), opts.output_source))


if __name__ == '__main__':
    main()