|
|
|
#!/usr/bin/env python2
|
|
|
|
#
# UnicodeData.txt may contain ranges in addition to individual characters.
# Unpack the ranges into individual characters for the other scripts to use.
#
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import optparse
|
|
|
|
|
|
|
|
def _expand_ranges(f_in, f_out):
    """Copy UnicodeData records from f_in to f_out, expanding ranges.

    Both arguments are file objects opened in binary mode, so line endings
    are passed through untouched.  A record whose second field ends with
    'First>' must be followed immediately by a record whose second field
    ends with 'Last>'.  Both records are written unchanged, and between
    them one record is written for every code point strictly inside the
    range: the code point as at least four upper-case hex digits, the
    placeholder name '-""-', then the remaining fields of the 'First>'
    record.

    Processing stops at end of file or at the first blank line.

    Raises:
        Exception: if a 'First>' record is not followed by a 'Last>' record
            (including when the input ends right after 'First>').
    """
    # iter() with a sentinel keeps reading through readline(), so the extra
    # readline() for the 'Last>' record below stays in step with the loop.
    for line in iter(f_in.readline, b''):
        if line == b'\n':
            # A blank line terminates the input, just like end of file.
            break

        parts = line.split(b';')  # keep newline
        if not parts[1].endswith(b'First>'):
            f_out.write(line)
            continue

        line2 = f_in.readline()
        parts2 = line2.split(b';')
        # The length check covers a 'First>' record at the very end of the
        # input, where readline() returns an empty string.
        if len(parts2) < 2 or not parts2[1].endswith(b'Last>'):
            raise Exception('cannot parse range')

        cp1 = int(parts[0], 16)
        cp2 = int(parts2[0], 16)

        # Everything after the code point, with the name field replaced by
        # a placeholder; the last field still carries the newline.
        suffix = b';'.join([b'-""-'] + parts[2:])

        f_out.write(line)
        for cp in range(cp1 + 1, cp2):
            f_out.write(b'%04X;%s' % (cp, suffix))
        f_out.write(line2)


def main():
    """Expand the ranges of a UnicodeData.txt file into individual records.

    Command line options:
        --unicode-data  path of the input file (required)
        --output        path of the file to write (required)
        --quiet, --verbose
                        accepted, but not otherwise used by this function

    A missing required option is reported through the option parser, which
    prints a usage message and exits with status 2.
    """
    parser = optparse.OptionParser()
    parser.add_option('--unicode-data', dest='unicode_data')
    parser.add_option('--output', dest='output')
    parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
    parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
    opts, _ = parser.parse_args()

    # Explicit checks instead of assert: asserts are stripped under -O.
    if opts.unicode_data is None:
        parser.error('--unicode-data is required')
    if opts.output is None:
        parser.error('--output is required')

    # 'with' closes (and flushes) both files even if parsing fails.
    with open(opts.unicode_data, 'rb') as f_in:
        with open(opts.output, 'wb') as f_out:
            _expand_ranges(f_in, f_out)
|
|
|
|
|
|
|
|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|