You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

51 lines
1.6 KiB

#!/usr/bin/env python2
#
# UnicodeData.txt may contain ranges in addition to individual characters.
# Unpack the ranges into individual characters for the other scripts to use.
#
import os
import sys
import optparse
def main():
    """Expand the ranges in a UnicodeData.txt file into one line per code point.

    Command-line options (read from ``sys.argv``):
        --unicode-data  path of the input file (required)
        --output        path of the output file (required)
        --quiet         accepted, but not used by this function
        --verbose       accepted, but not used by this function

    Ordinary lines are copied to the output unchanged.  A range is a pair of
    consecutive lines whose second ``;``-separated field ends in ``First>``
    and ``Last>`` respectively.  Both lines are copied unchanged and, between
    them, one line is written for every code point strictly inside the range;
    each such line carries the fields of the ``First>`` line with the second
    field replaced by ``-""-``.

    Processing stops at end of file or at the first line that consists of a
    lone newline.

    Raises:
        SystemExit: a required option is missing (reported via
            ``parser.error``).
        Exception: a ``First>`` line is not followed by a ``Last>`` line.
    """
    parser = optparse.OptionParser()
    parser.add_option('--unicode-data', dest='unicode_data')
    parser.add_option('--output', dest='output')
    parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
    parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
    opts, _ = parser.parse_args()

    # Validate explicitly rather than with assert: asserts are removed under
    # "python -O", which would let open(None) fail with an unrelated error.
    if opts.unicode_data is None:
        parser.error('--unicode-data is required')
    if opts.output is None:
        parser.error('--output is required')

    # The files are handled as raw bytes, so every literal below is a bytes
    # literal.  On Python 2 these are plain str; on Python 3 they match what
    # binary-mode reads return.  The context manager closes both files even
    # when a malformed range aborts the run.
    with open(opts.unicode_data, 'rb') as f_in, open(opts.output, 'wb') as f_out:
        lines = iter(f_in)
        for line in lines:
            if line == b'\n':
                # A blank line ends processing, same as end of file.
                break

            parts = line.split(b';')  # the last field keeps its newline
            if not parts[1].endswith(b'First>'):
                f_out.write(line)
                continue

            # The range's closing line must follow immediately.  At end of
            # file next() yields b'', which fails the check below instead of
            # raising a bare IndexError.
            line2 = next(lines, b'')
            parts2 = line2.split(b';')
            if len(parts2) < 2 or not parts2[1].endswith(b'Last>'):
                raise Exception('cannot parse range')

            first_cp = int(parts[0], 16)
            last_cp = int(parts2[0], 16)

            # Everything after the code point, with the name field replaced
            # by a ditto marker; the trailing newline comes along with the
            # final field.
            suffix = b';'.join([b'-""-'] + parts[2:])

            f_out.write(line)
            # Only the interior code points are generated; the First> and
            # Last> lines themselves are written verbatim.
            f_out.writelines(
                b'%04X;%s' % (cp, suffix)
                for cp in range(first_cp + 1, last_cp)
            )
            f_out.write(line2)
# Run the conversion only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()