|
@ -5,16 +5,168 @@ |
|
|
#ifndef DUK_UNICODE_H_INCLUDED |
|
|
#ifndef DUK_UNICODE_H_INCLUDED |
|
|
#define DUK_UNICODE_H_INCLUDED |
|
|
#define DUK_UNICODE_H_INCLUDED |
|
|
|
|
|
|
|
|
|
|
|
/*
 *  UTF-8 / XUTF-8 / CESU-8 constants
 *
 *  Maximum encoded length, in bytes, of a single codepoint in each
 *  encoding.  Plain integer literals so they can be used both in #if
 *  expressions and as array dimensions.
 */

#define DUK_UNICODE_MAX_XUTF8_LENGTH   7     /* up to 36 bit codepoints */
#define DUK_UNICODE_MAX_CESU8_LENGTH   6     /* all codepoints up to U+10FFFF */
|
|
|
|
|
|
|
|
/*
 *  Useful Unicode codepoints
 *
 *  Integer constants must be signed to avoid unexpected coercions
 *  in comparisons.  The 'L' suffix gives each constant type 'long',
 *  which is signed.
 */

#define DUK_UNICODE_CP_ZWNJ                   0x200cL  /* zero-width non-joiner */
#define DUK_UNICODE_CP_ZWJ                    0x200dL  /* zero-width joiner */
#define DUK_UNICODE_CP_REPLACEMENT_CHARACTER  0xfffdL  /* http://en.wikipedia.org/wiki/Replacement_character#Replacement_character */
|
|
|
|
|
|
|
|
|
|
|
/*
 *  ASCII character constants
 *
 *  C character literals like 'x' have a platform specific value and do
 *  not match ASCII (UTF-8) values on e.g. EBCDIC platforms.  So, use
 *  these (admittedly awkward) constants instead.  These constants must
 *  also have signed values to avoid unexpected coercions in comparisons.
 *  (Each value below fits in 'int', so the unsuffixed hexadecimal
 *  literals have the signed type 'int'.)
 *
 *  http://en.wikipedia.org/wiki/ASCII
 */

/* Control characters, 0x00-0x1f. */
#define DUK_ASC_NUL              0x00
#define DUK_ASC_SOH              0x01
#define DUK_ASC_STX              0x02
#define DUK_ASC_ETX              0x03
#define DUK_ASC_EOT              0x04
#define DUK_ASC_ENQ              0x05
#define DUK_ASC_ACK              0x06
#define DUK_ASC_BEL              0x07
#define DUK_ASC_BS               0x08
#define DUK_ASC_HT               0x09
#define DUK_ASC_LF               0x0a
#define DUK_ASC_VT               0x0b
#define DUK_ASC_FF               0x0c
#define DUK_ASC_CR               0x0d
#define DUK_ASC_SO               0x0e
#define DUK_ASC_SI               0x0f
#define DUK_ASC_DLE              0x10
#define DUK_ASC_DC1              0x11
#define DUK_ASC_DC2              0x12
#define DUK_ASC_DC3              0x13
#define DUK_ASC_DC4              0x14
#define DUK_ASC_NAK              0x15
#define DUK_ASC_SYN              0x16
#define DUK_ASC_ETB              0x17
#define DUK_ASC_CAN              0x18
#define DUK_ASC_EM               0x19
#define DUK_ASC_SUB              0x1a
#define DUK_ASC_ESC              0x1b
#define DUK_ASC_FS               0x1c
#define DUK_ASC_GS               0x1d
#define DUK_ASC_RS               0x1e
#define DUK_ASC_US               0x1f

/* Space and punctuation, 0x20-0x2f. */
#define DUK_ASC_SPACE            0x20
#define DUK_ASC_EXCLAMATION      0x21
#define DUK_ASC_DOUBLEQUOTE      0x22
#define DUK_ASC_HASH             0x23
#define DUK_ASC_DOLLAR           0x24
#define DUK_ASC_PERCENT          0x25
#define DUK_ASC_AMP              0x26
#define DUK_ASC_SINGLEQUOTE      0x27
#define DUK_ASC_LPAREN           0x28
#define DUK_ASC_RPAREN           0x29
#define DUK_ASC_STAR             0x2a
#define DUK_ASC_PLUS             0x2b
#define DUK_ASC_COMMA            0x2c
#define DUK_ASC_MINUS            0x2d
#define DUK_ASC_PERIOD           0x2e
#define DUK_ASC_SLASH            0x2f

/* Decimal digits, 0x30-0x39. */
#define DUK_ASC_0                0x30
#define DUK_ASC_1                0x31
#define DUK_ASC_2                0x32
#define DUK_ASC_3                0x33
#define DUK_ASC_4                0x34
#define DUK_ASC_5                0x35
#define DUK_ASC_6                0x36
#define DUK_ASC_7                0x37
#define DUK_ASC_8                0x38
#define DUK_ASC_9                0x39

/* Punctuation, 0x3a-0x40. */
#define DUK_ASC_COLON            0x3a
#define DUK_ASC_SEMICOLON        0x3b
#define DUK_ASC_LANGLE           0x3c
#define DUK_ASC_EQUALS           0x3d
#define DUK_ASC_RANGLE           0x3e
#define DUK_ASC_QUESTION         0x3f
#define DUK_ASC_ATSIGN           0x40

/* Uppercase letters, 0x41-0x5a. */
#define DUK_ASC_UC_A             0x41
#define DUK_ASC_UC_B             0x42
#define DUK_ASC_UC_C             0x43
#define DUK_ASC_UC_D             0x44
#define DUK_ASC_UC_E             0x45
#define DUK_ASC_UC_F             0x46
#define DUK_ASC_UC_G             0x47
#define DUK_ASC_UC_H             0x48
#define DUK_ASC_UC_I             0x49
#define DUK_ASC_UC_J             0x4a
#define DUK_ASC_UC_K             0x4b
#define DUK_ASC_UC_L             0x4c
#define DUK_ASC_UC_M             0x4d
#define DUK_ASC_UC_N             0x4e
#define DUK_ASC_UC_O             0x4f
#define DUK_ASC_UC_P             0x50
#define DUK_ASC_UC_Q             0x51
#define DUK_ASC_UC_R             0x52
#define DUK_ASC_UC_S             0x53
#define DUK_ASC_UC_T             0x54
#define DUK_ASC_UC_U             0x55
#define DUK_ASC_UC_V             0x56
#define DUK_ASC_UC_W             0x57
#define DUK_ASC_UC_X             0x58
#define DUK_ASC_UC_Y             0x59
#define DUK_ASC_UC_Z             0x5a

/* Punctuation, 0x5b-0x60. */
#define DUK_ASC_LBRACKET         0x5b
#define DUK_ASC_BACKSLASH        0x5c
#define DUK_ASC_RBRACKET         0x5d
#define DUK_ASC_CARET            0x5e
#define DUK_ASC_UNDERSCORE       0x5f
#define DUK_ASC_GRAVE            0x60

/* Lowercase letters, 0x61-0x7a. */
#define DUK_ASC_LC_A             0x61
#define DUK_ASC_LC_B             0x62
#define DUK_ASC_LC_C             0x63
#define DUK_ASC_LC_D             0x64
#define DUK_ASC_LC_E             0x65
#define DUK_ASC_LC_F             0x66
#define DUK_ASC_LC_G             0x67
#define DUK_ASC_LC_H             0x68
#define DUK_ASC_LC_I             0x69
#define DUK_ASC_LC_J             0x6a
#define DUK_ASC_LC_K             0x6b
#define DUK_ASC_LC_L             0x6c
#define DUK_ASC_LC_M             0x6d
#define DUK_ASC_LC_N             0x6e
#define DUK_ASC_LC_O             0x6f
#define DUK_ASC_LC_P             0x70
#define DUK_ASC_LC_Q             0x71
#define DUK_ASC_LC_R             0x72
#define DUK_ASC_LC_S             0x73
#define DUK_ASC_LC_T             0x74
#define DUK_ASC_LC_U             0x75
#define DUK_ASC_LC_V             0x76
#define DUK_ASC_LC_W             0x77
#define DUK_ASC_LC_X             0x78
#define DUK_ASC_LC_Y             0x79
#define DUK_ASC_LC_Z             0x7a

/* Punctuation and DEL, 0x7b-0x7f. */
#define DUK_ASC_LCURLY           0x7b
#define DUK_ASC_PIPE             0x7c
#define DUK_ASC_RCURLY           0x7d
#define DUK_ASC_TILDE            0x7e
#define DUK_ASC_DEL              0x7f
|
|
|
|
|
|
|
|
|
|
|
/*
 *  Unicode tables
 */
|
|
|
|
|
|
|
|
#ifdef DUK_USE_SOURCE_NONBMP |
|
|
#ifdef DUK_USE_SOURCE_NONBMP |
|
|
#include "duk_unicode_ids_noa.h" |
|
|
#include "duk_unicode_ids_noa.h" |
|
|
#else |
|
|
#else |
|
|