unicode.js 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. const UNDEFINED_CODE_POINTS = new Set([
  2. 65534, 65535, 131070, 131071, 196606, 196607, 262142, 262143, 327678, 327679, 393214,
  3. 393215, 458750, 458751, 524286, 524287, 589822, 589823, 655358, 655359, 720894,
  4. 720895, 786430, 786431, 851966, 851967, 917502, 917503, 983038, 983039, 1048574,
  5. 1048575, 1114110, 1114111,
  6. ]);
  7. export const REPLACEMENT_CHARACTER = '\uFFFD';
  8. export var CODE_POINTS;
  9. (function (CODE_POINTS) {
  10. CODE_POINTS[CODE_POINTS["EOF"] = -1] = "EOF";
  11. CODE_POINTS[CODE_POINTS["NULL"] = 0] = "NULL";
  12. CODE_POINTS[CODE_POINTS["TABULATION"] = 9] = "TABULATION";
  13. CODE_POINTS[CODE_POINTS["CARRIAGE_RETURN"] = 13] = "CARRIAGE_RETURN";
  14. CODE_POINTS[CODE_POINTS["LINE_FEED"] = 10] = "LINE_FEED";
  15. CODE_POINTS[CODE_POINTS["FORM_FEED"] = 12] = "FORM_FEED";
  16. CODE_POINTS[CODE_POINTS["SPACE"] = 32] = "SPACE";
  17. CODE_POINTS[CODE_POINTS["EXCLAMATION_MARK"] = 33] = "EXCLAMATION_MARK";
  18. CODE_POINTS[CODE_POINTS["QUOTATION_MARK"] = 34] = "QUOTATION_MARK";
  19. CODE_POINTS[CODE_POINTS["NUMBER_SIGN"] = 35] = "NUMBER_SIGN";
  20. CODE_POINTS[CODE_POINTS["AMPERSAND"] = 38] = "AMPERSAND";
  21. CODE_POINTS[CODE_POINTS["APOSTROPHE"] = 39] = "APOSTROPHE";
  22. CODE_POINTS[CODE_POINTS["HYPHEN_MINUS"] = 45] = "HYPHEN_MINUS";
  23. CODE_POINTS[CODE_POINTS["SOLIDUS"] = 47] = "SOLIDUS";
  24. CODE_POINTS[CODE_POINTS["DIGIT_0"] = 48] = "DIGIT_0";
  25. CODE_POINTS[CODE_POINTS["DIGIT_9"] = 57] = "DIGIT_9";
  26. CODE_POINTS[CODE_POINTS["SEMICOLON"] = 59] = "SEMICOLON";
  27. CODE_POINTS[CODE_POINTS["LESS_THAN_SIGN"] = 60] = "LESS_THAN_SIGN";
  28. CODE_POINTS[CODE_POINTS["EQUALS_SIGN"] = 61] = "EQUALS_SIGN";
  29. CODE_POINTS[CODE_POINTS["GREATER_THAN_SIGN"] = 62] = "GREATER_THAN_SIGN";
  30. CODE_POINTS[CODE_POINTS["QUESTION_MARK"] = 63] = "QUESTION_MARK";
  31. CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_A"] = 65] = "LATIN_CAPITAL_A";
  32. CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_F"] = 70] = "LATIN_CAPITAL_F";
  33. CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_X"] = 88] = "LATIN_CAPITAL_X";
  34. CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_Z"] = 90] = "LATIN_CAPITAL_Z";
  35. CODE_POINTS[CODE_POINTS["RIGHT_SQUARE_BRACKET"] = 93] = "RIGHT_SQUARE_BRACKET";
  36. CODE_POINTS[CODE_POINTS["GRAVE_ACCENT"] = 96] = "GRAVE_ACCENT";
  37. CODE_POINTS[CODE_POINTS["LATIN_SMALL_A"] = 97] = "LATIN_SMALL_A";
  38. CODE_POINTS[CODE_POINTS["LATIN_SMALL_F"] = 102] = "LATIN_SMALL_F";
  39. CODE_POINTS[CODE_POINTS["LATIN_SMALL_X"] = 120] = "LATIN_SMALL_X";
  40. CODE_POINTS[CODE_POINTS["LATIN_SMALL_Z"] = 122] = "LATIN_SMALL_Z";
  41. CODE_POINTS[CODE_POINTS["REPLACEMENT_CHARACTER"] = 65533] = "REPLACEMENT_CHARACTER";
  42. })(CODE_POINTS = CODE_POINTS || (CODE_POINTS = {}));
  43. export const SEQUENCES = {
  44. DASH_DASH: '--',
  45. CDATA_START: '[CDATA[',
  46. DOCTYPE: 'doctype',
  47. SCRIPT: 'script',
  48. PUBLIC: 'public',
  49. SYSTEM: 'system',
  50. };
  51. //Surrogates
  52. export function isSurrogate(cp) {
  53. return cp >= 55296 && cp <= 57343;
  54. }
  55. export function isSurrogatePair(cp) {
  56. return cp >= 56320 && cp <= 57343;
  57. }
  58. export function getSurrogatePairCodePoint(cp1, cp2) {
  59. return (cp1 - 55296) * 1024 + 9216 + cp2;
  60. }
  61. //NOTE: excluding NULL and ASCII whitespace
  62. export function isControlCodePoint(cp) {
  63. return ((cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
  64. (cp >= 0x7f && cp <= 0x9f));
  65. }
  66. export function isUndefinedCodePoint(cp) {
  67. return (cp >= 64976 && cp <= 65007) || UNDEFINED_CODE_POINTS.has(cp);
  68. }
  69. //# sourceMappingURL=unicode.js.map