index.js 113 KB


  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.Tokenizer = exports.TokenizerMode = void 0;
  4. const preprocessor_js_1 = require("./preprocessor.js");
  5. const unicode_js_1 = require("../common/unicode.js");
  6. const token_js_1 = require("../common/token.js");
  7. const decode_js_1 = require("entities/lib/decode.js");
  8. const error_codes_js_1 = require("../common/error-codes.js");
  9. const html_js_1 = require("../common/html.js");
  // Replacement table for numeric character references that point into the C1 control range
  // (0x80-0x9F): each key maps to the code point that is used instead of it.
  const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
      [0x80, 0x20ac],
      [0x82, 0x201a],
      [0x83, 0x0192],
      [0x84, 0x201e],
      [0x85, 0x2026],
      [0x86, 0x2020],
      [0x87, 0x2021],
      [0x88, 0x02c6],
      [0x89, 0x2030],
      [0x8a, 0x0160],
      [0x8b, 0x2039],
      [0x8c, 0x0152],
      [0x8e, 0x017d],
      [0x91, 0x2018],
      [0x92, 0x2019],
      [0x93, 0x201c],
      [0x94, 0x201d],
      [0x95, 0x2022],
      [0x96, 0x2013],
      [0x97, 0x2014],
      [0x98, 0x02dc],
      [0x99, 0x2122],
      [0x9a, 0x0161],
      [0x9b, 0x203a],
      [0x9c, 0x0153],
      [0x9e, 0x017e],
      [0x9f, 0x0178],
  ]);
  // Tokenizer states. `State` maps every state name to its numeric id and every id back to its
  // name; ids are assigned sequentially from 0 in the order listed below.
  var State;
  (function (State) {
      const STATE_NAMES = [
          'DATA',
          'RCDATA',
          'RAWTEXT',
          'SCRIPT_DATA',
          'PLAINTEXT',
          'TAG_OPEN',
          'END_TAG_OPEN',
          'TAG_NAME',
          'RCDATA_LESS_THAN_SIGN',
          'RCDATA_END_TAG_OPEN',
          'RCDATA_END_TAG_NAME',
          'RAWTEXT_LESS_THAN_SIGN',
          'RAWTEXT_END_TAG_OPEN',
          'RAWTEXT_END_TAG_NAME',
          'SCRIPT_DATA_LESS_THAN_SIGN',
          'SCRIPT_DATA_END_TAG_OPEN',
          'SCRIPT_DATA_END_TAG_NAME',
          'SCRIPT_DATA_ESCAPE_START',
          'SCRIPT_DATA_ESCAPE_START_DASH',
          'SCRIPT_DATA_ESCAPED',
          'SCRIPT_DATA_ESCAPED_DASH',
          'SCRIPT_DATA_ESCAPED_DASH_DASH',
          'SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN',
          'SCRIPT_DATA_ESCAPED_END_TAG_OPEN',
          'SCRIPT_DATA_ESCAPED_END_TAG_NAME',
          'SCRIPT_DATA_DOUBLE_ESCAPE_START',
          'SCRIPT_DATA_DOUBLE_ESCAPED',
          'SCRIPT_DATA_DOUBLE_ESCAPED_DASH',
          'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH',
          'SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN',
          'SCRIPT_DATA_DOUBLE_ESCAPE_END',
          'BEFORE_ATTRIBUTE_NAME',
          'ATTRIBUTE_NAME',
          'AFTER_ATTRIBUTE_NAME',
          'BEFORE_ATTRIBUTE_VALUE',
          'ATTRIBUTE_VALUE_DOUBLE_QUOTED',
          'ATTRIBUTE_VALUE_SINGLE_QUOTED',
          'ATTRIBUTE_VALUE_UNQUOTED',
          'AFTER_ATTRIBUTE_VALUE_QUOTED',
          'SELF_CLOSING_START_TAG',
          'BOGUS_COMMENT',
          'MARKUP_DECLARATION_OPEN',
          'COMMENT_START',
          'COMMENT_START_DASH',
          'COMMENT',
          'COMMENT_LESS_THAN_SIGN',
          'COMMENT_LESS_THAN_SIGN_BANG',
          'COMMENT_LESS_THAN_SIGN_BANG_DASH',
          'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH',
          'COMMENT_END_DASH',
          'COMMENT_END',
          'COMMENT_END_BANG',
          'DOCTYPE',
          'BEFORE_DOCTYPE_NAME',
          'DOCTYPE_NAME',
          'AFTER_DOCTYPE_NAME',
          'AFTER_DOCTYPE_PUBLIC_KEYWORD',
          'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER',
          'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED',
          'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED',
          'AFTER_DOCTYPE_PUBLIC_IDENTIFIER',
          'BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS',
          'AFTER_DOCTYPE_SYSTEM_KEYWORD',
          'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER',
          'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED',
          'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED',
          'AFTER_DOCTYPE_SYSTEM_IDENTIFIER',
          'BOGUS_DOCTYPE',
          'CDATA_SECTION',
          'CDATA_SECTION_BRACKET',
          'CDATA_SECTION_END',
          'CHARACTER_REFERENCE',
          'NAMED_CHARACTER_REFERENCE',
          'AMBIGUOUS_AMPERSAND',
          'NUMERIC_CHARACTER_REFERENCE',
          'HEXADEMICAL_CHARACTER_REFERENCE_START',
          'HEXADEMICAL_CHARACTER_REFERENCE',
          'DECIMAL_CHARACTER_REFERENCE',
          'NUMERIC_CHARACTER_REFERENCE_END',
      ];
      STATE_NAMES.forEach((name, id) => {
          // Forward (name -> id) and reverse (id -> name) entries, as in a numeric enum.
          State[name] = id;
          State[id] = name;
      });
  })(State || (State = {}));
  123. //Tokenizer initial states for different modes
  124. exports.TokenizerMode = {
  125. DATA: State.DATA,
  126. RCDATA: State.RCDATA,
  127. RAWTEXT: State.RAWTEXT,
  128. SCRIPT_DATA: State.SCRIPT_DATA,
  129. PLAINTEXT: State.PLAINTEXT,
  130. CDATA_SECTION: State.CDATA_SECTION,
  131. };
  132. //Utils
  //OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
  //these functions if they are located in another module, because of the context switch.
  //Always perform an inlining check before modifying these functions ('node --trace-inlining').
  /** Returns true when `cp` lies in the inclusive range DIGIT_0..DIGIT_9. */
  function isAsciiDigit(cp) {
      const { DIGIT_0, DIGIT_9 } = unicode_js_1.CODE_POINTS;
      return DIGIT_0 <= cp && cp <= DIGIT_9;
  }
  /** Returns true when `cp` lies in the inclusive range LATIN_CAPITAL_A..LATIN_CAPITAL_Z. */
  function isAsciiUpper(cp) {
      const { LATIN_CAPITAL_A, LATIN_CAPITAL_Z } = unicode_js_1.CODE_POINTS;
      return LATIN_CAPITAL_A <= cp && cp <= LATIN_CAPITAL_Z;
  }
  /** Returns true when `cp` lies in the inclusive range LATIN_SMALL_A..LATIN_SMALL_Z. */
  function isAsciiLower(cp) {
      const { LATIN_SMALL_A, LATIN_SMALL_Z } = unicode_js_1.CODE_POINTS;
      return LATIN_SMALL_A <= cp && cp <= LATIN_SMALL_Z;
  }
  /** Returns true when `cp` is accepted by `isAsciiLower` or by `isAsciiUpper`. */
  function isAsciiLetter(cp) {
      if (isAsciiLower(cp)) {
          return true;
      }
      return isAsciiUpper(cp);
  }
  /** Returns true when `cp` is accepted by `isAsciiLetter` or by `isAsciiDigit`. */
  function isAsciiAlphaNumeric(cp) {
      if (isAsciiLetter(cp)) {
          return true;
      }
      return isAsciiDigit(cp);
  }
  /** Returns true when `cp` lies in the inclusive range LATIN_CAPITAL_A..LATIN_CAPITAL_F. */
  function isAsciiUpperHexDigit(cp) {
      const { LATIN_CAPITAL_A, LATIN_CAPITAL_F } = unicode_js_1.CODE_POINTS;
      return LATIN_CAPITAL_A <= cp && cp <= LATIN_CAPITAL_F;
  }
  /** Returns true when `cp` lies in the inclusive range LATIN_SMALL_A..LATIN_SMALL_F. */
  function isAsciiLowerHexDigit(cp) {
      const { LATIN_SMALL_A, LATIN_SMALL_F } = unicode_js_1.CODE_POINTS;
      return LATIN_SMALL_A <= cp && cp <= LATIN_SMALL_F;
  }
  /** Returns true when `cp` is an ASCII digit or an upper- or lower-case hex letter. */
  function isAsciiHexDigit(cp) {
      if (isAsciiDigit(cp) || isAsciiUpperHexDigit(cp)) {
          return true;
      }
      return isAsciiLowerHexDigit(cp);
  }
  /**
   * Shifts `cp` up by 0x20, the distance between an ASCII capital letter and its lower-case form.
   * The shift is unconditional: no check is made that `cp` is an upper-case letter.
   */
  function toAsciiLower(cp) {
      return cp + 0x20;
  }
  /** Returns true when `cp` is SPACE, LINE_FEED, TABULATION or FORM_FEED. */
  function isWhitespace(cp) {
      switch (cp) {
          case unicode_js_1.CODE_POINTS.SPACE:
          case unicode_js_1.CODE_POINTS.LINE_FEED:
          case unicode_js_1.CODE_POINTS.TABULATION:
          case unicode_js_1.CODE_POINTS.FORM_FEED:
              return true;
          default:
              return false;
      }
  }
  /** Returns true when `nextCp` is EQUALS_SIGN or an ASCII alphanumeric code point. */
  function isEntityInAttributeInvalidEnd(nextCp) {
      if (nextCp === unicode_js_1.CODE_POINTS.EQUALS_SIGN) {
          return true;
      }
      return isAsciiAlphaNumeric(nextCp);
  }
  /** Returns true when `cp` is whitespace, SOLIDUS or GREATER_THAN_SIGN. */
  function isScriptDataDoubleEscapeSequenceEnd(cp) {
      if (isWhitespace(cp)) {
          return true;
      }
      const { SOLIDUS, GREATER_THAN_SIGN } = unicode_js_1.CODE_POINTS;
      return cp === SOLIDUS || cp === GREATER_THAN_SIGN;
  }
  172. //Tokenizer
  173. class Tokenizer {
  /**
   * Creates an idle tokenizer that starts in the DATA state with no pending tokens.
   *
   * @param options Tokenizer options; `sourceCodeLocationInfo` is read by `getCurrentLocation`.
   * @param handler Receiver of the token/error callbacks; it is also handed to the preprocessor.
   */
  constructor(options, handler) {
      this.options = options;
      this.handler = handler;
      // Set by `pause()` and cleared by `resume()`; the parsing loop stops while it is true.
      this.paused = false;
      /** Ensures that the parsing loop isn't run multiple times at once. */
      this.inLoop = false;
      /**
       * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
       * and that it is not an integration point for either MathML or HTML.
       *
       * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
       */
      this.inForeignNode = false;
      // Tag name of the most recently emitted start tag (updated in `emitCurrentTagToken`).
      this.lastStartTagName = '';
      // True while there is input to process; set by `write()`/`insertHtmlAtCurrentPos()`,
      // cleared on hibernation and after the EOF token is emitted.
      this.active = false;
      // Current state, and the state recorded before entering a character reference.
      this.state = State.DATA;
      this.returnState = State.DATA;
      // NOTE(review): presumably the accumulator for numeric character references; not used in this part of the file.
      this.charRefCode = -1;
      // Code points consumed since the start of the current parsing-loop iteration.
      this.consumedAfterSnapshot = -1;
      // Pending batched character token and the tag/comment/doctype token under construction.
      this.currentCharacterToken = null;
      this.currentToken = null;
      this.currentAttr = { name: '', value: '' };
      this.preprocessor = new preprocessor_js_1.Preprocessor(handler);
      // Offset -1 places the start one position ahead of the preprocessor's current position.
      this.currentLocation = this.getCurrentLocation(-1);
  }
  199. //Errors
  200. _err(code) {
  201. var _a, _b;
  202. (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code));
  203. }
  204. // NOTE: `offset` may never run across line boundaries.
  205. getCurrentLocation(offset) {
  206. if (!this.options.sourceCodeLocationInfo) {
  207. return null;
  208. }
  209. return {
  210. startLine: this.preprocessor.line,
  211. startCol: this.preprocessor.col - offset,
  212. startOffset: this.preprocessor.offset - offset,
  213. endLine: -1,
  214. endCol: -1,
  215. endOffset: -1,
  216. };
  217. }
  218. _runParsingLoop() {
  219. if (this.inLoop)
  220. return;
  221. this.inLoop = true;
  222. while (this.active && !this.paused) {
  223. this.consumedAfterSnapshot = 0;
  224. const cp = this._consume();
  225. if (!this._ensureHibernation()) {
  226. this._callState(cp);
  227. }
  228. }
  229. this.inLoop = false;
  230. }
  231. //API
  /**
   * Requests that tokenization stop: the parsing loop checks `paused` before every iteration.
   * Call `resume()` to continue.
   */
  pause() {
      this.paused = true;
  }
  235. resume(writeCallback) {
  236. if (!this.paused) {
  237. throw new Error('Parser was already resumed');
  238. }
  239. this.paused = false;
  240. // Necessary for synchronous resume.
  241. if (this.inLoop)
  242. return;
  243. this._runParsingLoop();
  244. if (!this.paused) {
  245. writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
  246. }
  247. }
  248. write(chunk, isLastChunk, writeCallback) {
  249. this.active = true;
  250. this.preprocessor.write(chunk, isLastChunk);
  251. this._runParsingLoop();
  252. if (!this.paused) {
  253. writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
  254. }
  255. }
  /**
   * Hands `chunk` to `preprocessor.insertHtmlAtCurrentPos`, marks the tokenizer active and runs
   * the parsing loop (a no-op if a loop is already running).
   *
   * @param chunk Markup to insert at the preprocessor's current position.
   */
  insertHtmlAtCurrentPos(chunk) {
      this.active = true;
      this.preprocessor.insertHtmlAtCurrentPos(chunk);
      this._runParsingLoop();
  }
  261. //Hibernation
  262. _ensureHibernation() {
  263. if (this.preprocessor.endOfChunkHit) {
  264. this._unconsume(this.consumedAfterSnapshot);
  265. this.active = false;
  266. return true;
  267. }
  268. return false;
  269. }
  270. //Consumption
  271. _consume() {
  272. this.consumedAfterSnapshot++;
  273. return this.preprocessor.advance();
  274. }
  /**
   * Reverses `count` consumption steps: lowers `consumedAfterSnapshot` and calls
   * `preprocessor.retreat(count)`.
   *
   * @param count Number of steps to give back.
   */
  _unconsume(count) {
      this.consumedAfterSnapshot -= count;
      this.preprocessor.retreat(count);
  }
  /**
   * Switches to `state` and immediately processes `cp` in it, without consuming new input.
   *
   * @param state State to switch to.
   * @param cp Code point to hand to that state.
   */
  _reconsumeInState(state, cp) {
      this.state = state;
      this._callState(cp);
  }
  283. _advanceBy(count) {
  284. this.consumedAfterSnapshot += count;
  285. for (let i = 0; i < count; i++) {
  286. this.preprocessor.advance();
  287. }
  288. }
  289. _consumeSequenceIfMatch(pattern, caseSensitive) {
  290. if (this.preprocessor.startsWith(pattern, caseSensitive)) {
  291. // We will already have consumed one character before calling this method.
  292. this._advanceBy(pattern.length - 1);
  293. return true;
  294. }
  295. return false;
  296. }
  297. //Token creation
  298. _createStartTagToken() {
  299. this.currentToken = {
  300. type: token_js_1.TokenType.START_TAG,
  301. tagName: '',
  302. tagID: html_js_1.TAG_ID.UNKNOWN,
  303. selfClosing: false,
  304. ackSelfClosing: false,
  305. attrs: [],
  306. location: this.getCurrentLocation(1),
  307. };
  308. }
  309. _createEndTagToken() {
  310. this.currentToken = {
  311. type: token_js_1.TokenType.END_TAG,
  312. tagName: '',
  313. tagID: html_js_1.TAG_ID.UNKNOWN,
  314. selfClosing: false,
  315. ackSelfClosing: false,
  316. attrs: [],
  317. location: this.getCurrentLocation(2),
  318. };
  319. }
  320. _createCommentToken(offset) {
  321. this.currentToken = {
  322. type: token_js_1.TokenType.COMMENT,
  323. data: '',
  324. location: this.getCurrentLocation(offset),
  325. };
  326. }
  327. _createDoctypeToken(initialName) {
  328. this.currentToken = {
  329. type: token_js_1.TokenType.DOCTYPE,
  330. name: initialName,
  331. forceQuirks: false,
  332. publicId: null,
  333. systemId: null,
  334. location: this.currentLocation,
  335. };
  336. }
  337. _createCharacterToken(type, chars) {
  338. this.currentCharacterToken = {
  339. type,
  340. chars,
  341. location: this.currentLocation,
  342. };
  343. }
  344. //Tag attributes
  345. _createAttr(attrNameFirstCh) {
  346. this.currentAttr = {
  347. name: attrNameFirstCh,
  348. value: '',
  349. };
  350. this.currentLocation = this.getCurrentLocation(0);
  351. }
  352. _leaveAttrName() {
  353. var _a;
  354. var _b;
  355. const token = this.currentToken;
  356. if ((0, token_js_1.getTokenAttr)(token, this.currentAttr.name) === null) {
  357. token.attrs.push(this.currentAttr);
  358. if (token.location && this.currentLocation) {
  359. const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null)));
  360. attrLocations[this.currentAttr.name] = this.currentLocation;
  361. // Set end location
  362. this._leaveAttrValue();
  363. }
  364. }
  365. else {
  366. this._err(error_codes_js_1.ERR.duplicateAttribute);
  367. }
  368. }
  369. _leaveAttrValue() {
  370. if (this.currentLocation) {
  371. this.currentLocation.endLine = this.preprocessor.line;
  372. this.currentLocation.endCol = this.preprocessor.col;
  373. this.currentLocation.endOffset = this.preprocessor.offset;
  374. }
  375. }
  376. //Token emission
  377. prepareToken(ct) {
  378. this._emitCurrentCharacterToken(ct.location);
  379. this.currentToken = null;
  380. if (ct.location) {
  381. ct.location.endLine = this.preprocessor.line;
  382. ct.location.endCol = this.preprocessor.col + 1;
  383. ct.location.endOffset = this.preprocessor.offset + 1;
  384. }
  385. this.currentLocation = this.getCurrentLocation(-1);
  386. }
  387. emitCurrentTagToken() {
  388. const ct = this.currentToken;
  389. this.prepareToken(ct);
  390. ct.tagID = (0, html_js_1.getTagID)(ct.tagName);
  391. if (ct.type === token_js_1.TokenType.START_TAG) {
  392. this.lastStartTagName = ct.tagName;
  393. this.handler.onStartTag(ct);
  394. }
  395. else {
  396. if (ct.attrs.length > 0) {
  397. this._err(error_codes_js_1.ERR.endTagWithAttributes);
  398. }
  399. if (ct.selfClosing) {
  400. this._err(error_codes_js_1.ERR.endTagWithTrailingSolidus);
  401. }
  402. this.handler.onEndTag(ct);
  403. }
  404. this.preprocessor.dropParsedChunk();
  405. }
  /**
   * Finalises `ct` via `prepareToken`, delivers it through `handler.onComment` and lets the
   * preprocessor drop the already-parsed part of its input.
   *
   * @param ct The comment token to emit.
   */
  emitCurrentComment(ct) {
      this.prepareToken(ct);
      this.handler.onComment(ct);
      this.preprocessor.dropParsedChunk();
  }
  /**
   * Finalises `ct` via `prepareToken`, delivers it through `handler.onDoctype` and lets the
   * preprocessor drop the already-parsed part of its input.
   *
   * @param ct The doctype token to emit.
   */
  emitCurrentDoctype(ct) {
      this.prepareToken(ct);
      this.handler.onDoctype(ct);
      this.preprocessor.dropParsedChunk();
  }
  416. _emitCurrentCharacterToken(nextLocation) {
  417. if (this.currentCharacterToken) {
  418. //NOTE: if we have a pending character token, make it's end location equal to the
  419. //current token's start location.
  420. if (nextLocation && this.currentCharacterToken.location) {
  421. this.currentCharacterToken.location.endLine = nextLocation.startLine;
  422. this.currentCharacterToken.location.endCol = nextLocation.startCol;
  423. this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
  424. }
  425. switch (this.currentCharacterToken.type) {
  426. case token_js_1.TokenType.CHARACTER: {
  427. this.handler.onCharacter(this.currentCharacterToken);
  428. break;
  429. }
  430. case token_js_1.TokenType.NULL_CHARACTER: {
  431. this.handler.onNullCharacter(this.currentCharacterToken);
  432. break;
  433. }
  434. case token_js_1.TokenType.WHITESPACE_CHARACTER: {
  435. this.handler.onWhitespaceCharacter(this.currentCharacterToken);
  436. break;
  437. }
  438. }
  439. this.currentCharacterToken = null;
  440. }
  441. }
  442. _emitEOFToken() {
  443. const location = this.getCurrentLocation(0);
  444. if (location) {
  445. location.endLine = location.startLine;
  446. location.endCol = location.startCol;
  447. location.endOffset = location.startOffset;
  448. }
  449. this._emitCurrentCharacterToken(location);
  450. this.handler.onEof({ type: token_js_1.TokenType.EOF, location });
  451. this.active = false;
  452. }
  453. //Characters emission
  454. //OPTIMIZATION: specification uses only one type of character tokens (one token per character).
  455. //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
  456. //If we have a sequence of characters that belong to the same group, the parser can process it
  457. //as a single solid character token.
  458. //So, there are 3 types of character tokens in parse5:
  459. //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
  460. //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
  //3)TokenType.CHARACTER - any character sequence which doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
  462. _appendCharToCurrentCharacterToken(type, ch) {
  463. if (this.currentCharacterToken) {
  464. if (this.currentCharacterToken.type !== type) {
  465. this.currentLocation = this.getCurrentLocation(0);
  466. this._emitCurrentCharacterToken(this.currentLocation);
  467. this.preprocessor.dropParsedChunk();
  468. }
  469. else {
  470. this.currentCharacterToken.chars += ch;
  471. return;
  472. }
  473. }
  474. this._createCharacterToken(type, ch);
  475. }
  476. _emitCodePoint(cp) {
  477. const type = isWhitespace(cp)
  478. ? token_js_1.TokenType.WHITESPACE_CHARACTER
  479. : cp === unicode_js_1.CODE_POINTS.NULL
  480. ? token_js_1.TokenType.NULL_CHARACTER
  481. : token_js_1.TokenType.CHARACTER;
  482. this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
  483. }
  484. //NOTE: used when we emit characters explicitly.
  485. //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
  /**
   * Appends `ch` to the pending CHARACTER token without classifying it first.
   *
   * @param ch Text to emit; it is always filed under `TokenType.CHARACTER`.
   */
  _emitChars(ch) {
      this._appendCharToCurrentCharacterToken(token_js_1.TokenType.CHARACTER, ch);
  }
  489. // Character reference helpers
  /**
   * Tries to match a named character reference by walking `htmlDecodeTree`, starting with `cp`
   * and consuming further code points for as long as `determineBranch` finds a branch.
   *
   * On return, the code points consumed after the last stored value (`excess`) and the one extra
   * code point the loop always consumes have been given back to the preprocessor.
   *
   * @param cp First code point of the candidate reference name.
   * @returns An array with the code point(s) of the last value found, `[AMPERSAND]` when the
   *   value was found inside an attribute, lacked a semicolon and was followed by `=` or an ASCII
   *   alphanumeric, or `null` when no value was found at all.
   */
  _matchNamedCharacterReference(cp) {
      let result = null;
      // Code points consumed since the last stored value; they are unconsumed after the loop.
      let excess = 0;
      let withoutSemicolon = false;
      for (let i = 0, current = decode_js_1.htmlDecodeTree[0]; i >= 0; cp = this._consume()) {
          i = (0, decode_js_1.determineBranch)(decode_js_1.htmlDecodeTree, current, i + 1, cp);
          if (i < 0)
              break;
          excess += 1;
          current = decode_js_1.htmlDecodeTree[i];
          const masked = current & decode_js_1.BinTrieFlags.VALUE_LENGTH;
          // If the branch is a value, store it and continue
          if (masked) {
              // The mask is the number of bytes of the value, including the current byte.
              const valueLength = (masked >> 14) - 1;
              // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
              // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
              if (cp !== unicode_js_1.CODE_POINTS.SEMICOLON &&
                  this._isCharacterReferenceInAttribute() &&
                  isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) {
                  //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
                  //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
                  result = [unicode_js_1.CODE_POINTS.AMPERSAND];
                  // Skip over the value.
                  i += valueLength;
              }
              else {
                  // If this is a surrogate pair, consume the next two bytes.
                  result =
                      valueLength === 0
                          ? [decode_js_1.htmlDecodeTree[i] & ~decode_js_1.BinTrieFlags.VALUE_LENGTH]
                          : valueLength === 1
                              ? [decode_js_1.htmlDecodeTree[++i]]
                              : [decode_js_1.htmlDecodeTree[++i], decode_js_1.htmlDecodeTree[++i]];
                  excess = 0;
                  withoutSemicolon = cp !== unicode_js_1.CODE_POINTS.SEMICOLON;
              }
              if (valueLength === 0) {
                  // If the value is zero-length, we're done.
                  this._consume();
                  break;
              }
          }
      }
      this._unconsume(excess);
      if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
          this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference);
      }
      // We want to emit the error above on the code point after the entity.
      // We always consume one code point too many in the loop, and we wait to
      // unconsume it until after the error is emitted.
      this._unconsume(1);
      return result;
  }
  544. _isCharacterReferenceInAttribute() {
  545. return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
  546. this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
  547. this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED);
  548. }
  549. _flushCodePointConsumedAsCharacterReference(cp) {
  550. if (this._isCharacterReferenceInAttribute()) {
  551. this.currentAttr.value += String.fromCodePoint(cp);
  552. }
  553. else {
  554. this._emitCodePoint(cp);
  555. }
  556. }
  557. // Calling states this way turns out to be much faster than any other approach.
  558. _callState(cp) {
  559. switch (this.state) {
  560. case State.DATA: {
  561. this._stateData(cp);
  562. break;
  563. }
  564. case State.RCDATA: {
  565. this._stateRcdata(cp);
  566. break;
  567. }
  568. case State.RAWTEXT: {
  569. this._stateRawtext(cp);
  570. break;
  571. }
  572. case State.SCRIPT_DATA: {
  573. this._stateScriptData(cp);
  574. break;
  575. }
  576. case State.PLAINTEXT: {
  577. this._statePlaintext(cp);
  578. break;
  579. }
  580. case State.TAG_OPEN: {
  581. this._stateTagOpen(cp);
  582. break;
  583. }
  584. case State.END_TAG_OPEN: {
  585. this._stateEndTagOpen(cp);
  586. break;
  587. }
  588. case State.TAG_NAME: {
  589. this._stateTagName(cp);
  590. break;
  591. }
  592. case State.RCDATA_LESS_THAN_SIGN: {
  593. this._stateRcdataLessThanSign(cp);
  594. break;
  595. }
  596. case State.RCDATA_END_TAG_OPEN: {
  597. this._stateRcdataEndTagOpen(cp);
  598. break;
  599. }
  600. case State.RCDATA_END_TAG_NAME: {
  601. this._stateRcdataEndTagName(cp);
  602. break;
  603. }
  604. case State.RAWTEXT_LESS_THAN_SIGN: {
  605. this._stateRawtextLessThanSign(cp);
  606. break;
  607. }
  608. case State.RAWTEXT_END_TAG_OPEN: {
  609. this._stateRawtextEndTagOpen(cp);
  610. break;
  611. }
  612. case State.RAWTEXT_END_TAG_NAME: {
  613. this._stateRawtextEndTagName(cp);
  614. break;
  615. }
  616. case State.SCRIPT_DATA_LESS_THAN_SIGN: {
  617. this._stateScriptDataLessThanSign(cp);
  618. break;
  619. }
  620. case State.SCRIPT_DATA_END_TAG_OPEN: {
  621. this._stateScriptDataEndTagOpen(cp);
  622. break;
  623. }
  624. case State.SCRIPT_DATA_END_TAG_NAME: {
  625. this._stateScriptDataEndTagName(cp);
  626. break;
  627. }
  628. case State.SCRIPT_DATA_ESCAPE_START: {
  629. this._stateScriptDataEscapeStart(cp);
  630. break;
  631. }
  632. case State.SCRIPT_DATA_ESCAPE_START_DASH: {
  633. this._stateScriptDataEscapeStartDash(cp);
  634. break;
  635. }
  636. case State.SCRIPT_DATA_ESCAPED: {
  637. this._stateScriptDataEscaped(cp);
  638. break;
  639. }
  640. case State.SCRIPT_DATA_ESCAPED_DASH: {
  641. this._stateScriptDataEscapedDash(cp);
  642. break;
  643. }
  644. case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
  645. this._stateScriptDataEscapedDashDash(cp);
  646. break;
  647. }
  648. case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
  649. this._stateScriptDataEscapedLessThanSign(cp);
  650. break;
  651. }
  652. case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
  653. this._stateScriptDataEscapedEndTagOpen(cp);
  654. break;
  655. }
  656. case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
  657. this._stateScriptDataEscapedEndTagName(cp);
  658. break;
  659. }
  660. case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
  661. this._stateScriptDataDoubleEscapeStart(cp);
  662. break;
  663. }
  664. case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
  665. this._stateScriptDataDoubleEscaped(cp);
  666. break;
  667. }
  668. case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
  669. this._stateScriptDataDoubleEscapedDash(cp);
  670. break;
  671. }
  672. case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
  673. this._stateScriptDataDoubleEscapedDashDash(cp);
  674. break;
  675. }
  676. case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
  677. this._stateScriptDataDoubleEscapedLessThanSign(cp);
  678. break;
  679. }
  680. case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
  681. this._stateScriptDataDoubleEscapeEnd(cp);
  682. break;
  683. }
  684. case State.BEFORE_ATTRIBUTE_NAME: {
  685. this._stateBeforeAttributeName(cp);
  686. break;
  687. }
  688. case State.ATTRIBUTE_NAME: {
  689. this._stateAttributeName(cp);
  690. break;
  691. }
  692. case State.AFTER_ATTRIBUTE_NAME: {
  693. this._stateAfterAttributeName(cp);
  694. break;
  695. }
  696. case State.BEFORE_ATTRIBUTE_VALUE: {
  697. this._stateBeforeAttributeValue(cp);
  698. break;
  699. }
  700. case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
  701. this._stateAttributeValueDoubleQuoted(cp);
  702. break;
  703. }
  704. case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
  705. this._stateAttributeValueSingleQuoted(cp);
  706. break;
  707. }
  708. case State.ATTRIBUTE_VALUE_UNQUOTED: {
  709. this._stateAttributeValueUnquoted(cp);
  710. break;
  711. }
  712. case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
  713. this._stateAfterAttributeValueQuoted(cp);
  714. break;
  715. }
  716. case State.SELF_CLOSING_START_TAG: {
  717. this._stateSelfClosingStartTag(cp);
  718. break;
  719. }
  720. case State.BOGUS_COMMENT: {
  721. this._stateBogusComment(cp);
  722. break;
  723. }
  724. case State.MARKUP_DECLARATION_OPEN: {
  725. this._stateMarkupDeclarationOpen(cp);
  726. break;
  727. }
  728. case State.COMMENT_START: {
  729. this._stateCommentStart(cp);
  730. break;
  731. }
  732. case State.COMMENT_START_DASH: {
  733. this._stateCommentStartDash(cp);
  734. break;
  735. }
  736. case State.COMMENT: {
  737. this._stateComment(cp);
  738. break;
  739. }
  740. case State.COMMENT_LESS_THAN_SIGN: {
  741. this._stateCommentLessThanSign(cp);
  742. break;
  743. }
  744. case State.COMMENT_LESS_THAN_SIGN_BANG: {
  745. this._stateCommentLessThanSignBang(cp);
  746. break;
  747. }
  748. case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
  749. this._stateCommentLessThanSignBangDash(cp);
  750. break;
  751. }
  752. case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
  753. this._stateCommentLessThanSignBangDashDash(cp);
  754. break;
  755. }
  756. case State.COMMENT_END_DASH: {
  757. this._stateCommentEndDash(cp);
  758. break;
  759. }
  760. case State.COMMENT_END: {
  761. this._stateCommentEnd(cp);
  762. break;
  763. }
  764. case State.COMMENT_END_BANG: {
  765. this._stateCommentEndBang(cp);
  766. break;
  767. }
  768. case State.DOCTYPE: {
  769. this._stateDoctype(cp);
  770. break;
  771. }
  772. case State.BEFORE_DOCTYPE_NAME: {
  773. this._stateBeforeDoctypeName(cp);
  774. break;
  775. }
  776. case State.DOCTYPE_NAME: {
  777. this._stateDoctypeName(cp);
  778. break;
  779. }
  780. case State.AFTER_DOCTYPE_NAME: {
  781. this._stateAfterDoctypeName(cp);
  782. break;
  783. }
  784. case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
  785. this._stateAfterDoctypePublicKeyword(cp);
  786. break;
  787. }
  788. case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
  789. this._stateBeforeDoctypePublicIdentifier(cp);
  790. break;
  791. }
  792. case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
  793. this._stateDoctypePublicIdentifierDoubleQuoted(cp);
  794. break;
  795. }
  796. case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
  797. this._stateDoctypePublicIdentifierSingleQuoted(cp);
  798. break;
  799. }
  800. case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
  801. this._stateAfterDoctypePublicIdentifier(cp);
  802. break;
  803. }
  804. case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
  805. this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
  806. break;
  807. }
  808. case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
  809. this._stateAfterDoctypeSystemKeyword(cp);
  810. break;
  811. }
  812. case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
  813. this._stateBeforeDoctypeSystemIdentifier(cp);
  814. break;
  815. }
  816. case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
  817. this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
  818. break;
  819. }
  820. case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
  821. this._stateDoctypeSystemIdentifierSingleQuoted(cp);
  822. break;
  823. }
  824. case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
  825. this._stateAfterDoctypeSystemIdentifier(cp);
  826. break;
  827. }
  828. case State.BOGUS_DOCTYPE: {
  829. this._stateBogusDoctype(cp);
  830. break;
  831. }
  832. case State.CDATA_SECTION: {
  833. this._stateCdataSection(cp);
  834. break;
  835. }
  836. case State.CDATA_SECTION_BRACKET: {
  837. this._stateCdataSectionBracket(cp);
  838. break;
  839. }
  840. case State.CDATA_SECTION_END: {
  841. this._stateCdataSectionEnd(cp);
  842. break;
  843. }
  844. case State.CHARACTER_REFERENCE: {
  845. this._stateCharacterReference(cp);
  846. break;
  847. }
  848. case State.NAMED_CHARACTER_REFERENCE: {
  849. this._stateNamedCharacterReference(cp);
  850. break;
  851. }
  852. case State.AMBIGUOUS_AMPERSAND: {
  853. this._stateAmbiguousAmpersand(cp);
  854. break;
  855. }
  856. case State.NUMERIC_CHARACTER_REFERENCE: {
  857. this._stateNumericCharacterReference(cp);
  858. break;
  859. }
  860. case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
  861. this._stateHexademicalCharacterReferenceStart(cp);
  862. break;
  863. }
  864. case State.HEXADEMICAL_CHARACTER_REFERENCE: {
  865. this._stateHexademicalCharacterReference(cp);
  866. break;
  867. }
  868. case State.DECIMAL_CHARACTER_REFERENCE: {
  869. this._stateDecimalCharacterReference(cp);
  870. break;
  871. }
  872. case State.NUMERIC_CHARACTER_REFERENCE_END: {
  873. this._stateNumericCharacterReferenceEnd(cp);
  874. break;
  875. }
  876. default: {
  877. throw new Error('Unknown state');
  878. }
  879. }
  880. }
  881. // State machine
  882. // Data state
  883. //------------------------------------------------------------------
  884. _stateData(cp) {
  885. switch (cp) {
  886. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  887. this.state = State.TAG_OPEN;
  888. break;
  889. }
  890. case unicode_js_1.CODE_POINTS.AMPERSAND: {
  891. this.returnState = State.DATA;
  892. this.state = State.CHARACTER_REFERENCE;
  893. break;
  894. }
  895. case unicode_js_1.CODE_POINTS.NULL: {
  896. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  897. this._emitCodePoint(cp);
  898. break;
  899. }
  900. case unicode_js_1.CODE_POINTS.EOF: {
  901. this._emitEOFToken();
  902. break;
  903. }
  904. default: {
  905. this._emitCodePoint(cp);
  906. }
  907. }
  908. }
  909. // RCDATA state
  910. //------------------------------------------------------------------
  911. _stateRcdata(cp) {
  912. switch (cp) {
  913. case unicode_js_1.CODE_POINTS.AMPERSAND: {
  914. this.returnState = State.RCDATA;
  915. this.state = State.CHARACTER_REFERENCE;
  916. break;
  917. }
  918. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  919. this.state = State.RCDATA_LESS_THAN_SIGN;
  920. break;
  921. }
  922. case unicode_js_1.CODE_POINTS.NULL: {
  923. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  924. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  925. break;
  926. }
  927. case unicode_js_1.CODE_POINTS.EOF: {
  928. this._emitEOFToken();
  929. break;
  930. }
  931. default: {
  932. this._emitCodePoint(cp);
  933. }
  934. }
  935. }
  936. // RAWTEXT state
  937. //------------------------------------------------------------------
  938. _stateRawtext(cp) {
  939. switch (cp) {
  940. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  941. this.state = State.RAWTEXT_LESS_THAN_SIGN;
  942. break;
  943. }
  944. case unicode_js_1.CODE_POINTS.NULL: {
  945. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  946. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  947. break;
  948. }
  949. case unicode_js_1.CODE_POINTS.EOF: {
  950. this._emitEOFToken();
  951. break;
  952. }
  953. default: {
  954. this._emitCodePoint(cp);
  955. }
  956. }
  957. }
  958. // Script data state
  959. //------------------------------------------------------------------
  960. _stateScriptData(cp) {
  961. switch (cp) {
  962. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  963. this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
  964. break;
  965. }
  966. case unicode_js_1.CODE_POINTS.NULL: {
  967. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  968. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  969. break;
  970. }
  971. case unicode_js_1.CODE_POINTS.EOF: {
  972. this._emitEOFToken();
  973. break;
  974. }
  975. default: {
  976. this._emitCodePoint(cp);
  977. }
  978. }
  979. }
  980. // PLAINTEXT state
  981. //------------------------------------------------------------------
  982. _statePlaintext(cp) {
  983. switch (cp) {
  984. case unicode_js_1.CODE_POINTS.NULL: {
  985. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  986. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  987. break;
  988. }
  989. case unicode_js_1.CODE_POINTS.EOF: {
  990. this._emitEOFToken();
  991. break;
  992. }
  993. default: {
  994. this._emitCodePoint(cp);
  995. }
  996. }
  997. }
  998. // Tag open state
  999. //------------------------------------------------------------------
  1000. _stateTagOpen(cp) {
  1001. if (isAsciiLetter(cp)) {
  1002. this._createStartTagToken();
  1003. this.state = State.TAG_NAME;
  1004. this._stateTagName(cp);
  1005. }
  1006. else
  1007. switch (cp) {
  1008. case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
  1009. this.state = State.MARKUP_DECLARATION_OPEN;
  1010. break;
  1011. }
  1012. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1013. this.state = State.END_TAG_OPEN;
  1014. break;
  1015. }
  1016. case unicode_js_1.CODE_POINTS.QUESTION_MARK: {
  1017. this._err(error_codes_js_1.ERR.unexpectedQuestionMarkInsteadOfTagName);
  1018. this._createCommentToken(1);
  1019. this.state = State.BOGUS_COMMENT;
  1020. this._stateBogusComment(cp);
  1021. break;
  1022. }
  1023. case unicode_js_1.CODE_POINTS.EOF: {
  1024. this._err(error_codes_js_1.ERR.eofBeforeTagName);
  1025. this._emitChars('<');
  1026. this._emitEOFToken();
  1027. break;
  1028. }
  1029. default: {
  1030. this._err(error_codes_js_1.ERR.invalidFirstCharacterOfTagName);
  1031. this._emitChars('<');
  1032. this.state = State.DATA;
  1033. this._stateData(cp);
  1034. }
  1035. }
  1036. }
  1037. // End tag open state
  1038. //------------------------------------------------------------------
  1039. _stateEndTagOpen(cp) {
  1040. if (isAsciiLetter(cp)) {
  1041. this._createEndTagToken();
  1042. this.state = State.TAG_NAME;
  1043. this._stateTagName(cp);
  1044. }
  1045. else
  1046. switch (cp) {
  1047. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1048. this._err(error_codes_js_1.ERR.missingEndTagName);
  1049. this.state = State.DATA;
  1050. break;
  1051. }
  1052. case unicode_js_1.CODE_POINTS.EOF: {
  1053. this._err(error_codes_js_1.ERR.eofBeforeTagName);
  1054. this._emitChars('</');
  1055. this._emitEOFToken();
  1056. break;
  1057. }
  1058. default: {
  1059. this._err(error_codes_js_1.ERR.invalidFirstCharacterOfTagName);
  1060. this._createCommentToken(2);
  1061. this.state = State.BOGUS_COMMENT;
  1062. this._stateBogusComment(cp);
  1063. }
  1064. }
  1065. }
  1066. // Tag name state
  1067. //------------------------------------------------------------------
  1068. _stateTagName(cp) {
  1069. const token = this.currentToken;
  1070. switch (cp) {
  1071. case unicode_js_1.CODE_POINTS.SPACE:
  1072. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1073. case unicode_js_1.CODE_POINTS.TABULATION:
  1074. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1075. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1076. break;
  1077. }
  1078. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1079. this.state = State.SELF_CLOSING_START_TAG;
  1080. break;
  1081. }
  1082. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1083. this.state = State.DATA;
  1084. this.emitCurrentTagToken();
  1085. break;
  1086. }
  1087. case unicode_js_1.CODE_POINTS.NULL: {
  1088. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1089. token.tagName += unicode_js_1.REPLACEMENT_CHARACTER;
  1090. break;
  1091. }
  1092. case unicode_js_1.CODE_POINTS.EOF: {
  1093. this._err(error_codes_js_1.ERR.eofInTag);
  1094. this._emitEOFToken();
  1095. break;
  1096. }
  1097. default: {
  1098. token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  1099. }
  1100. }
  1101. }
  1102. // RCDATA less-than sign state
  1103. //------------------------------------------------------------------
  1104. _stateRcdataLessThanSign(cp) {
  1105. if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
  1106. this.state = State.RCDATA_END_TAG_OPEN;
  1107. }
  1108. else {
  1109. this._emitChars('<');
  1110. this.state = State.RCDATA;
  1111. this._stateRcdata(cp);
  1112. }
  1113. }
  1114. // RCDATA end tag open state
  1115. //------------------------------------------------------------------
  1116. _stateRcdataEndTagOpen(cp) {
  1117. if (isAsciiLetter(cp)) {
  1118. this.state = State.RCDATA_END_TAG_NAME;
  1119. this._stateRcdataEndTagName(cp);
  1120. }
  1121. else {
  1122. this._emitChars('</');
  1123. this.state = State.RCDATA;
  1124. this._stateRcdata(cp);
  1125. }
  1126. }
  1127. handleSpecialEndTag(_cp) {
  1128. if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
  1129. return !this._ensureHibernation();
  1130. }
  1131. this._createEndTagToken();
  1132. const token = this.currentToken;
  1133. token.tagName = this.lastStartTagName;
  1134. const cp = this.preprocessor.peek(this.lastStartTagName.length);
  1135. switch (cp) {
  1136. case unicode_js_1.CODE_POINTS.SPACE:
  1137. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1138. case unicode_js_1.CODE_POINTS.TABULATION:
  1139. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1140. this._advanceBy(this.lastStartTagName.length);
  1141. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1142. return false;
  1143. }
  1144. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1145. this._advanceBy(this.lastStartTagName.length);
  1146. this.state = State.SELF_CLOSING_START_TAG;
  1147. return false;
  1148. }
  1149. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1150. this._advanceBy(this.lastStartTagName.length);
  1151. this.emitCurrentTagToken();
  1152. this.state = State.DATA;
  1153. return false;
  1154. }
  1155. default: {
  1156. return !this._ensureHibernation();
  1157. }
  1158. }
  1159. }
  1160. // RCDATA end tag name state
  1161. //------------------------------------------------------------------
  1162. _stateRcdataEndTagName(cp) {
  1163. if (this.handleSpecialEndTag(cp)) {
  1164. this._emitChars('</');
  1165. this.state = State.RCDATA;
  1166. this._stateRcdata(cp);
  1167. }
  1168. }
  1169. // RAWTEXT less-than sign state
  1170. //------------------------------------------------------------------
  1171. _stateRawtextLessThanSign(cp) {
  1172. if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
  1173. this.state = State.RAWTEXT_END_TAG_OPEN;
  1174. }
  1175. else {
  1176. this._emitChars('<');
  1177. this.state = State.RAWTEXT;
  1178. this._stateRawtext(cp);
  1179. }
  1180. }
  1181. // RAWTEXT end tag open state
  1182. //------------------------------------------------------------------
  1183. _stateRawtextEndTagOpen(cp) {
  1184. if (isAsciiLetter(cp)) {
  1185. this.state = State.RAWTEXT_END_TAG_NAME;
  1186. this._stateRawtextEndTagName(cp);
  1187. }
  1188. else {
  1189. this._emitChars('</');
  1190. this.state = State.RAWTEXT;
  1191. this._stateRawtext(cp);
  1192. }
  1193. }
  1194. // RAWTEXT end tag name state
  1195. //------------------------------------------------------------------
  1196. _stateRawtextEndTagName(cp) {
  1197. if (this.handleSpecialEndTag(cp)) {
  1198. this._emitChars('</');
  1199. this.state = State.RAWTEXT;
  1200. this._stateRawtext(cp);
  1201. }
  1202. }
  1203. // Script data less-than sign state
  1204. //------------------------------------------------------------------
  1205. _stateScriptDataLessThanSign(cp) {
  1206. switch (cp) {
  1207. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1208. this.state = State.SCRIPT_DATA_END_TAG_OPEN;
  1209. break;
  1210. }
  1211. case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
  1212. this.state = State.SCRIPT_DATA_ESCAPE_START;
  1213. this._emitChars('<!');
  1214. break;
  1215. }
  1216. default: {
  1217. this._emitChars('<');
  1218. this.state = State.SCRIPT_DATA;
  1219. this._stateScriptData(cp);
  1220. }
  1221. }
  1222. }
  1223. // Script data end tag open state
  1224. //------------------------------------------------------------------
  1225. _stateScriptDataEndTagOpen(cp) {
  1226. if (isAsciiLetter(cp)) {
  1227. this.state = State.SCRIPT_DATA_END_TAG_NAME;
  1228. this._stateScriptDataEndTagName(cp);
  1229. }
  1230. else {
  1231. this._emitChars('</');
  1232. this.state = State.SCRIPT_DATA;
  1233. this._stateScriptData(cp);
  1234. }
  1235. }
  1236. // Script data end tag name state
  1237. //------------------------------------------------------------------
  1238. _stateScriptDataEndTagName(cp) {
  1239. if (this.handleSpecialEndTag(cp)) {
  1240. this._emitChars('</');
  1241. this.state = State.SCRIPT_DATA;
  1242. this._stateScriptData(cp);
  1243. }
  1244. }
  1245. // Script data escape start state
  1246. //------------------------------------------------------------------
  1247. _stateScriptDataEscapeStart(cp) {
  1248. if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
  1249. this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
  1250. this._emitChars('-');
  1251. }
  1252. else {
  1253. this.state = State.SCRIPT_DATA;
  1254. this._stateScriptData(cp);
  1255. }
  1256. }
  1257. // Script data escape start dash state
  1258. //------------------------------------------------------------------
  1259. _stateScriptDataEscapeStartDash(cp) {
  1260. if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
  1261. this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
  1262. this._emitChars('-');
  1263. }
  1264. else {
  1265. this.state = State.SCRIPT_DATA;
  1266. this._stateScriptData(cp);
  1267. }
  1268. }
  1269. // Script data escaped state
  1270. //------------------------------------------------------------------
  1271. _stateScriptDataEscaped(cp) {
  1272. switch (cp) {
  1273. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1274. this.state = State.SCRIPT_DATA_ESCAPED_DASH;
  1275. this._emitChars('-');
  1276. break;
  1277. }
  1278. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1279. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  1280. break;
  1281. }
  1282. case unicode_js_1.CODE_POINTS.NULL: {
  1283. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1284. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1285. break;
  1286. }
  1287. case unicode_js_1.CODE_POINTS.EOF: {
  1288. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1289. this._emitEOFToken();
  1290. break;
  1291. }
  1292. default: {
  1293. this._emitCodePoint(cp);
  1294. }
  1295. }
  1296. }
  1297. // Script data escaped dash state
  1298. //------------------------------------------------------------------
  1299. _stateScriptDataEscapedDash(cp) {
  1300. switch (cp) {
  1301. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1302. this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
  1303. this._emitChars('-');
  1304. break;
  1305. }
  1306. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1307. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  1308. break;
  1309. }
  1310. case unicode_js_1.CODE_POINTS.NULL: {
  1311. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1312. this.state = State.SCRIPT_DATA_ESCAPED;
  1313. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1314. break;
  1315. }
  1316. case unicode_js_1.CODE_POINTS.EOF: {
  1317. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1318. this._emitEOFToken();
  1319. break;
  1320. }
  1321. default: {
  1322. this.state = State.SCRIPT_DATA_ESCAPED;
  1323. this._emitCodePoint(cp);
  1324. }
  1325. }
  1326. }
  1327. // Script data escaped dash dash state
  1328. //------------------------------------------------------------------
  1329. _stateScriptDataEscapedDashDash(cp) {
  1330. switch (cp) {
  1331. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1332. this._emitChars('-');
  1333. break;
  1334. }
  1335. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1336. this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
  1337. break;
  1338. }
  1339. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1340. this.state = State.SCRIPT_DATA;
  1341. this._emitChars('>');
  1342. break;
  1343. }
  1344. case unicode_js_1.CODE_POINTS.NULL: {
  1345. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1346. this.state = State.SCRIPT_DATA_ESCAPED;
  1347. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1348. break;
  1349. }
  1350. case unicode_js_1.CODE_POINTS.EOF: {
  1351. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1352. this._emitEOFToken();
  1353. break;
  1354. }
  1355. default: {
  1356. this.state = State.SCRIPT_DATA_ESCAPED;
  1357. this._emitCodePoint(cp);
  1358. }
  1359. }
  1360. }
  1361. // Script data escaped less-than sign state
  1362. //------------------------------------------------------------------
  1363. _stateScriptDataEscapedLessThanSign(cp) {
  1364. if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
  1365. this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
  1366. }
  1367. else if (isAsciiLetter(cp)) {
  1368. this._emitChars('<');
  1369. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
  1370. this._stateScriptDataDoubleEscapeStart(cp);
  1371. }
  1372. else {
  1373. this._emitChars('<');
  1374. this.state = State.SCRIPT_DATA_ESCAPED;
  1375. this._stateScriptDataEscaped(cp);
  1376. }
  1377. }
  1378. // Script data escaped end tag open state
  1379. //------------------------------------------------------------------
  1380. _stateScriptDataEscapedEndTagOpen(cp) {
  1381. if (isAsciiLetter(cp)) {
  1382. this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
  1383. this._stateScriptDataEscapedEndTagName(cp);
  1384. }
  1385. else {
  1386. this._emitChars('</');
  1387. this.state = State.SCRIPT_DATA_ESCAPED;
  1388. this._stateScriptDataEscaped(cp);
  1389. }
  1390. }
  1391. // Script data escaped end tag name state
  1392. //------------------------------------------------------------------
  1393. _stateScriptDataEscapedEndTagName(cp) {
  1394. if (this.handleSpecialEndTag(cp)) {
  1395. this._emitChars('</');
  1396. this.state = State.SCRIPT_DATA_ESCAPED;
  1397. this._stateScriptDataEscaped(cp);
  1398. }
  1399. }
  1400. // Script data double escape start state
  1401. //------------------------------------------------------------------
  1402. _stateScriptDataDoubleEscapeStart(cp) {
  1403. if (this.preprocessor.startsWith(unicode_js_1.SEQUENCES.SCRIPT, false) &&
  1404. isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(unicode_js_1.SEQUENCES.SCRIPT.length))) {
  1405. this._emitCodePoint(cp);
  1406. for (let i = 0; i < unicode_js_1.SEQUENCES.SCRIPT.length; i++) {
  1407. this._emitCodePoint(this._consume());
  1408. }
  1409. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1410. }
  1411. else if (!this._ensureHibernation()) {
  1412. this.state = State.SCRIPT_DATA_ESCAPED;
  1413. this._stateScriptDataEscaped(cp);
  1414. }
  1415. }
  1416. // Script data double escaped state
  1417. //------------------------------------------------------------------
  1418. _stateScriptDataDoubleEscaped(cp) {
  1419. switch (cp) {
  1420. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1421. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
  1422. this._emitChars('-');
  1423. break;
  1424. }
  1425. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1426. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  1427. this._emitChars('<');
  1428. break;
  1429. }
  1430. case unicode_js_1.CODE_POINTS.NULL: {
  1431. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1432. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1433. break;
  1434. }
  1435. case unicode_js_1.CODE_POINTS.EOF: {
  1436. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1437. this._emitEOFToken();
  1438. break;
  1439. }
  1440. default: {
  1441. this._emitCodePoint(cp);
  1442. }
  1443. }
  1444. }
  1445. // Script data double escaped dash state
  1446. //------------------------------------------------------------------
  1447. _stateScriptDataDoubleEscapedDash(cp) {
  1448. switch (cp) {
  1449. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1450. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
  1451. this._emitChars('-');
  1452. break;
  1453. }
  1454. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1455. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  1456. this._emitChars('<');
  1457. break;
  1458. }
  1459. case unicode_js_1.CODE_POINTS.NULL: {
  1460. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1461. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1462. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1463. break;
  1464. }
  1465. case unicode_js_1.CODE_POINTS.EOF: {
  1466. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1467. this._emitEOFToken();
  1468. break;
  1469. }
  1470. default: {
  1471. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1472. this._emitCodePoint(cp);
  1473. }
  1474. }
  1475. }
  1476. // Script data double escaped dash dash state
  1477. //------------------------------------------------------------------
  1478. _stateScriptDataDoubleEscapedDashDash(cp) {
  1479. switch (cp) {
  1480. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1481. this._emitChars('-');
  1482. break;
  1483. }
  1484. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1485. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
  1486. this._emitChars('<');
  1487. break;
  1488. }
  1489. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1490. this.state = State.SCRIPT_DATA;
  1491. this._emitChars('>');
  1492. break;
  1493. }
  1494. case unicode_js_1.CODE_POINTS.NULL: {
  1495. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1496. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1497. this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
  1498. break;
  1499. }
  1500. case unicode_js_1.CODE_POINTS.EOF: {
  1501. this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
  1502. this._emitEOFToken();
  1503. break;
  1504. }
  1505. default: {
  1506. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1507. this._emitCodePoint(cp);
  1508. }
  1509. }
  1510. }
  1511. // Script data double escaped less-than sign state
  1512. //------------------------------------------------------------------
  1513. _stateScriptDataDoubleEscapedLessThanSign(cp) {
  1514. if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
  1515. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
  1516. this._emitChars('/');
  1517. }
  1518. else {
  1519. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1520. this._stateScriptDataDoubleEscaped(cp);
  1521. }
  1522. }
  1523. // Script data double escape end state
  1524. //------------------------------------------------------------------
  1525. _stateScriptDataDoubleEscapeEnd(cp) {
  1526. if (this.preprocessor.startsWith(unicode_js_1.SEQUENCES.SCRIPT, false) &&
  1527. isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(unicode_js_1.SEQUENCES.SCRIPT.length))) {
  1528. this._emitCodePoint(cp);
  1529. for (let i = 0; i < unicode_js_1.SEQUENCES.SCRIPT.length; i++) {
  1530. this._emitCodePoint(this._consume());
  1531. }
  1532. this.state = State.SCRIPT_DATA_ESCAPED;
  1533. }
  1534. else if (!this._ensureHibernation()) {
  1535. this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
  1536. this._stateScriptDataDoubleEscaped(cp);
  1537. }
  1538. }
  1539. // Before attribute name state
  1540. //------------------------------------------------------------------
  1541. _stateBeforeAttributeName(cp) {
  1542. switch (cp) {
  1543. case unicode_js_1.CODE_POINTS.SPACE:
  1544. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1545. case unicode_js_1.CODE_POINTS.TABULATION:
  1546. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1547. // Ignore whitespace
  1548. break;
  1549. }
  1550. case unicode_js_1.CODE_POINTS.SOLIDUS:
  1551. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
  1552. case unicode_js_1.CODE_POINTS.EOF: {
  1553. this.state = State.AFTER_ATTRIBUTE_NAME;
  1554. this._stateAfterAttributeName(cp);
  1555. break;
  1556. }
  1557. case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
  1558. this._err(error_codes_js_1.ERR.unexpectedEqualsSignBeforeAttributeName);
  1559. this._createAttr('=');
  1560. this.state = State.ATTRIBUTE_NAME;
  1561. break;
  1562. }
  1563. default: {
  1564. this._createAttr('');
  1565. this.state = State.ATTRIBUTE_NAME;
  1566. this._stateAttributeName(cp);
  1567. }
  1568. }
  1569. }
  1570. // Attribute name state
  1571. //------------------------------------------------------------------
  1572. _stateAttributeName(cp) {
  1573. switch (cp) {
  1574. case unicode_js_1.CODE_POINTS.SPACE:
  1575. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1576. case unicode_js_1.CODE_POINTS.TABULATION:
  1577. case unicode_js_1.CODE_POINTS.FORM_FEED:
  1578. case unicode_js_1.CODE_POINTS.SOLIDUS:
  1579. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
  1580. case unicode_js_1.CODE_POINTS.EOF: {
  1581. this._leaveAttrName();
  1582. this.state = State.AFTER_ATTRIBUTE_NAME;
  1583. this._stateAfterAttributeName(cp);
  1584. break;
  1585. }
  1586. case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
  1587. this._leaveAttrName();
  1588. this.state = State.BEFORE_ATTRIBUTE_VALUE;
  1589. break;
  1590. }
  1591. case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
  1592. case unicode_js_1.CODE_POINTS.APOSTROPHE:
  1593. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1594. this._err(error_codes_js_1.ERR.unexpectedCharacterInAttributeName);
  1595. this.currentAttr.name += String.fromCodePoint(cp);
  1596. break;
  1597. }
  1598. case unicode_js_1.CODE_POINTS.NULL: {
  1599. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1600. this.currentAttr.name += unicode_js_1.REPLACEMENT_CHARACTER;
  1601. break;
  1602. }
  1603. default: {
  1604. this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  1605. }
  1606. }
  1607. }
  1608. // After attribute name state
  1609. //------------------------------------------------------------------
  1610. _stateAfterAttributeName(cp) {
  1611. switch (cp) {
  1612. case unicode_js_1.CODE_POINTS.SPACE:
  1613. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1614. case unicode_js_1.CODE_POINTS.TABULATION:
  1615. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1616. // Ignore whitespace
  1617. break;
  1618. }
  1619. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1620. this.state = State.SELF_CLOSING_START_TAG;
  1621. break;
  1622. }
  1623. case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
  1624. this.state = State.BEFORE_ATTRIBUTE_VALUE;
  1625. break;
  1626. }
  1627. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1628. this.state = State.DATA;
  1629. this.emitCurrentTagToken();
  1630. break;
  1631. }
  1632. case unicode_js_1.CODE_POINTS.EOF: {
  1633. this._err(error_codes_js_1.ERR.eofInTag);
  1634. this._emitEOFToken();
  1635. break;
  1636. }
  1637. default: {
  1638. this._createAttr('');
  1639. this.state = State.ATTRIBUTE_NAME;
  1640. this._stateAttributeName(cp);
  1641. }
  1642. }
  1643. }
  1644. // Before attribute value state
  1645. //------------------------------------------------------------------
  1646. _stateBeforeAttributeValue(cp) {
  1647. switch (cp) {
  1648. case unicode_js_1.CODE_POINTS.SPACE:
  1649. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1650. case unicode_js_1.CODE_POINTS.TABULATION:
  1651. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1652. // Ignore whitespace
  1653. break;
  1654. }
  1655. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  1656. this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
  1657. break;
  1658. }
  1659. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  1660. this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
  1661. break;
  1662. }
  1663. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1664. this._err(error_codes_js_1.ERR.missingAttributeValue);
  1665. this.state = State.DATA;
  1666. this.emitCurrentTagToken();
  1667. break;
  1668. }
  1669. default: {
  1670. this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
  1671. this._stateAttributeValueUnquoted(cp);
  1672. }
  1673. }
  1674. }
  1675. // Attribute value (double-quoted) state
  1676. //------------------------------------------------------------------
  1677. _stateAttributeValueDoubleQuoted(cp) {
  1678. switch (cp) {
  1679. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  1680. this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
  1681. break;
  1682. }
  1683. case unicode_js_1.CODE_POINTS.AMPERSAND: {
  1684. this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
  1685. this.state = State.CHARACTER_REFERENCE;
  1686. break;
  1687. }
  1688. case unicode_js_1.CODE_POINTS.NULL: {
  1689. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1690. this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
  1691. break;
  1692. }
  1693. case unicode_js_1.CODE_POINTS.EOF: {
  1694. this._err(error_codes_js_1.ERR.eofInTag);
  1695. this._emitEOFToken();
  1696. break;
  1697. }
  1698. default: {
  1699. this.currentAttr.value += String.fromCodePoint(cp);
  1700. }
  1701. }
  1702. }
  1703. // Attribute value (single-quoted) state
  1704. //------------------------------------------------------------------
  1705. _stateAttributeValueSingleQuoted(cp) {
  1706. switch (cp) {
  1707. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  1708. this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
  1709. break;
  1710. }
  1711. case unicode_js_1.CODE_POINTS.AMPERSAND: {
  1712. this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
  1713. this.state = State.CHARACTER_REFERENCE;
  1714. break;
  1715. }
  1716. case unicode_js_1.CODE_POINTS.NULL: {
  1717. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1718. this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
  1719. break;
  1720. }
  1721. case unicode_js_1.CODE_POINTS.EOF: {
  1722. this._err(error_codes_js_1.ERR.eofInTag);
  1723. this._emitEOFToken();
  1724. break;
  1725. }
  1726. default: {
  1727. this.currentAttr.value += String.fromCodePoint(cp);
  1728. }
  1729. }
  1730. }
  1731. // Attribute value (unquoted) state
  1732. //------------------------------------------------------------------
  1733. _stateAttributeValueUnquoted(cp) {
  1734. switch (cp) {
  1735. case unicode_js_1.CODE_POINTS.SPACE:
  1736. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1737. case unicode_js_1.CODE_POINTS.TABULATION:
  1738. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1739. this._leaveAttrValue();
  1740. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1741. break;
  1742. }
  1743. case unicode_js_1.CODE_POINTS.AMPERSAND: {
  1744. this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
  1745. this.state = State.CHARACTER_REFERENCE;
  1746. break;
  1747. }
  1748. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1749. this._leaveAttrValue();
  1750. this.state = State.DATA;
  1751. this.emitCurrentTagToken();
  1752. break;
  1753. }
  1754. case unicode_js_1.CODE_POINTS.NULL: {
  1755. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1756. this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
  1757. break;
  1758. }
  1759. case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
  1760. case unicode_js_1.CODE_POINTS.APOSTROPHE:
  1761. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN:
  1762. case unicode_js_1.CODE_POINTS.EQUALS_SIGN:
  1763. case unicode_js_1.CODE_POINTS.GRAVE_ACCENT: {
  1764. this._err(error_codes_js_1.ERR.unexpectedCharacterInUnquotedAttributeValue);
  1765. this.currentAttr.value += String.fromCodePoint(cp);
  1766. break;
  1767. }
  1768. case unicode_js_1.CODE_POINTS.EOF: {
  1769. this._err(error_codes_js_1.ERR.eofInTag);
  1770. this._emitEOFToken();
  1771. break;
  1772. }
  1773. default: {
  1774. this.currentAttr.value += String.fromCodePoint(cp);
  1775. }
  1776. }
  1777. }
  1778. // After attribute value (quoted) state
  1779. //------------------------------------------------------------------
  1780. _stateAfterAttributeValueQuoted(cp) {
  1781. switch (cp) {
  1782. case unicode_js_1.CODE_POINTS.SPACE:
  1783. case unicode_js_1.CODE_POINTS.LINE_FEED:
  1784. case unicode_js_1.CODE_POINTS.TABULATION:
  1785. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  1786. this._leaveAttrValue();
  1787. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1788. break;
  1789. }
  1790. case unicode_js_1.CODE_POINTS.SOLIDUS: {
  1791. this._leaveAttrValue();
  1792. this.state = State.SELF_CLOSING_START_TAG;
  1793. break;
  1794. }
  1795. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1796. this._leaveAttrValue();
  1797. this.state = State.DATA;
  1798. this.emitCurrentTagToken();
  1799. break;
  1800. }
  1801. case unicode_js_1.CODE_POINTS.EOF: {
  1802. this._err(error_codes_js_1.ERR.eofInTag);
  1803. this._emitEOFToken();
  1804. break;
  1805. }
  1806. default: {
  1807. this._err(error_codes_js_1.ERR.missingWhitespaceBetweenAttributes);
  1808. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1809. this._stateBeforeAttributeName(cp);
  1810. }
  1811. }
  1812. }
  1813. // Self-closing start tag state
  1814. //------------------------------------------------------------------
  1815. _stateSelfClosingStartTag(cp) {
  1816. switch (cp) {
  1817. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1818. const token = this.currentToken;
  1819. token.selfClosing = true;
  1820. this.state = State.DATA;
  1821. this.emitCurrentTagToken();
  1822. break;
  1823. }
  1824. case unicode_js_1.CODE_POINTS.EOF: {
  1825. this._err(error_codes_js_1.ERR.eofInTag);
  1826. this._emitEOFToken();
  1827. break;
  1828. }
  1829. default: {
  1830. this._err(error_codes_js_1.ERR.unexpectedSolidusInTag);
  1831. this.state = State.BEFORE_ATTRIBUTE_NAME;
  1832. this._stateBeforeAttributeName(cp);
  1833. }
  1834. }
  1835. }
  1836. // Bogus comment state
  1837. //------------------------------------------------------------------
  1838. _stateBogusComment(cp) {
  1839. const token = this.currentToken;
  1840. switch (cp) {
  1841. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1842. this.state = State.DATA;
  1843. this.emitCurrentComment(token);
  1844. break;
  1845. }
  1846. case unicode_js_1.CODE_POINTS.EOF: {
  1847. this.emitCurrentComment(token);
  1848. this._emitEOFToken();
  1849. break;
  1850. }
  1851. case unicode_js_1.CODE_POINTS.NULL: {
  1852. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1853. token.data += unicode_js_1.REPLACEMENT_CHARACTER;
  1854. break;
  1855. }
  1856. default: {
  1857. token.data += String.fromCodePoint(cp);
  1858. }
  1859. }
  1860. }
  1861. // Markup declaration open state
  1862. //------------------------------------------------------------------
  1863. _stateMarkupDeclarationOpen(cp) {
  1864. if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DASH_DASH, true)) {
  1865. this._createCommentToken(unicode_js_1.SEQUENCES.DASH_DASH.length + 1);
  1866. this.state = State.COMMENT_START;
  1867. }
  1868. else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DOCTYPE, false)) {
  1869. // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
  1870. this.currentLocation = this.getCurrentLocation(unicode_js_1.SEQUENCES.DOCTYPE.length + 1);
  1871. this.state = State.DOCTYPE;
  1872. }
  1873. else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.CDATA_START, true)) {
  1874. if (this.inForeignNode) {
  1875. this.state = State.CDATA_SECTION;
  1876. }
  1877. else {
  1878. this._err(error_codes_js_1.ERR.cdataInHtmlContent);
  1879. this._createCommentToken(unicode_js_1.SEQUENCES.CDATA_START.length + 1);
  1880. this.currentToken.data = '[CDATA[';
  1881. this.state = State.BOGUS_COMMENT;
  1882. }
  1883. }
  1884. //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
  1885. //results are no longer valid and we will need to start over.
  1886. else if (!this._ensureHibernation()) {
  1887. this._err(error_codes_js_1.ERR.incorrectlyOpenedComment);
  1888. this._createCommentToken(2);
  1889. this.state = State.BOGUS_COMMENT;
  1890. this._stateBogusComment(cp);
  1891. }
  1892. }
  1893. // Comment start state
  1894. //------------------------------------------------------------------
  1895. _stateCommentStart(cp) {
  1896. switch (cp) {
  1897. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1898. this.state = State.COMMENT_START_DASH;
  1899. break;
  1900. }
  1901. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1902. this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
  1903. this.state = State.DATA;
  1904. const token = this.currentToken;
  1905. this.emitCurrentComment(token);
  1906. break;
  1907. }
  1908. default: {
  1909. this.state = State.COMMENT;
  1910. this._stateComment(cp);
  1911. }
  1912. }
  1913. }
  1914. // Comment start dash state
  1915. //------------------------------------------------------------------
  1916. _stateCommentStartDash(cp) {
  1917. const token = this.currentToken;
  1918. switch (cp) {
  1919. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1920. this.state = State.COMMENT_END;
  1921. break;
  1922. }
  1923. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  1924. this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
  1925. this.state = State.DATA;
  1926. this.emitCurrentComment(token);
  1927. break;
  1928. }
  1929. case unicode_js_1.CODE_POINTS.EOF: {
  1930. this._err(error_codes_js_1.ERR.eofInComment);
  1931. this.emitCurrentComment(token);
  1932. this._emitEOFToken();
  1933. break;
  1934. }
  1935. default: {
  1936. token.data += '-';
  1937. this.state = State.COMMENT;
  1938. this._stateComment(cp);
  1939. }
  1940. }
  1941. }
  1942. // Comment state
  1943. //------------------------------------------------------------------
  1944. _stateComment(cp) {
  1945. const token = this.currentToken;
  1946. switch (cp) {
  1947. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  1948. this.state = State.COMMENT_END_DASH;
  1949. break;
  1950. }
  1951. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1952. token.data += '<';
  1953. this.state = State.COMMENT_LESS_THAN_SIGN;
  1954. break;
  1955. }
  1956. case unicode_js_1.CODE_POINTS.NULL: {
  1957. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  1958. token.data += unicode_js_1.REPLACEMENT_CHARACTER;
  1959. break;
  1960. }
  1961. case unicode_js_1.CODE_POINTS.EOF: {
  1962. this._err(error_codes_js_1.ERR.eofInComment);
  1963. this.emitCurrentComment(token);
  1964. this._emitEOFToken();
  1965. break;
  1966. }
  1967. default: {
  1968. token.data += String.fromCodePoint(cp);
  1969. }
  1970. }
  1971. }
  1972. // Comment less-than sign state
  1973. //------------------------------------------------------------------
  1974. _stateCommentLessThanSign(cp) {
  1975. const token = this.currentToken;
  1976. switch (cp) {
  1977. case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
  1978. token.data += '!';
  1979. this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
  1980. break;
  1981. }
  1982. case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
  1983. token.data += '<';
  1984. break;
  1985. }
  1986. default: {
  1987. this.state = State.COMMENT;
  1988. this._stateComment(cp);
  1989. }
  1990. }
  1991. }
  1992. // Comment less-than sign bang state
  1993. //------------------------------------------------------------------
  1994. _stateCommentLessThanSignBang(cp) {
  1995. if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
  1996. this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
  1997. }
  1998. else {
  1999. this.state = State.COMMENT;
  2000. this._stateComment(cp);
  2001. }
  2002. }
  2003. // Comment less-than sign bang dash state
  2004. //------------------------------------------------------------------
  2005. _stateCommentLessThanSignBangDash(cp) {
  2006. if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
  2007. this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
  2008. }
  2009. else {
  2010. this.state = State.COMMENT_END_DASH;
  2011. this._stateCommentEndDash(cp);
  2012. }
  2013. }
  2014. // Comment less-than sign bang dash dash state
  2015. //------------------------------------------------------------------
  2016. _stateCommentLessThanSignBangDashDash(cp) {
  2017. if (cp !== unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN && cp !== unicode_js_1.CODE_POINTS.EOF) {
  2018. this._err(error_codes_js_1.ERR.nestedComment);
  2019. }
  2020. this.state = State.COMMENT_END;
  2021. this._stateCommentEnd(cp);
  2022. }
  2023. // Comment end dash state
  2024. //------------------------------------------------------------------
  2025. _stateCommentEndDash(cp) {
  2026. const token = this.currentToken;
  2027. switch (cp) {
  2028. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  2029. this.state = State.COMMENT_END;
  2030. break;
  2031. }
  2032. case unicode_js_1.CODE_POINTS.EOF: {
  2033. this._err(error_codes_js_1.ERR.eofInComment);
  2034. this.emitCurrentComment(token);
  2035. this._emitEOFToken();
  2036. break;
  2037. }
  2038. default: {
  2039. token.data += '-';
  2040. this.state = State.COMMENT;
  2041. this._stateComment(cp);
  2042. }
  2043. }
  2044. }
  2045. // Comment end state
  2046. //------------------------------------------------------------------
  2047. _stateCommentEnd(cp) {
  2048. const token = this.currentToken;
  2049. switch (cp) {
  2050. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2051. this.state = State.DATA;
  2052. this.emitCurrentComment(token);
  2053. break;
  2054. }
  2055. case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
  2056. this.state = State.COMMENT_END_BANG;
  2057. break;
  2058. }
  2059. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  2060. token.data += '-';
  2061. break;
  2062. }
  2063. case unicode_js_1.CODE_POINTS.EOF: {
  2064. this._err(error_codes_js_1.ERR.eofInComment);
  2065. this.emitCurrentComment(token);
  2066. this._emitEOFToken();
  2067. break;
  2068. }
  2069. default: {
  2070. token.data += '--';
  2071. this.state = State.COMMENT;
  2072. this._stateComment(cp);
  2073. }
  2074. }
  2075. }
  2076. // Comment end bang state
  2077. //------------------------------------------------------------------
  2078. _stateCommentEndBang(cp) {
  2079. const token = this.currentToken;
  2080. switch (cp) {
  2081. case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
  2082. token.data += '--!';
  2083. this.state = State.COMMENT_END_DASH;
  2084. break;
  2085. }
  2086. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2087. this._err(error_codes_js_1.ERR.incorrectlyClosedComment);
  2088. this.state = State.DATA;
  2089. this.emitCurrentComment(token);
  2090. break;
  2091. }
  2092. case unicode_js_1.CODE_POINTS.EOF: {
  2093. this._err(error_codes_js_1.ERR.eofInComment);
  2094. this.emitCurrentComment(token);
  2095. this._emitEOFToken();
  2096. break;
  2097. }
  2098. default: {
  2099. token.data += '--!';
  2100. this.state = State.COMMENT;
  2101. this._stateComment(cp);
  2102. }
  2103. }
  2104. }
  2105. // DOCTYPE state
  2106. //------------------------------------------------------------------
  2107. _stateDoctype(cp) {
  2108. switch (cp) {
  2109. case unicode_js_1.CODE_POINTS.SPACE:
  2110. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2111. case unicode_js_1.CODE_POINTS.TABULATION:
  2112. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2113. this.state = State.BEFORE_DOCTYPE_NAME;
  2114. break;
  2115. }
  2116. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2117. this.state = State.BEFORE_DOCTYPE_NAME;
  2118. this._stateBeforeDoctypeName(cp);
  2119. break;
  2120. }
  2121. case unicode_js_1.CODE_POINTS.EOF: {
  2122. this._err(error_codes_js_1.ERR.eofInDoctype);
  2123. this._createDoctypeToken(null);
  2124. const token = this.currentToken;
  2125. token.forceQuirks = true;
  2126. this.emitCurrentDoctype(token);
  2127. this._emitEOFToken();
  2128. break;
  2129. }
  2130. default: {
  2131. this._err(error_codes_js_1.ERR.missingWhitespaceBeforeDoctypeName);
  2132. this.state = State.BEFORE_DOCTYPE_NAME;
  2133. this._stateBeforeDoctypeName(cp);
  2134. }
  2135. }
  2136. }
  2137. // Before DOCTYPE name state
  2138. //------------------------------------------------------------------
  2139. _stateBeforeDoctypeName(cp) {
  2140. if (isAsciiUpper(cp)) {
  2141. this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
  2142. this.state = State.DOCTYPE_NAME;
  2143. }
  2144. else
  2145. switch (cp) {
  2146. case unicode_js_1.CODE_POINTS.SPACE:
  2147. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2148. case unicode_js_1.CODE_POINTS.TABULATION:
  2149. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2150. // Ignore whitespace
  2151. break;
  2152. }
  2153. case unicode_js_1.CODE_POINTS.NULL: {
  2154. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2155. this._createDoctypeToken(unicode_js_1.REPLACEMENT_CHARACTER);
  2156. this.state = State.DOCTYPE_NAME;
  2157. break;
  2158. }
  2159. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2160. this._err(error_codes_js_1.ERR.missingDoctypeName);
  2161. this._createDoctypeToken(null);
  2162. const token = this.currentToken;
  2163. token.forceQuirks = true;
  2164. this.emitCurrentDoctype(token);
  2165. this.state = State.DATA;
  2166. break;
  2167. }
  2168. case unicode_js_1.CODE_POINTS.EOF: {
  2169. this._err(error_codes_js_1.ERR.eofInDoctype);
  2170. this._createDoctypeToken(null);
  2171. const token = this.currentToken;
  2172. token.forceQuirks = true;
  2173. this.emitCurrentDoctype(token);
  2174. this._emitEOFToken();
  2175. break;
  2176. }
  2177. default: {
  2178. this._createDoctypeToken(String.fromCodePoint(cp));
  2179. this.state = State.DOCTYPE_NAME;
  2180. }
  2181. }
  2182. }
  2183. // DOCTYPE name state
  2184. //------------------------------------------------------------------
  2185. _stateDoctypeName(cp) {
  2186. const token = this.currentToken;
  2187. switch (cp) {
  2188. case unicode_js_1.CODE_POINTS.SPACE:
  2189. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2190. case unicode_js_1.CODE_POINTS.TABULATION:
  2191. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2192. this.state = State.AFTER_DOCTYPE_NAME;
  2193. break;
  2194. }
  2195. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2196. this.state = State.DATA;
  2197. this.emitCurrentDoctype(token);
  2198. break;
  2199. }
  2200. case unicode_js_1.CODE_POINTS.NULL: {
  2201. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2202. token.name += unicode_js_1.REPLACEMENT_CHARACTER;
  2203. break;
  2204. }
  2205. case unicode_js_1.CODE_POINTS.EOF: {
  2206. this._err(error_codes_js_1.ERR.eofInDoctype);
  2207. token.forceQuirks = true;
  2208. this.emitCurrentDoctype(token);
  2209. this._emitEOFToken();
  2210. break;
  2211. }
  2212. default: {
  2213. token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
  2214. }
  2215. }
  2216. }
  2217. // After DOCTYPE name state
  2218. //------------------------------------------------------------------
  2219. _stateAfterDoctypeName(cp) {
  2220. const token = this.currentToken;
  2221. switch (cp) {
  2222. case unicode_js_1.CODE_POINTS.SPACE:
  2223. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2224. case unicode_js_1.CODE_POINTS.TABULATION:
  2225. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2226. // Ignore whitespace
  2227. break;
  2228. }
  2229. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2230. this.state = State.DATA;
  2231. this.emitCurrentDoctype(token);
  2232. break;
  2233. }
  2234. case unicode_js_1.CODE_POINTS.EOF: {
  2235. this._err(error_codes_js_1.ERR.eofInDoctype);
  2236. token.forceQuirks = true;
  2237. this.emitCurrentDoctype(token);
  2238. this._emitEOFToken();
  2239. break;
  2240. }
  2241. default: {
  2242. if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.PUBLIC, false)) {
  2243. this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
  2244. }
  2245. else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.SYSTEM, false)) {
  2246. this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
  2247. }
  2248. //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
  2249. //results are no longer valid and we will need to start over.
  2250. else if (!this._ensureHibernation()) {
  2251. this._err(error_codes_js_1.ERR.invalidCharacterSequenceAfterDoctypeName);
  2252. token.forceQuirks = true;
  2253. this.state = State.BOGUS_DOCTYPE;
  2254. this._stateBogusDoctype(cp);
  2255. }
  2256. }
  2257. }
  2258. }
  2259. // After DOCTYPE public keyword state
  2260. //------------------------------------------------------------------
  2261. _stateAfterDoctypePublicKeyword(cp) {
  2262. const token = this.currentToken;
  2263. switch (cp) {
  2264. case unicode_js_1.CODE_POINTS.SPACE:
  2265. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2266. case unicode_js_1.CODE_POINTS.TABULATION:
  2267. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2268. this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
  2269. break;
  2270. }
  2271. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2272. this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
  2273. token.publicId = '';
  2274. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
  2275. break;
  2276. }
  2277. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2278. this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
  2279. token.publicId = '';
  2280. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
  2281. break;
  2282. }
  2283. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2284. this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
  2285. token.forceQuirks = true;
  2286. this.state = State.DATA;
  2287. this.emitCurrentDoctype(token);
  2288. break;
  2289. }
  2290. case unicode_js_1.CODE_POINTS.EOF: {
  2291. this._err(error_codes_js_1.ERR.eofInDoctype);
  2292. token.forceQuirks = true;
  2293. this.emitCurrentDoctype(token);
  2294. this._emitEOFToken();
  2295. break;
  2296. }
  2297. default: {
  2298. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
  2299. token.forceQuirks = true;
  2300. this.state = State.BOGUS_DOCTYPE;
  2301. this._stateBogusDoctype(cp);
  2302. }
  2303. }
  2304. }
  2305. // Before DOCTYPE public identifier state
  2306. //------------------------------------------------------------------
  2307. _stateBeforeDoctypePublicIdentifier(cp) {
  2308. const token = this.currentToken;
  2309. switch (cp) {
  2310. case unicode_js_1.CODE_POINTS.SPACE:
  2311. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2312. case unicode_js_1.CODE_POINTS.TABULATION:
  2313. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2314. // Ignore whitespace
  2315. break;
  2316. }
  2317. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2318. token.publicId = '';
  2319. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
  2320. break;
  2321. }
  2322. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2323. token.publicId = '';
  2324. this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
  2325. break;
  2326. }
  2327. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2328. this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
  2329. token.forceQuirks = true;
  2330. this.state = State.DATA;
  2331. this.emitCurrentDoctype(token);
  2332. break;
  2333. }
  2334. case unicode_js_1.CODE_POINTS.EOF: {
  2335. this._err(error_codes_js_1.ERR.eofInDoctype);
  2336. token.forceQuirks = true;
  2337. this.emitCurrentDoctype(token);
  2338. this._emitEOFToken();
  2339. break;
  2340. }
  2341. default: {
  2342. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
  2343. token.forceQuirks = true;
  2344. this.state = State.BOGUS_DOCTYPE;
  2345. this._stateBogusDoctype(cp);
  2346. }
  2347. }
  2348. }
  2349. // DOCTYPE public identifier (double-quoted) state
  2350. //------------------------------------------------------------------
  2351. _stateDoctypePublicIdentifierDoubleQuoted(cp) {
  2352. const token = this.currentToken;
  2353. switch (cp) {
  2354. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2355. this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
  2356. break;
  2357. }
  2358. case unicode_js_1.CODE_POINTS.NULL: {
  2359. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2360. token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
  2361. break;
  2362. }
  2363. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2364. this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
  2365. token.forceQuirks = true;
  2366. this.emitCurrentDoctype(token);
  2367. this.state = State.DATA;
  2368. break;
  2369. }
  2370. case unicode_js_1.CODE_POINTS.EOF: {
  2371. this._err(error_codes_js_1.ERR.eofInDoctype);
  2372. token.forceQuirks = true;
  2373. this.emitCurrentDoctype(token);
  2374. this._emitEOFToken();
  2375. break;
  2376. }
  2377. default: {
  2378. token.publicId += String.fromCodePoint(cp);
  2379. }
  2380. }
  2381. }
  2382. // DOCTYPE public identifier (single-quoted) state
  2383. //------------------------------------------------------------------
  2384. _stateDoctypePublicIdentifierSingleQuoted(cp) {
  2385. const token = this.currentToken;
  2386. switch (cp) {
  2387. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2388. this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
  2389. break;
  2390. }
  2391. case unicode_js_1.CODE_POINTS.NULL: {
  2392. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2393. token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
  2394. break;
  2395. }
  2396. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2397. this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
  2398. token.forceQuirks = true;
  2399. this.emitCurrentDoctype(token);
  2400. this.state = State.DATA;
  2401. break;
  2402. }
  2403. case unicode_js_1.CODE_POINTS.EOF: {
  2404. this._err(error_codes_js_1.ERR.eofInDoctype);
  2405. token.forceQuirks = true;
  2406. this.emitCurrentDoctype(token);
  2407. this._emitEOFToken();
  2408. break;
  2409. }
  2410. default: {
  2411. token.publicId += String.fromCodePoint(cp);
  2412. }
  2413. }
  2414. }
  2415. // After DOCTYPE public identifier state
  2416. //------------------------------------------------------------------
  2417. _stateAfterDoctypePublicIdentifier(cp) {
  2418. const token = this.currentToken;
  2419. switch (cp) {
  2420. case unicode_js_1.CODE_POINTS.SPACE:
  2421. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2422. case unicode_js_1.CODE_POINTS.TABULATION:
  2423. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2424. this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
  2425. break;
  2426. }
  2427. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2428. this.state = State.DATA;
  2429. this.emitCurrentDoctype(token);
  2430. break;
  2431. }
  2432. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2433. this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
  2434. token.systemId = '';
  2435. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  2436. break;
  2437. }
  2438. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2439. this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
  2440. token.systemId = '';
  2441. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  2442. break;
  2443. }
  2444. case unicode_js_1.CODE_POINTS.EOF: {
  2445. this._err(error_codes_js_1.ERR.eofInDoctype);
  2446. token.forceQuirks = true;
  2447. this.emitCurrentDoctype(token);
  2448. this._emitEOFToken();
  2449. break;
  2450. }
  2451. default: {
  2452. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  2453. token.forceQuirks = true;
  2454. this.state = State.BOGUS_DOCTYPE;
  2455. this._stateBogusDoctype(cp);
  2456. }
  2457. }
  2458. }
  2459. // Between DOCTYPE public and system identifiers state
  2460. //------------------------------------------------------------------
  2461. _stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
  2462. const token = this.currentToken;
  2463. switch (cp) {
  2464. case unicode_js_1.CODE_POINTS.SPACE:
  2465. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2466. case unicode_js_1.CODE_POINTS.TABULATION:
  2467. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2468. // Ignore whitespace
  2469. break;
  2470. }
  2471. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2472. this.emitCurrentDoctype(token);
  2473. this.state = State.DATA;
  2474. break;
  2475. }
  2476. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2477. token.systemId = '';
  2478. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  2479. break;
  2480. }
  2481. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2482. token.systemId = '';
  2483. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  2484. break;
  2485. }
  2486. case unicode_js_1.CODE_POINTS.EOF: {
  2487. this._err(error_codes_js_1.ERR.eofInDoctype);
  2488. token.forceQuirks = true;
  2489. this.emitCurrentDoctype(token);
  2490. this._emitEOFToken();
  2491. break;
  2492. }
  2493. default: {
  2494. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  2495. token.forceQuirks = true;
  2496. this.state = State.BOGUS_DOCTYPE;
  2497. this._stateBogusDoctype(cp);
  2498. }
  2499. }
  2500. }
  2501. // After DOCTYPE system keyword state
  2502. //------------------------------------------------------------------
  2503. _stateAfterDoctypeSystemKeyword(cp) {
  2504. const token = this.currentToken;
  2505. switch (cp) {
  2506. case unicode_js_1.CODE_POINTS.SPACE:
  2507. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2508. case unicode_js_1.CODE_POINTS.TABULATION:
  2509. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2510. this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
  2511. break;
  2512. }
  2513. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2514. this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
  2515. token.systemId = '';
  2516. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  2517. break;
  2518. }
  2519. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2520. this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
  2521. token.systemId = '';
  2522. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  2523. break;
  2524. }
  2525. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2526. this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
  2527. token.forceQuirks = true;
  2528. this.state = State.DATA;
  2529. this.emitCurrentDoctype(token);
  2530. break;
  2531. }
  2532. case unicode_js_1.CODE_POINTS.EOF: {
  2533. this._err(error_codes_js_1.ERR.eofInDoctype);
  2534. token.forceQuirks = true;
  2535. this.emitCurrentDoctype(token);
  2536. this._emitEOFToken();
  2537. break;
  2538. }
  2539. default: {
  2540. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  2541. token.forceQuirks = true;
  2542. this.state = State.BOGUS_DOCTYPE;
  2543. this._stateBogusDoctype(cp);
  2544. }
  2545. }
  2546. }
  2547. // Before DOCTYPE system identifier state
  2548. //------------------------------------------------------------------
  2549. _stateBeforeDoctypeSystemIdentifier(cp) {
  2550. const token = this.currentToken;
  2551. switch (cp) {
  2552. case unicode_js_1.CODE_POINTS.SPACE:
  2553. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2554. case unicode_js_1.CODE_POINTS.TABULATION:
  2555. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2556. // Ignore whitespace
  2557. break;
  2558. }
  2559. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2560. token.systemId = '';
  2561. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
  2562. break;
  2563. }
  2564. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2565. token.systemId = '';
  2566. this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
  2567. break;
  2568. }
  2569. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2570. this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
  2571. token.forceQuirks = true;
  2572. this.state = State.DATA;
  2573. this.emitCurrentDoctype(token);
  2574. break;
  2575. }
  2576. case unicode_js_1.CODE_POINTS.EOF: {
  2577. this._err(error_codes_js_1.ERR.eofInDoctype);
  2578. token.forceQuirks = true;
  2579. this.emitCurrentDoctype(token);
  2580. this._emitEOFToken();
  2581. break;
  2582. }
  2583. default: {
  2584. this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
  2585. token.forceQuirks = true;
  2586. this.state = State.BOGUS_DOCTYPE;
  2587. this._stateBogusDoctype(cp);
  2588. }
  2589. }
  2590. }
  2591. // DOCTYPE system identifier (double-quoted) state
  2592. //------------------------------------------------------------------
  2593. _stateDoctypeSystemIdentifierDoubleQuoted(cp) {
  2594. const token = this.currentToken;
  2595. switch (cp) {
  2596. case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
  2597. this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
  2598. break;
  2599. }
  2600. case unicode_js_1.CODE_POINTS.NULL: {
  2601. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2602. token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
  2603. break;
  2604. }
  2605. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2606. this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
  2607. token.forceQuirks = true;
  2608. this.emitCurrentDoctype(token);
  2609. this.state = State.DATA;
  2610. break;
  2611. }
  2612. case unicode_js_1.CODE_POINTS.EOF: {
  2613. this._err(error_codes_js_1.ERR.eofInDoctype);
  2614. token.forceQuirks = true;
  2615. this.emitCurrentDoctype(token);
  2616. this._emitEOFToken();
  2617. break;
  2618. }
  2619. default: {
  2620. token.systemId += String.fromCodePoint(cp);
  2621. }
  2622. }
  2623. }
  2624. // DOCTYPE system identifier (single-quoted) state
  2625. //------------------------------------------------------------------
  2626. _stateDoctypeSystemIdentifierSingleQuoted(cp) {
  2627. const token = this.currentToken;
  2628. switch (cp) {
  2629. case unicode_js_1.CODE_POINTS.APOSTROPHE: {
  2630. this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
  2631. break;
  2632. }
  2633. case unicode_js_1.CODE_POINTS.NULL: {
  2634. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2635. token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
  2636. break;
  2637. }
  2638. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2639. this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
  2640. token.forceQuirks = true;
  2641. this.emitCurrentDoctype(token);
  2642. this.state = State.DATA;
  2643. break;
  2644. }
  2645. case unicode_js_1.CODE_POINTS.EOF: {
  2646. this._err(error_codes_js_1.ERR.eofInDoctype);
  2647. token.forceQuirks = true;
  2648. this.emitCurrentDoctype(token);
  2649. this._emitEOFToken();
  2650. break;
  2651. }
  2652. default: {
  2653. token.systemId += String.fromCodePoint(cp);
  2654. }
  2655. }
  2656. }
  2657. // After DOCTYPE system identifier state
  2658. //------------------------------------------------------------------
  2659. _stateAfterDoctypeSystemIdentifier(cp) {
  2660. const token = this.currentToken;
  2661. switch (cp) {
  2662. case unicode_js_1.CODE_POINTS.SPACE:
  2663. case unicode_js_1.CODE_POINTS.LINE_FEED:
  2664. case unicode_js_1.CODE_POINTS.TABULATION:
  2665. case unicode_js_1.CODE_POINTS.FORM_FEED: {
  2666. // Ignore whitespace
  2667. break;
  2668. }
  2669. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2670. this.emitCurrentDoctype(token);
  2671. this.state = State.DATA;
  2672. break;
  2673. }
  2674. case unicode_js_1.CODE_POINTS.EOF: {
  2675. this._err(error_codes_js_1.ERR.eofInDoctype);
  2676. token.forceQuirks = true;
  2677. this.emitCurrentDoctype(token);
  2678. this._emitEOFToken();
  2679. break;
  2680. }
  2681. default: {
  2682. this._err(error_codes_js_1.ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
  2683. this.state = State.BOGUS_DOCTYPE;
  2684. this._stateBogusDoctype(cp);
  2685. }
  2686. }
  2687. }
  2688. // Bogus DOCTYPE state
  2689. //------------------------------------------------------------------
  2690. _stateBogusDoctype(cp) {
  2691. const token = this.currentToken;
  2692. switch (cp) {
  2693. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2694. this.emitCurrentDoctype(token);
  2695. this.state = State.DATA;
  2696. break;
  2697. }
  2698. case unicode_js_1.CODE_POINTS.NULL: {
  2699. this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
  2700. break;
  2701. }
  2702. case unicode_js_1.CODE_POINTS.EOF: {
  2703. this.emitCurrentDoctype(token);
  2704. this._emitEOFToken();
  2705. break;
  2706. }
  2707. default:
  2708. // Do nothing
  2709. }
  2710. }
  2711. // CDATA section state
  2712. //------------------------------------------------------------------
  2713. _stateCdataSection(cp) {
  2714. switch (cp) {
  2715. case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: {
  2716. this.state = State.CDATA_SECTION_BRACKET;
  2717. break;
  2718. }
  2719. case unicode_js_1.CODE_POINTS.EOF: {
  2720. this._err(error_codes_js_1.ERR.eofInCdata);
  2721. this._emitEOFToken();
  2722. break;
  2723. }
  2724. default: {
  2725. this._emitCodePoint(cp);
  2726. }
  2727. }
  2728. }
  2729. // CDATA section bracket state
  2730. //------------------------------------------------------------------
  2731. _stateCdataSectionBracket(cp) {
  2732. if (cp === unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET) {
  2733. this.state = State.CDATA_SECTION_END;
  2734. }
  2735. else {
  2736. this._emitChars(']');
  2737. this.state = State.CDATA_SECTION;
  2738. this._stateCdataSection(cp);
  2739. }
  2740. }
  2741. // CDATA section end state
  2742. //------------------------------------------------------------------
  2743. _stateCdataSectionEnd(cp) {
  2744. switch (cp) {
  2745. case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
  2746. this.state = State.DATA;
  2747. break;
  2748. }
  2749. case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: {
  2750. this._emitChars(']');
  2751. break;
  2752. }
  2753. default: {
  2754. this._emitChars(']]');
  2755. this.state = State.CDATA_SECTION;
  2756. this._stateCdataSection(cp);
  2757. }
  2758. }
  2759. }
  2760. // Character reference state
  2761. //------------------------------------------------------------------
  2762. _stateCharacterReference(cp) {
  2763. if (cp === unicode_js_1.CODE_POINTS.NUMBER_SIGN) {
  2764. this.state = State.NUMERIC_CHARACTER_REFERENCE;
  2765. }
  2766. else if (isAsciiAlphaNumeric(cp)) {
  2767. this.state = State.NAMED_CHARACTER_REFERENCE;
  2768. this._stateNamedCharacterReference(cp);
  2769. }
  2770. else {
  2771. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND);
  2772. this._reconsumeInState(this.returnState, cp);
  2773. }
  2774. }
  2775. // Named character reference state
  2776. //------------------------------------------------------------------
  2777. _stateNamedCharacterReference(cp) {
  2778. const matchResult = this._matchNamedCharacterReference(cp);
  2779. //NOTE: Matching can be abrupted by hibernation. In that case, match
  2780. //results are no longer valid and we will need to start over.
  2781. if (this._ensureHibernation()) {
  2782. // Stay in the state, try again.
  2783. }
  2784. else if (matchResult) {
  2785. for (let i = 0; i < matchResult.length; i++) {
  2786. this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
  2787. }
  2788. this.state = this.returnState;
  2789. }
  2790. else {
  2791. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND);
  2792. this.state = State.AMBIGUOUS_AMPERSAND;
  2793. }
  2794. }
  2795. // Ambiguous ampersand state
  2796. //------------------------------------------------------------------
  2797. _stateAmbiguousAmpersand(cp) {
  2798. if (isAsciiAlphaNumeric(cp)) {
  2799. this._flushCodePointConsumedAsCharacterReference(cp);
  2800. }
  2801. else {
  2802. if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) {
  2803. this._err(error_codes_js_1.ERR.unknownNamedCharacterReference);
  2804. }
  2805. this._reconsumeInState(this.returnState, cp);
  2806. }
  2807. }
  2808. // Numeric character reference state
  2809. //------------------------------------------------------------------
  2810. _stateNumericCharacterReference(cp) {
  2811. this.charRefCode = 0;
  2812. if (cp === unicode_js_1.CODE_POINTS.LATIN_SMALL_X || cp === unicode_js_1.CODE_POINTS.LATIN_CAPITAL_X) {
  2813. this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
  2814. }
  2815. // Inlined decimal character reference start state
  2816. else if (isAsciiDigit(cp)) {
  2817. this.state = State.DECIMAL_CHARACTER_REFERENCE;
  2818. this._stateDecimalCharacterReference(cp);
  2819. }
  2820. else {
  2821. this._err(error_codes_js_1.ERR.absenceOfDigitsInNumericCharacterReference);
  2822. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND);
  2823. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.NUMBER_SIGN);
  2824. this._reconsumeInState(this.returnState, cp);
  2825. }
  2826. }
  2827. // Hexadecimal character reference start state
  2828. //------------------------------------------------------------------
  2829. _stateHexademicalCharacterReferenceStart(cp) {
  2830. if (isAsciiHexDigit(cp)) {
  2831. this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
  2832. this._stateHexademicalCharacterReference(cp);
  2833. }
  2834. else {
  2835. this._err(error_codes_js_1.ERR.absenceOfDigitsInNumericCharacterReference);
  2836. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND);
  2837. this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.NUMBER_SIGN);
  2838. this._unconsume(2);
  2839. this.state = this.returnState;
  2840. }
  2841. }
  2842. // Hexadecimal character reference state
  2843. //------------------------------------------------------------------
  2844. _stateHexademicalCharacterReference(cp) {
  2845. if (isAsciiUpperHexDigit(cp)) {
  2846. this.charRefCode = this.charRefCode * 16 + cp - 0x37;
  2847. }
  2848. else if (isAsciiLowerHexDigit(cp)) {
  2849. this.charRefCode = this.charRefCode * 16 + cp - 0x57;
  2850. }
  2851. else if (isAsciiDigit(cp)) {
  2852. this.charRefCode = this.charRefCode * 16 + cp - 0x30;
  2853. }
  2854. else if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) {
  2855. this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
  2856. }
  2857. else {
  2858. this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference);
  2859. this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
  2860. this._stateNumericCharacterReferenceEnd(cp);
  2861. }
  2862. }
  2863. // Decimal character reference state
  2864. //------------------------------------------------------------------
  2865. _stateDecimalCharacterReference(cp) {
  2866. if (isAsciiDigit(cp)) {
  2867. this.charRefCode = this.charRefCode * 10 + cp - 0x30;
  2868. }
  2869. else if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) {
  2870. this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
  2871. }
  2872. else {
  2873. this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference);
  2874. this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
  2875. this._stateNumericCharacterReferenceEnd(cp);
  2876. }
  2877. }
  2878. // Numeric character reference end state
  2879. //------------------------------------------------------------------
  2880. _stateNumericCharacterReferenceEnd(cp) {
  2881. if (this.charRefCode === unicode_js_1.CODE_POINTS.NULL) {
  2882. this._err(error_codes_js_1.ERR.nullCharacterReference);
  2883. this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER;
  2884. }
  2885. else if (this.charRefCode > 1114111) {
  2886. this._err(error_codes_js_1.ERR.characterReferenceOutsideUnicodeRange);
  2887. this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER;
  2888. }
  2889. else if ((0, unicode_js_1.isSurrogate)(this.charRefCode)) {
  2890. this._err(error_codes_js_1.ERR.surrogateCharacterReference);
  2891. this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER;
  2892. }
  2893. else if ((0, unicode_js_1.isUndefinedCodePoint)(this.charRefCode)) {
  2894. this._err(error_codes_js_1.ERR.noncharacterCharacterReference);
  2895. }
  2896. else if ((0, unicode_js_1.isControlCodePoint)(this.charRefCode) || this.charRefCode === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) {
  2897. this._err(error_codes_js_1.ERR.controlCharacterReference);
  2898. const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
  2899. if (replacement !== undefined) {
  2900. this.charRefCode = replacement;
  2901. }
  2902. }
  2903. this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
  2904. this._reconsumeInState(this.returnState, cp);
  2905. }
  2906. }
  2907. exports.Tokenizer = Tokenizer;
  2908. //# sourceMappingURL=index.js.map