// index.d.ts — type declarations for the tokenizer module.
  import { Preprocessor } from './preprocessor.js';
  import { type CharacterToken, type DoctypeToken, type TagToken, type EOFToken, type CommentToken } from '../common/token.js';
  import { type ParserErrorHandler } from '../common/error-codes.js';
  /**
   * Numeric identifiers for the tokenizer's states.
   *
   * Declared as an ambient `const enum`: the compiler inlines each member
   * reference as its numeric literal, so no `State` object exists at runtime.
   * The enum itself is not exported; it is reachable through the types of
   * `TokenizerMode` and `Tokenizer.state`.
   *
   * Member names appear to mirror the tokenization states of the WHATWG HTML
   * parsing specification (verify against the implementation if that matters).
   *
   * NOTE: `HEXADEMICAL_*` is the spelling these identifiers actually use
   * (the usual term is "hexadecimal"). It is kept unchanged because renaming
   * a member would change the declared names.
   */
  declare const enum State {
      // Top-level content states.
      DATA = 0,
      RCDATA = 1,
      RAWTEXT = 2,
      SCRIPT_DATA = 3,
      PLAINTEXT = 4,
      // Tag states.
      TAG_OPEN = 5,
      END_TAG_OPEN = 6,
      TAG_NAME = 7,
      // RCDATA end-tag detection.
      RCDATA_LESS_THAN_SIGN = 8,
      RCDATA_END_TAG_OPEN = 9,
      RCDATA_END_TAG_NAME = 10,
      // RAWTEXT end-tag detection.
      RAWTEXT_LESS_THAN_SIGN = 11,
      RAWTEXT_END_TAG_OPEN = 12,
      RAWTEXT_END_TAG_NAME = 13,
      // Script data, including the escaped and double-escaped variants.
      SCRIPT_DATA_LESS_THAN_SIGN = 14,
      SCRIPT_DATA_END_TAG_OPEN = 15,
      SCRIPT_DATA_END_TAG_NAME = 16,
      SCRIPT_DATA_ESCAPE_START = 17,
      SCRIPT_DATA_ESCAPE_START_DASH = 18,
      SCRIPT_DATA_ESCAPED = 19,
      SCRIPT_DATA_ESCAPED_DASH = 20,
      SCRIPT_DATA_ESCAPED_DASH_DASH = 21,
      SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 22,
      SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 23,
      SCRIPT_DATA_ESCAPED_END_TAG_NAME = 24,
      SCRIPT_DATA_DOUBLE_ESCAPE_START = 25,
      SCRIPT_DATA_DOUBLE_ESCAPED = 26,
      SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 27,
      SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 28,
      SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 29,
      SCRIPT_DATA_DOUBLE_ESCAPE_END = 30,
      // Attributes.
      BEFORE_ATTRIBUTE_NAME = 31,
      ATTRIBUTE_NAME = 32,
      AFTER_ATTRIBUTE_NAME = 33,
      BEFORE_ATTRIBUTE_VALUE = 34,
      ATTRIBUTE_VALUE_DOUBLE_QUOTED = 35,
      ATTRIBUTE_VALUE_SINGLE_QUOTED = 36,
      ATTRIBUTE_VALUE_UNQUOTED = 37,
      AFTER_ATTRIBUTE_VALUE_QUOTED = 38,
      SELF_CLOSING_START_TAG = 39,
      // Comments and markup declarations.
      BOGUS_COMMENT = 40,
      MARKUP_DECLARATION_OPEN = 41,
      COMMENT_START = 42,
      COMMENT_START_DASH = 43,
      COMMENT = 44,
      COMMENT_LESS_THAN_SIGN = 45,
      COMMENT_LESS_THAN_SIGN_BANG = 46,
      COMMENT_LESS_THAN_SIGN_BANG_DASH = 47,
      COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 48,
      COMMENT_END_DASH = 49,
      COMMENT_END = 50,
      COMMENT_END_BANG = 51,
      // DOCTYPE.
      DOCTYPE = 52,
      BEFORE_DOCTYPE_NAME = 53,
      DOCTYPE_NAME = 54,
      AFTER_DOCTYPE_NAME = 55,
      AFTER_DOCTYPE_PUBLIC_KEYWORD = 56,
      BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 57,
      DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 58,
      DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 59,
      AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 60,
      BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 61,
      AFTER_DOCTYPE_SYSTEM_KEYWORD = 62,
      BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 63,
      DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 64,
      DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 65,
      AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 66,
      BOGUS_DOCTYPE = 67,
      // CDATA sections.
      CDATA_SECTION = 68,
      CDATA_SECTION_BRACKET = 69,
      CDATA_SECTION_END = 70,
      // Character references.
      CHARACTER_REFERENCE = 71,
      NAMED_CHARACTER_REFERENCE = 72,
      AMBIGUOUS_AMPERSAND = 73,
      NUMERIC_CHARACTER_REFERENCE = 74,
      HEXADEMICAL_CHARACTER_REFERENCE_START = 75,
      HEXADEMICAL_CHARACTER_REFERENCE = 76,
      DECIMAL_CHARACTER_REFERENCE = 77,
      NUMERIC_CHARACTER_REFERENCE_END = 78
  }
  /**
   * Exported constant exposing six `State` members under their own names.
   *
   * Each property is `readonly` and typed as the exact enum member, so the
   * value's type is the literal state rather than the whole `State` enum.
   *
   * NOTE(review): presumably these are the states external callers are
   * expected to assign to `Tokenizer.state` — confirm against the
   * implementation.
   */
  export declare const TokenizerMode: {
      readonly DATA: State.DATA;
      readonly RCDATA: State.RCDATA;
      readonly RAWTEXT: State.RAWTEXT;
      readonly SCRIPT_DATA: State.SCRIPT_DATA;
      readonly PLAINTEXT: State.PLAINTEXT;
      readonly CDATA_SECTION: State.CDATA_SECTION;
  };
  /** Options object accepted by the `Tokenizer` constructor. */
  export interface TokenizerOptions {
      /**
       * Optional flag; may be omitted.
       *
       * NOTE(review): presumably enables source-location tracking for emitted
       * tokens — confirm against the implementation.
       */
      sourceCodeLocationInfo?: boolean;
  }
  /**
   * Callback set passed to the `Tokenizer` constructor.
   *
   * Every token callback is required and returns nothing; only
   * `onParseError` is optional.
   */
  export interface TokenHandler {
      /** Receives a comment token. */
      onComment(token: CommentToken): void;
      /** Receives a doctype token. */
      onDoctype(token: DoctypeToken): void;
      /** Receives a tag token for a start tag. */
      onStartTag(token: TagToken): void;
      /** Receives a tag token for an end tag. */
      onEndTag(token: TagToken): void;
      /** Receives the end-of-file token. */
      onEof(token: EOFToken): void;
      /** Receives a character token. */
      onCharacter(token: CharacterToken): void;
      /** Receives a character token; by its name, for null characters. */
      onNullCharacter(token: CharacterToken): void;
      /** Receives a character token; by its name, for whitespace characters. */
      onWhitespaceCharacter(token: CharacterToken): void;
      /** Optional parse-error handler; may be omitted or explicitly `null`. */
      onParseError?: ParserErrorHandler | null;
  }
  /**
   * Ambient declaration of the tokenizer class.
   *
   * Constructed from a `TokenizerOptions` object and a `TokenHandler`.
   * Private members are listed without types, as is usual for private members
   * in emitted declaration files, so their shapes cannot be read from this
   * file.
   */
  export declare class Tokenizer {
      private options;
      private handler;
      /** The `Preprocessor` instance used by this tokenizer. */
      preprocessor: Preprocessor;
      private paused;
      /** Ensures that the parsing loop isn't run multiple times at once. */
      private inLoop;
      /**
       * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
       * and that it is not an integration point for either MathML or HTML.
       *
       * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
       */
      inForeignNode: boolean;
      /** Publicly writable; by its name, the name of the last start tag seen (TODO confirm). */
      lastStartTagName: string;
      /** Publicly writable boolean flag; exact meaning is not visible from this declaration. */
      active: boolean;
      /** Current tokenizer state, as a `State` member. Publicly writable. */
      state: State;
      private returnState;
      private charRefCode;
      private consumedAfterSnapshot;
      private currentLocation;
      private currentCharacterToken;
      private currentToken;
      private currentAttr;
      /**
       * @param options Tokenizer options.
       * @param handler Callbacks used to deliver tokens and parse errors.
       */
      constructor(options: TokenizerOptions, handler: TokenHandler);
      private _err;
      private getCurrentLocation;
      private _runParsingLoop;
      /** Pauses the tokenizer (see the private `paused` member). */
      pause(): void;
      /**
       * Counterpart of `pause()`.
       *
       * @param writeCallback Optional callback taking no arguments; when it is
       *   invoked is not visible from this declaration.
       */
      resume(writeCallback?: () => void): void;
      /**
       * Supplies a chunk of input text.
       *
       * @param chunk Input text.
       * @param isLastChunk By its name, whether this is the final chunk of input.
       * @param writeCallback Optional callback taking no arguments; when it is
       *   invoked is not visible from this declaration.
       */
      write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void;
      /**
       * By its name, inserts `chunk` at the current input position (TODO confirm).
       *
       * @param chunk Text to insert.
       */
      insertHtmlAtCurrentPos(chunk: string): void;
      // Private input-consumption helpers.
      private _ensureHibernation;
      private _consume;
      private _unconsume;
      private _reconsumeInState;
      private _advanceBy;
      private _consumeSequenceIfMatch;
      // Private token/attribute construction and emission helpers.
      private _createStartTagToken;
      private _createEndTagToken;
      private _createCommentToken;
      private _createDoctypeToken;
      private _createCharacterToken;
      private _createAttr;
      private _leaveAttrName;
      private _leaveAttrValue;
      private prepareToken;
      private emitCurrentTagToken;
      private emitCurrentComment;
      private emitCurrentDoctype;
      private _emitCurrentCharacterToken;
      private _emitEOFToken;
      private _appendCharToCurrentCharacterToken;
      private _emitCodePoint;
      private _emitChars;
      // Private character-reference helpers.
      private _matchNamedCharacterReference;
      private _isCharacterReferenceInAttribute;
      private _flushCodePointConsumedAsCharacterReference;
      // Private state members; names parallel the `State` enum members
      // (presumably one handler per state — confirm against the implementation).
      private _callState;
      private _stateData;
      private _stateRcdata;
      private _stateRawtext;
      private _stateScriptData;
      private _statePlaintext;
      private _stateTagOpen;
      private _stateEndTagOpen;
      private _stateTagName;
      private _stateRcdataLessThanSign;
      private _stateRcdataEndTagOpen;
      private handleSpecialEndTag;
      private _stateRcdataEndTagName;
      private _stateRawtextLessThanSign;
      private _stateRawtextEndTagOpen;
      private _stateRawtextEndTagName;
      private _stateScriptDataLessThanSign;
      private _stateScriptDataEndTagOpen;
      private _stateScriptDataEndTagName;
      private _stateScriptDataEscapeStart;
      private _stateScriptDataEscapeStartDash;
      private _stateScriptDataEscaped;
      private _stateScriptDataEscapedDash;
      private _stateScriptDataEscapedDashDash;
      private _stateScriptDataEscapedLessThanSign;
      private _stateScriptDataEscapedEndTagOpen;
      private _stateScriptDataEscapedEndTagName;
      private _stateScriptDataDoubleEscapeStart;
      private _stateScriptDataDoubleEscaped;
      private _stateScriptDataDoubleEscapedDash;
      private _stateScriptDataDoubleEscapedDashDash;
      private _stateScriptDataDoubleEscapedLessThanSign;
      private _stateScriptDataDoubleEscapeEnd;
      private _stateBeforeAttributeName;
      private _stateAttributeName;
      private _stateAfterAttributeName;
      private _stateBeforeAttributeValue;
      private _stateAttributeValueDoubleQuoted;
      private _stateAttributeValueSingleQuoted;
      private _stateAttributeValueUnquoted;
      private _stateAfterAttributeValueQuoted;
      private _stateSelfClosingStartTag;
      private _stateBogusComment;
      private _stateMarkupDeclarationOpen;
      private _stateCommentStart;
      private _stateCommentStartDash;
      private _stateComment;
      private _stateCommentLessThanSign;
      private _stateCommentLessThanSignBang;
      private _stateCommentLessThanSignBangDash;
      private _stateCommentLessThanSignBangDashDash;
      private _stateCommentEndDash;
      private _stateCommentEnd;
      private _stateCommentEndBang;
      private _stateDoctype;
      private _stateBeforeDoctypeName;
      private _stateDoctypeName;
      private _stateAfterDoctypeName;
      private _stateAfterDoctypePublicKeyword;
      private _stateBeforeDoctypePublicIdentifier;
      private _stateDoctypePublicIdentifierDoubleQuoted;
      private _stateDoctypePublicIdentifierSingleQuoted;
      private _stateAfterDoctypePublicIdentifier;
      private _stateBetweenDoctypePublicAndSystemIdentifiers;
      private _stateAfterDoctypeSystemKeyword;
      private _stateBeforeDoctypeSystemIdentifier;
      private _stateDoctypeSystemIdentifierDoubleQuoted;
      private _stateDoctypeSystemIdentifierSingleQuoted;
      private _stateAfterDoctypeSystemIdentifier;
      private _stateBogusDoctype;
      private _stateCdataSection;
      private _stateCdataSectionBracket;
      private _stateCdataSectionEnd;
      private _stateCharacterReference;
      private _stateNamedCharacterReference;
      private _stateAmbiguousAmpersand;
      private _stateNumericCharacterReference;
      private _stateHexademicalCharacterReferenceStart;
      private _stateHexademicalCharacterReference;
      private _stateDecimalCharacterReference;
      private _stateNumericCharacterReferenceEnd;
  }
  // Empty export emitted by the compiler; marks this file as a module.
  export {};
  //# sourceMappingURL=index.d.ts.map