saxes.js 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.SaxesParser = exports.EVENTS = void 0;
  4. const ed5 = require("xmlchars/xml/1.0/ed5");
  5. const ed2 = require("xmlchars/xml/1.1/ed2");
  6. const NSed3 = require("xmlchars/xmlns/1.0/ed3");
  // Local aliases for the xmlchars helpers this module uses.
  // From the XML 1.0 module (ed5): character predicates, the whitespace list
  // and the name pattern.
  const {
    isS,
    isChar: isChar10,
    isNameStartChar,
    isNameChar,
    S_LIST,
    NAME_RE,
  } = ed5;
  // From the XML 1.1 module (ed2): only the character predicate is needed.
  const { isChar: isChar11 } = ed2;
  // From the XML namespaces module (NSed3): the colon-free name helpers.
  const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = NSed3;
  // Namespace URIs of the two reserved prefixes, "xml" and "xmlns".
  const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
  const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
  // Prefix-to-URI bindings every namespace-aware parser starts with. The table
  // has a null prototype, so a lookup can only ever find the keys listed here.
  const rootNS = Object.assign(Object.create(null), {
    xml: XML_NAMESPACE,
    xmlns: XMLNS_NAMESPACE,
  });
  // The predefined XML entities, keyed by entity name. Also prototype-free.
  const XML_ENTITIES = Object.assign(Object.create(null), {
    amp: "&",
    gt: ">",
    lt: "<",
    quot: "\"",
    apos: "'",
  });
  //
  // Sentinels returned by the character readers in place of a code point.
  //
  const EOC = -1; // end-of-chunk: the current chunk has no more characters
  const NL_LIKE = -2; // a line end that was not a lone NL (e.g. CR or CR NL)

  //
  // Parser states. Each number is an index into the parser's state table, so
  // the values must stay contiguous and in the same order as that table.
  //
  // Prolog and DOCTYPE.
  const S_BEGIN = 0; // initial state
  const S_BEGIN_WHITESPACE = 1; // leading whitespace
  const S_DOCTYPE = 2; // <!DOCTYPE
  const S_DOCTYPE_QUOTE = 3; // <!DOCTYPE "//blah
  // Internal DTD subset.
  const S_DTD = 4; // <!DOCTYPE "//blah" [ ...
  const S_DTD_QUOTED = 5; // <!DOCTYPE "//blah" [ "foo
  const S_DTD_OPEN_WAKA = 6;
  const S_DTD_OPEN_WAKA_BANG = 7;
  const S_DTD_COMMENT = 8; // <!--
  const S_DTD_COMMENT_ENDING = 9; // <!-- blah -
  const S_DTD_COMMENT_ENDED = 10; // <!-- blah --
  const S_DTD_PI = 11; // <?
  const S_DTD_PI_ENDING = 12; // <?hi "there" ?
  // Character data and markup openers.
  const S_TEXT = 13; // general stuff
  const S_ENTITY = 14; // &amp and such
  const S_OPEN_WAKA = 15; // <
  const S_OPEN_WAKA_BANG = 16; // <!...
  // Comments and CDATA sections.
  const S_COMMENT = 17; // <!--
  const S_COMMENT_ENDING = 18; // <!-- blah -
  const S_COMMENT_ENDED = 19; // <!-- blah --
  const S_CDATA = 20; // <![CDATA[ something
  const S_CDATA_ENDING = 21; // ]
  const S_CDATA_ENDING_2 = 22; // ]]
  // Processing instructions.
  const S_PI_FIRST_CHAR = 23; // <?hi, first char
  const S_PI_REST = 24; // <?hi, rest of the name
  const S_PI_BODY = 25; // <?hi there
  const S_PI_ENDING = 26; // <?hi "there" ?
  // XML declaration.
  const S_XML_DECL_NAME_START = 27; // <?xml
  const S_XML_DECL_NAME = 28; // <?xml foo
  const S_XML_DECL_EQ = 29; // <?xml foo=
  const S_XML_DECL_VALUE_START = 30; // <?xml foo=
  const S_XML_DECL_VALUE = 31; // <?xml foo="bar"
  const S_XML_DECL_SEPARATOR = 32; // <?xml foo="bar"
  const S_XML_DECL_ENDING = 33; // <?xml ... ?
  // Start tags and attributes.
  const S_OPEN_TAG = 34; // <strong
  const S_OPEN_TAG_SLASH = 35; // <strong /
  const S_ATTRIB = 36; // <a
  const S_ATTRIB_NAME = 37; // <a foo
  const S_ATTRIB_NAME_SAW_WHITE = 38; // <a foo _
  const S_ATTRIB_VALUE = 39; // <a foo=
  const S_ATTRIB_VALUE_QUOTED = 40; // <a foo="bar
  const S_ATTRIB_VALUE_CLOSED = 41; // <a foo="bar"
  const S_ATTRIB_VALUE_UNQUOTED = 42; // <a foo=bar
  // End tags.
  const S_CLOSE_TAG = 43; // </a
  const S_CLOSE_TAG_SAW_WHITE = 44; // </a >

  //
  // Code points the parser compares against.
  //
  const TAB = 0x9;
  const NL = 0xA;
  const CR = 0xD;
  const SPACE = 0x20;
  const BANG = 0x21; // !
  const DQUOTE = 0x22; // "
  const AMP = 0x26; // &
  const SQUOTE = 0x27; // '
  const MINUS = 0x2D; // -
  const FORWARD_SLASH = 0x2F; // /
  const SEMICOLON = 0x3B; // ;
  const LESS = 0x3C; // <
  const EQUAL = 0x3D; // =
  const GREATER = 0x3E; // >
  const QUESTION = 0x3F; // ?
  const OPEN_BRACKET = 0x5B; // [
  const CLOSE_BRACKET = 0x5D; // ]
  const NEL = 0x85; // Next Line
  const LS = 0x2028; // Line Separator
  // The two characters that may delimit a quoted value.
  const QUOTES = [DQUOTE, SQUOTE];
  // True when the code point is a single or a double quote.
  const isQuote = (c) => c === SQUOTE || c === DQUOTE;
  // Terminator sets: each lists the code points that end a capture in the
  // construct it is named after.
  const DOCTYPE_TERMINATOR = QUOTES.concat(OPEN_BRACKET, GREATER);
  const DTD_TERMINATOR = QUOTES.concat(LESS, CLOSE_BRACKET);
  const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
  const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
  /**
   * Check one prefix/URI binding against the rules for the reserved "xml" and
   * "xmlns" prefixes and namespaces. Every violation is reported through
   * ``parser.fail``; a binding that breaks two rules is reported twice.
   *
   * @param parser The object whose ``fail`` method receives error messages.
   *
   * @param prefix The prefix being bound. The empty string stands for the
   * default namespace.
   *
   * @param uri The namespace URI the prefix is bound to.
   */
  function nsPairCheck(parser, prefix, uri) {
    // Rule 1: a reserved prefix may only name its own namespace.
    if (prefix === "xml") {
      if (uri !== XML_NAMESPACE) {
        parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
      }
    }
    else if (prefix === "xmlns" && uri !== XMLNS_NAMESPACE) {
      parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
    }
    // Rule 2: a reserved namespace may not be handed out freely.
    if (uri === XMLNS_NAMESPACE) {
      // Nothing may be bound to the xmlns namespace, not even "xmlns" itself.
      const message = prefix === "" ?
        `the default namespace may not be set to ${uri}.` :
        `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`;
      parser.fail(message);
      return;
    }
    if (uri !== XML_NAMESPACE) {
      return;
    }
    // Only the "xml" prefix may be bound to the XML namespace.
    if (prefix === "") {
      parser.fail(`the default namespace may not be set to ${uri}.`);
    }
    else if (prefix !== "xml") {
      parser.fail("may not assign the xml namespace to another prefix.");
    }
  }
  /**
   * Run ``nsPairCheck`` on every own enumerable binding of a prefix-to-URI
   * mapping, in key order.
   *
   * @param parser The object that receives error reports.
   *
   * @param mapping The prefix-to-URI mapping to validate.
   */
  function nsMappingCheck(parser, mapping) {
    Object.keys(mapping).forEach((prefix) => {
      nsPairCheck(parser, prefix, mapping[prefix]);
    });
  }
  // Whole-string test against the colon-free (NCName) name pattern.
  function isNCName(name) {
    return NC_NAME_RE.test(name);
  }
  // Whole-string test against the plain XML name pattern.
  function isName(name) {
    return NAME_RE.test(name);
  }
  // Values stored in the parser's ``forbiddenState`` field.
  // NOTE(review): the names suggest they track how much of a forbidden
  // bracket sequence has been seen in text; confirm against the text state
  // handler, which is outside this part of the file.
  const FORBIDDEN_START = 0;
  const FORBIDDEN_BRACKET = 1;
  const FORBIDDEN_BRACKET_BRACKET = 2;
  // Maps each public event name to the name of the parser property that stores
  // the handler for that event. The key order here is the order of EVENTS.
  const EVENT_NAME_TO_HANDLER_NAME = {
    xmldecl: "xmldeclHandler",
    text: "textHandler",
    processinginstruction: "piHandler",
    doctype: "doctypeHandler",
    comment: "commentHandler",
    opentagstart: "openTagStartHandler",
    attribute: "attributeHandler",
    opentag: "openTagHandler",
    closetag: "closeTagHandler",
    cdata: "cdataHandler",
    error: "errorHandler",
    end: "endHandler",
    ready: "readyHandler",
  };
  /**
   * The list of supported events.
   */
  exports.EVENTS = Object.keys(EVENT_NAME_TO_HANDLER_NAME);
  186. // eslint-disable-next-line @typescript-eslint/ban-types
  187. class SaxesParser {
  188. /**
  189. * @param opt The parser options.
  190. */
  191. constructor(opt) {
  192. this.opt = opt !== null && opt !== void 0 ? opt : {};
  193. this.fragmentOpt = !!this.opt.fragment;
  194. const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
  195. this.trackPosition = this.opt.position !== false;
  196. this.fileName = this.opt.fileName;
  197. if (xmlnsOpt) {
  198. // This is the function we use to perform name checks on PIs and entities.
  199. // When namespaces are used, colons are not allowed in PI target names or
  200. // entity names. So the check depends on whether namespaces are used. See:
  201. //
  202. // https://www.w3.org/XML/xml-names-19990114-errata.html
  203. // NE08
  204. //
  205. this.nameStartCheck = isNCNameStartChar;
  206. this.nameCheck = isNCNameChar;
  207. this.isName = isNCName;
  208. // eslint-disable-next-line @typescript-eslint/unbound-method
  209. this.processAttribs = this.processAttribsNS;
  210. // eslint-disable-next-line @typescript-eslint/unbound-method
  211. this.pushAttrib = this.pushAttribNS;
  212. // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
  213. this.ns = Object.assign({ __proto__: null }, rootNS);
  214. const additional = this.opt.additionalNamespaces;
  215. if (additional != null) {
  216. nsMappingCheck(this, additional);
  217. Object.assign(this.ns, additional);
  218. }
  219. }
  220. else {
  221. this.nameStartCheck = isNameStartChar;
  222. this.nameCheck = isNameChar;
  223. this.isName = isName;
  224. // eslint-disable-next-line @typescript-eslint/unbound-method
  225. this.processAttribs = this.processAttribsPlain;
  226. // eslint-disable-next-line @typescript-eslint/unbound-method
  227. this.pushAttrib = this.pushAttribPlain;
  228. }
  229. //
  230. // The order of the members in this table needs to correspond to the state
  231. // numbers given to the states that correspond to the methods being recorded
  232. // here.
  233. //
  234. this.stateTable = [
  235. /* eslint-disable @typescript-eslint/unbound-method */
  236. this.sBegin,
  237. this.sBeginWhitespace,
  238. this.sDoctype,
  239. this.sDoctypeQuote,
  240. this.sDTD,
  241. this.sDTDQuoted,
  242. this.sDTDOpenWaka,
  243. this.sDTDOpenWakaBang,
  244. this.sDTDComment,
  245. this.sDTDCommentEnding,
  246. this.sDTDCommentEnded,
  247. this.sDTDPI,
  248. this.sDTDPIEnding,
  249. this.sText,
  250. this.sEntity,
  251. this.sOpenWaka,
  252. this.sOpenWakaBang,
  253. this.sComment,
  254. this.sCommentEnding,
  255. this.sCommentEnded,
  256. this.sCData,
  257. this.sCDataEnding,
  258. this.sCDataEnding2,
  259. this.sPIFirstChar,
  260. this.sPIRest,
  261. this.sPIBody,
  262. this.sPIEnding,
  263. this.sXMLDeclNameStart,
  264. this.sXMLDeclName,
  265. this.sXMLDeclEq,
  266. this.sXMLDeclValueStart,
  267. this.sXMLDeclValue,
  268. this.sXMLDeclSeparator,
  269. this.sXMLDeclEnding,
  270. this.sOpenTag,
  271. this.sOpenTagSlash,
  272. this.sAttrib,
  273. this.sAttribName,
  274. this.sAttribNameSawWhite,
  275. this.sAttribValue,
  276. this.sAttribValueQuoted,
  277. this.sAttribValueClosed,
  278. this.sAttribValueUnquoted,
  279. this.sCloseTag,
  280. this.sCloseTagSawWhite,
  281. /* eslint-enable @typescript-eslint/unbound-method */
  282. ];
  283. this._init();
  284. }
  285. /**
  286. * Indicates whether or not the parser is closed. If ``true``, wait for
  287. * the ``ready`` event to write again.
  288. */
  289. get closed() {
  290. return this._closed;
  291. }
  292. _init() {
  293. var _a;
  294. this.openWakaBang = "";
  295. this.text = "";
  296. this.name = "";
  297. this.piTarget = "";
  298. this.entity = "";
  299. this.q = null;
  300. this.tags = [];
  301. this.tag = null;
  302. this.topNS = null;
  303. this.chunk = "";
  304. this.chunkPosition = 0;
  305. this.i = 0;
  306. this.prevI = 0;
  307. this.carriedFromPrevious = undefined;
  308. this.forbiddenState = FORBIDDEN_START;
  309. this.attribList = [];
  310. // The logic is organized so as to minimize the need to check
  311. // this.opt.fragment while parsing.
  312. const { fragmentOpt } = this;
  313. this.state = fragmentOpt ? S_TEXT : S_BEGIN;
  314. // We want these to be all true if we are dealing with a fragment.
  315. this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot =
  316. this.sawRoot = fragmentOpt;
  317. // An XML declaration is intially possible only when parsing whole
  318. // documents.
  319. this.xmlDeclPossible = !fragmentOpt;
  320. this.xmlDeclExpects = ["version"];
  321. this.entityReturnState = undefined;
  322. let { defaultXMLVersion } = this.opt;
  323. if (defaultXMLVersion === undefined) {
  324. if (this.opt.forceXMLVersion === true) {
  325. throw new Error("forceXMLVersion set but defaultXMLVersion is not set");
  326. }
  327. defaultXMLVersion = "1.0";
  328. }
  329. this.setXMLVersion(defaultXMLVersion);
  330. this.positionAtNewLine = 0;
  331. this.doctype = false;
  332. this._closed = false;
  333. this.xmlDecl = {
  334. version: undefined,
  335. encoding: undefined,
  336. standalone: undefined,
  337. };
  338. this.line = 1;
  339. this.column = 0;
  340. this.ENTITIES = Object.create(XML_ENTITIES);
  341. (_a = this.readyHandler) === null || _a === void 0 ? void 0 : _a.call(this);
  342. }
  343. /**
  344. * The stream position the parser is currently looking at. This field is
  345. * zero-based.
  346. *
  347. * This field is not based on counting Unicode characters but is to be
  348. * interpreted as a plain index into a JavaScript string.
  349. */
  350. get position() {
  351. return this.chunkPosition + this.i;
  352. }
  353. /**
  354. * The column number of the next character to be read by the parser.
  355. * This field is zero-based. (The first column in a line is 0.)
  356. *
  357. * This field reports the index at which the next character would be in the
  358. * line if the line were represented as a JavaScript string. Note that this
  359. * *can* be different to a count based on the number of *Unicode characters*
  360. * due to how JavaScript handles astral plane characters.
  361. *
  362. * See [[column]] for a number that corresponds to a count of Unicode
  363. * characters.
  364. */
  365. get columnIndex() {
  366. return this.position - this.positionAtNewLine;
  367. }
  368. /**
  369. * Set an event listener on an event. The parser supports one handler per
  370. * event type. If you try to set an event handler over an existing handler,
  371. * the old handler is silently overwritten.
  372. *
  373. * @param name The event to listen to.
  374. *
  375. * @param handler The handler to set.
  376. */
  377. on(name, handler) {
  378. // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access
  379. this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler;
  380. }
  381. /**
  382. * Unset an event handler.
  383. *
  384. * @param name The event to stop listening to.
  385. */
  386. off(name) {
  387. // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access
  388. this[EVENT_NAME_TO_HANDLER_NAME[name]] = undefined;
  389. }
  390. /**
  391. * Make an error object. The error object will have a message that contains
  392. * the ``fileName`` option passed at the creation of the parser. If position
  393. * tracking was turned on, it will also have line and column number
  394. * information.
  395. *
  396. * @param message The message describing the error to report.
  397. *
  398. * @returns An error object with a properly formatted message.
  399. */
  400. makeError(message) {
  401. var _a;
  402. let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : "";
  403. if (this.trackPosition) {
  404. if (msg.length > 0) {
  405. msg += ":";
  406. }
  407. msg += `${this.line}:${this.column}`;
  408. }
  409. if (msg.length > 0) {
  410. msg += ": ";
  411. }
  412. return new Error(msg + message);
  413. }
  414. /**
  415. * Report a parsing error. This method is made public so that client code may
  416. * check for issues that are outside the scope of this project and can report
  417. * errors.
  418. *
  419. * @param message The error to report.
  420. *
  421. * @returns this
  422. */
  423. fail(message) {
  424. const err = this.makeError(message);
  425. const handler = this.errorHandler;
  426. if (handler === undefined) {
  427. throw err;
  428. }
  429. else {
  430. handler(err);
  431. }
  432. return this;
  433. }
  434. /**
  435. * Write XML data to the parser.
  436. *
  437. * @param chunk The XML data to write.
  438. *
  439. * @returns this
  440. */
  441. // We do need object for the type here. Yes, it often causes problems
  442. // but not in this case.
  443. write(chunk) {
  444. if (this.closed) {
  445. return this.fail("cannot write after close; assign an onready handler.");
  446. }
  447. let end = false;
  448. if (chunk === null) {
  449. // We cannot return immediately because carriedFromPrevious may need
  450. // processing.
  451. end = true;
  452. chunk = "";
  453. }
  454. else if (typeof chunk === "object") {
  455. chunk = chunk.toString();
  456. }
  457. // We checked if performing a pre-decomposition of the string into an array
  458. // of single complete characters (``Array.from(chunk)``) would be faster
  459. // than the current repeated calls to ``charCodeAt``. As of August 2018, it
  460. // isn't. (There may be Node-specific code that would perform faster than
  461. // ``Array.from`` but don't want to be dependent on Node.)
  462. if (this.carriedFromPrevious !== undefined) {
  463. // The previous chunk had char we must carry over.
  464. chunk = `${this.carriedFromPrevious}${chunk}`;
  465. this.carriedFromPrevious = undefined;
  466. }
  467. let limit = chunk.length;
  468. const lastCode = chunk.charCodeAt(limit - 1);
  469. if (!end &&
  470. // A trailing CR or surrogate must be carried over to the next
  471. // chunk.
  472. (lastCode === CR || (lastCode >= 0xD800 && lastCode <= 0xDBFF))) {
  473. // The chunk ends with a character that must be carried over. We cannot
  474. // know how to handle it until we get the next chunk or the end of the
  475. // stream. So save it for later.
  476. this.carriedFromPrevious = chunk[limit - 1];
  477. limit--;
  478. chunk = chunk.slice(0, limit);
  479. }
  480. const { stateTable } = this;
  481. this.chunk = chunk;
  482. this.i = 0;
  483. while (this.i < limit) {
  484. // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
  485. stateTable[this.state].call(this);
  486. }
  487. this.chunkPosition += limit;
  488. return end ? this.end() : this;
  489. }
  490. /**
  491. * Close the current stream. Perform final well-formedness checks and reset
  492. * the parser state.
  493. *
  494. * @returns this
  495. */
  496. close() {
  497. return this.write(null);
  498. }
  499. /**
  500. * Get a single code point out of the current chunk. This updates the current
  501. * position if we do position tracking.
  502. *
  503. * This is the algorithm to use for XML 1.0.
  504. *
  505. * @returns The character read.
  506. */
  507. getCode10() {
  508. const { chunk, i } = this;
  509. this.prevI = i;
  510. // Yes, we do this instead of doing this.i++. Doing it this way, we do not
  511. // read this.i again, which is a bit faster.
  512. this.i = i + 1;
  513. if (i >= chunk.length) {
  514. return EOC;
  515. }
  516. // Using charCodeAt and handling the surrogates ourselves is faster
  517. // than using codePointAt.
  518. const code = chunk.charCodeAt(i);
  519. this.column++;
  520. if (code < 0xD800) {
  521. if (code >= SPACE || code === TAB) {
  522. return code;
  523. }
  524. switch (code) {
  525. case NL:
  526. this.line++;
  527. this.column = 0;
  528. this.positionAtNewLine = this.position;
  529. return NL;
  530. case CR:
  531. // We may get NaN if we read past the end of the chunk, which is fine.
  532. if (chunk.charCodeAt(i + 1) === NL) {
  533. // A \r\n sequence is converted to \n so we have to skip over the
  534. // next character. We already know it has a size of 1 so ++ is fine
  535. // here.
  536. this.i = i + 2;
  537. }
  538. // Otherwise, a \r is just converted to \n, so we don't have to skip
  539. // ahead.
  540. // In either case, \r becomes \n.
  541. this.line++;
  542. this.column = 0;
  543. this.positionAtNewLine = this.position;
  544. return NL_LIKE;
  545. default:
  546. // If we get here, then code < SPACE and it is not NL CR or TAB.
  547. this.fail("disallowed character.");
  548. return code;
  549. }
  550. }
  551. if (code > 0xDBFF) {
  552. // This is a specialized version of isChar10 that takes into account
  553. // that in this context code > 0xDBFF and code <= 0xFFFF. So it does not
  554. // test cases that don't need testing.
  555. if (!(code >= 0xE000 && code <= 0xFFFD)) {
  556. this.fail("disallowed character.");
  557. }
  558. return code;
  559. }
  560. const final = 0x10000 + ((code - 0xD800) * 0x400) +
  561. (chunk.charCodeAt(i + 1) - 0xDC00);
  562. this.i = i + 2;
  563. // This is a specialized version of isChar10 that takes into account that in
  564. // this context necessarily final >= 0x10000.
  565. if (final > 0x10FFFF) {
  566. this.fail("disallowed character.");
  567. }
  568. return final;
  569. }
  570. /**
  571. * Get a single code point out of the current chunk. This updates the current
  572. * position if we do position tracking.
  573. *
  574. * This is the algorithm to use for XML 1.1.
  575. *
  576. * @returns {number} The character read.
  577. */
  578. getCode11() {
  579. const { chunk, i } = this;
  580. this.prevI = i;
  581. // Yes, we do this instead of doing this.i++. Doing it this way, we do not
  582. // read this.i again, which is a bit faster.
  583. this.i = i + 1;
  584. if (i >= chunk.length) {
  585. return EOC;
  586. }
  587. // Using charCodeAt and handling the surrogates ourselves is faster
  588. // than using codePointAt.
  589. const code = chunk.charCodeAt(i);
  590. this.column++;
  591. if (code < 0xD800) {
  592. if ((code > 0x1F && code < 0x7F) || (code > 0x9F && code !== LS) ||
  593. code === TAB) {
  594. return code;
  595. }
  596. switch (code) {
  597. case NL: // 0xA
  598. this.line++;
  599. this.column = 0;
  600. this.positionAtNewLine = this.position;
  601. return NL;
  602. case CR: { // 0xD
  603. // We may get NaN if we read past the end of the chunk, which is
  604. // fine.
  605. const next = chunk.charCodeAt(i + 1);
  606. if (next === NL || next === NEL) {
  607. // A CR NL or CR NEL sequence is converted to NL so we have to skip
  608. // over the next character. We already know it has a size of 1.
  609. this.i = i + 2;
  610. }
  611. // Otherwise, a CR is just converted to NL, no skip.
  612. }
  613. /* yes, fall through */
  614. case NEL: // 0x85
  615. case LS: // Ox2028
  616. this.line++;
  617. this.column = 0;
  618. this.positionAtNewLine = this.position;
  619. return NL_LIKE;
  620. default:
  621. this.fail("disallowed character.");
  622. return code;
  623. }
  624. }
  625. if (code > 0xDBFF) {
  626. // This is a specialized version of isCharAndNotRestricted that takes into
  627. // account that in this context code > 0xDBFF and code <= 0xFFFF. So it
  628. // does not test cases that don't need testing.
  629. if (!(code >= 0xE000 && code <= 0xFFFD)) {
  630. this.fail("disallowed character.");
  631. }
  632. return code;
  633. }
  634. const final = 0x10000 + ((code - 0xD800) * 0x400) +
  635. (chunk.charCodeAt(i + 1) - 0xDC00);
  636. this.i = i + 2;
  637. // This is a specialized version of isCharAndNotRestricted that takes into
  638. // account that in this context necessarily final >= 0x10000.
  639. if (final > 0x10FFFF) {
  640. this.fail("disallowed character.");
  641. }
  642. return final;
  643. }
  644. /**
  645. * Like ``getCode`` but with the return value normalized so that ``NL`` is
  646. * returned for ``NL_LIKE``.
  647. */
  648. getCodeNorm() {
  649. const c = this.getCode();
  650. return c === NL_LIKE ? NL : c;
  651. }
  652. unget() {
  653. this.i = this.prevI;
  654. this.column--;
  655. }
  656. /**
  657. * Capture characters into a buffer until encountering one of a set of
  658. * characters.
  659. *
  660. * @param chars An array of codepoints. Encountering a character in the array
  661. * ends the capture. (``chars`` may safely contain ``NL``.)
  662. *
  663. * @return The character code that made the capture end, or ``EOC`` if we hit
  664. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  665. * instead.
  666. */
  667. captureTo(chars) {
  668. let { i: start } = this;
  669. const { chunk } = this;
  670. // eslint-disable-next-line no-constant-condition
  671. while (true) {
  672. const c = this.getCode();
  673. const isNLLike = c === NL_LIKE;
  674. const final = isNLLike ? NL : c;
  675. if (final === EOC || chars.includes(final)) {
  676. this.text += chunk.slice(start, this.prevI);
  677. return final;
  678. }
  679. if (isNLLike) {
  680. this.text += `${chunk.slice(start, this.prevI)}\n`;
  681. start = this.i;
  682. }
  683. }
  684. }
  685. /**
  686. * Capture characters into a buffer until encountering a character.
  687. *
  688. * @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT
  689. * CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
  690. *
  691. * @return ``true`` if we ran into the character. Otherwise, we ran into the
  692. * end of the current chunk.
  693. */
  694. captureToChar(char) {
  695. let { i: start } = this;
  696. const { chunk } = this;
  697. // eslint-disable-next-line no-constant-condition
  698. while (true) {
  699. let c = this.getCode();
  700. switch (c) {
  701. case NL_LIKE:
  702. this.text += `${chunk.slice(start, this.prevI)}\n`;
  703. start = this.i;
  704. c = NL;
  705. break;
  706. case EOC:
  707. this.text += chunk.slice(start);
  708. return false;
  709. default:
  710. }
  711. if (c === char) {
  712. this.text += chunk.slice(start, this.prevI);
  713. return true;
  714. }
  715. }
  716. }
  717. /**
  718. * Capture characters that satisfy ``isNameChar`` into the ``name`` field of
  719. * this parser.
  720. *
  721. * @return The character code that made the test fail, or ``EOC`` if we hit
  722. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  723. * instead.
  724. */
  725. captureNameChars() {
  726. const { chunk, i: start } = this;
  727. // eslint-disable-next-line no-constant-condition
  728. while (true) {
  729. const c = this.getCode();
  730. if (c === EOC) {
  731. this.name += chunk.slice(start);
  732. return EOC;
  733. }
  734. // NL is not a name char so we don't have to test specifically for it.
  735. if (!isNameChar(c)) {
  736. this.name += chunk.slice(start, this.prevI);
  737. return c === NL_LIKE ? NL : c;
  738. }
  739. }
  740. }
  741. /**
  742. * Skip white spaces.
  743. *
  744. * @return The character that ended the skip, or ``EOC`` if we hit
  745. * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
  746. * instead.
  747. */
  748. skipSpaces() {
  749. // eslint-disable-next-line no-constant-condition
  750. while (true) {
  751. const c = this.getCodeNorm();
  752. if (c === EOC || !isS(c)) {
  753. return c;
  754. }
  755. }
  756. }
  757. setXMLVersion(version) {
  758. this.currentXMLVersion = version;
  759. /* eslint-disable @typescript-eslint/unbound-method */
  760. if (version === "1.0") {
  761. this.isChar = isChar10;
  762. this.getCode = this.getCode10;
  763. }
  764. else {
  765. this.isChar = isChar11;
  766. this.getCode = this.getCode11;
  767. }
  768. /* eslint-enable @typescript-eslint/unbound-method */
  769. }
  770. // STATE ENGINE METHODS
  771. // This needs to be a state separate from S_BEGIN_WHITESPACE because we want
  772. // to be sure never to come back to this state later.
  773. sBegin() {
  774. // We are essentially peeking at the first character of the chunk. Since
  775. // S_BEGIN can be in effect only when we start working on the first chunk,
  776. // the index at which we must look is necessarily 0. Note also that the
  777. // following test does not depend on decoding surrogates.
  778. // If the initial character is 0xFEFF, ignore it.
  779. if (this.chunk.charCodeAt(0) === 0xFEFF) {
  780. this.i++;
  781. this.column++;
  782. }
  783. this.state = S_BEGIN_WHITESPACE;
  784. }
  785. sBeginWhitespace() {
  786. // We need to know whether we've encountered spaces or not because as soon
  787. // as we run into a space, an XML declaration is no longer possible. Rather
  788. // than slow down skipSpaces even in places where we don't care whether it
  789. // skipped anything or not, we check whether prevI is equal to the value of
  790. // i from before we skip spaces.
  791. const iBefore = this.i;
  792. const c = this.skipSpaces();
  793. if (this.prevI !== iBefore) {
  794. this.xmlDeclPossible = false;
  795. }
  796. switch (c) {
  797. case LESS:
  798. this.state = S_OPEN_WAKA;
  799. // We could naively call closeText but in this state, it is not normal
  800. // to have text be filled with any data.
  801. if (this.text.length !== 0) {
  802. throw new Error("no-empty text at start");
  803. }
  804. break;
  805. case EOC:
  806. break;
  807. default:
  808. this.unget();
  809. this.state = S_TEXT;
  810. this.xmlDeclPossible = false;
  811. }
  812. }
  813. sDoctype() {
  814. var _a;
  815. const c = this.captureTo(DOCTYPE_TERMINATOR);
  816. switch (c) {
  817. case GREATER: {
  818. (_a = this.doctypeHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  819. this.text = "";
  820. this.state = S_TEXT;
  821. this.doctype = true; // just remember that we saw it.
  822. break;
  823. }
  824. case EOC:
  825. break;
  826. default:
  827. this.text += String.fromCodePoint(c);
  828. if (c === OPEN_BRACKET) {
  829. this.state = S_DTD;
  830. }
  831. else if (isQuote(c)) {
  832. this.state = S_DOCTYPE_QUOTE;
  833. this.q = c;
  834. }
  835. }
  836. }
  837. sDoctypeQuote() {
  838. const q = this.q;
  839. if (this.captureToChar(q)) {
  840. this.text += String.fromCodePoint(q);
  841. this.q = null;
  842. this.state = S_DOCTYPE;
  843. }
  844. }
  845. sDTD() {
  846. const c = this.captureTo(DTD_TERMINATOR);
  847. if (c === EOC) {
  848. return;
  849. }
  850. this.text += String.fromCodePoint(c);
  851. if (c === CLOSE_BRACKET) {
  852. this.state = S_DOCTYPE;
  853. }
  854. else if (c === LESS) {
  855. this.state = S_DTD_OPEN_WAKA;
  856. }
  857. else if (isQuote(c)) {
  858. this.state = S_DTD_QUOTED;
  859. this.q = c;
  860. }
  861. }
  862. sDTDQuoted() {
  863. const q = this.q;
  864. if (this.captureToChar(q)) {
  865. this.text += String.fromCodePoint(q);
  866. this.state = S_DTD;
  867. this.q = null;
  868. }
  869. }
  870. sDTDOpenWaka() {
  871. const c = this.getCodeNorm();
  872. this.text += String.fromCodePoint(c);
  873. switch (c) {
  874. case BANG:
  875. this.state = S_DTD_OPEN_WAKA_BANG;
  876. this.openWakaBang = "";
  877. break;
  878. case QUESTION:
  879. this.state = S_DTD_PI;
  880. break;
  881. default:
  882. this.state = S_DTD;
  883. }
  884. }
  885. sDTDOpenWakaBang() {
  886. const char = String.fromCodePoint(this.getCodeNorm());
  887. const owb = this.openWakaBang += char;
  888. this.text += char;
  889. if (owb !== "-") {
  890. this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
  891. this.openWakaBang = "";
  892. }
  893. }
  894. sDTDComment() {
  895. if (this.captureToChar(MINUS)) {
  896. this.text += "-";
  897. this.state = S_DTD_COMMENT_ENDING;
  898. }
  899. }
  900. sDTDCommentEnding() {
  901. const c = this.getCodeNorm();
  902. this.text += String.fromCodePoint(c);
  903. this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
  904. }
  905. sDTDCommentEnded() {
  906. const c = this.getCodeNorm();
  907. this.text += String.fromCodePoint(c);
  908. if (c === GREATER) {
  909. this.state = S_DTD;
  910. }
  911. else {
  912. this.fail("malformed comment.");
  913. // <!-- blah -- bloo --> will be recorded as
  914. // a comment of " blah -- bloo "
  915. this.state = S_DTD_COMMENT;
  916. }
  917. }
  918. sDTDPI() {
  919. if (this.captureToChar(QUESTION)) {
  920. this.text += "?";
  921. this.state = S_DTD_PI_ENDING;
  922. }
  923. }
  924. sDTDPIEnding() {
  925. const c = this.getCodeNorm();
  926. this.text += String.fromCodePoint(c);
  927. if (c === GREATER) {
  928. this.state = S_DTD;
  929. }
  930. }
  931. sText() {
  932. //
  933. // We did try a version of saxes where the S_TEXT state was split in two
  934. // states: one for text inside the root element, and one for text
  935. // outside. This was avoiding having to test this.tags.length to decide
  936. // what implementation to actually use.
  937. //
  938. // Peformance testing on gigabyte-size files did not show any advantage to
  939. // using the two states solution instead of the current one. Conversely, it
  940. // made the code a bit more complicated elsewhere. For instance, a comment
  941. // can appear before the root element so when a comment ended it was
  942. // necessary to determine whether to return to the S_TEXT state or to the
  943. // new text-outside-root state.
  944. //
  945. if (this.tags.length !== 0) {
  946. this.handleTextInRoot();
  947. }
  948. else {
  949. this.handleTextOutsideRoot();
  950. }
  951. }
  952. sEntity() {
  953. // This is essentially a specialized version of captureToChar(SEMICOLON...)
  954. let { i: start } = this;
  955. const { chunk } = this;
  956. // eslint-disable-next-line no-labels, no-restricted-syntax
  957. loop:
  958. // eslint-disable-next-line no-constant-condition
  959. while (true) {
  960. switch (this.getCode()) {
  961. case NL_LIKE:
  962. this.entity += `${chunk.slice(start, this.prevI)}\n`;
  963. start = this.i;
  964. break;
  965. case SEMICOLON: {
  966. const { entityReturnState } = this;
  967. const entity = this.entity + chunk.slice(start, this.prevI);
  968. this.state = entityReturnState;
  969. let parsed;
  970. if (entity === "") {
  971. this.fail("empty entity name.");
  972. parsed = "&;";
  973. }
  974. else {
  975. parsed = this.parseEntity(entity);
  976. this.entity = "";
  977. }
  978. if (entityReturnState !== S_TEXT || this.textHandler !== undefined) {
  979. this.text += parsed;
  980. }
  981. // eslint-disable-next-line no-labels
  982. break loop;
  983. }
  984. case EOC:
  985. this.entity += chunk.slice(start);
  986. // eslint-disable-next-line no-labels
  987. break loop;
  988. default:
  989. }
  990. }
  991. }
  992. sOpenWaka() {
  993. // Reminder: a state handler is called with at least one character
  994. // available in the current chunk. So the first call to get code inside of
  995. // a state handler cannot return ``EOC``. That's why we don't test
  996. // for it.
  997. const c = this.getCode();
  998. // either a /, ?, !, or text is coming next.
  999. if (isNameStartChar(c)) {
  1000. this.state = S_OPEN_TAG;
  1001. this.unget();
  1002. this.xmlDeclPossible = false;
  1003. }
  1004. else {
  1005. switch (c) {
  1006. case FORWARD_SLASH:
  1007. this.state = S_CLOSE_TAG;
  1008. this.xmlDeclPossible = false;
  1009. break;
  1010. case BANG:
  1011. this.state = S_OPEN_WAKA_BANG;
  1012. this.openWakaBang = "";
  1013. this.xmlDeclPossible = false;
  1014. break;
  1015. case QUESTION:
  1016. this.state = S_PI_FIRST_CHAR;
  1017. break;
  1018. default:
  1019. this.fail("disallowed character in tag name");
  1020. this.state = S_TEXT;
  1021. this.xmlDeclPossible = false;
  1022. }
  1023. }
  1024. }
  1025. sOpenWakaBang() {
  1026. this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
  1027. switch (this.openWakaBang) {
  1028. case "[CDATA[":
  1029. if (!this.sawRoot && !this.reportedTextBeforeRoot) {
  1030. this.fail("text data outside of root node.");
  1031. this.reportedTextBeforeRoot = true;
  1032. }
  1033. if (this.closedRoot && !this.reportedTextAfterRoot) {
  1034. this.fail("text data outside of root node.");
  1035. this.reportedTextAfterRoot = true;
  1036. }
  1037. this.state = S_CDATA;
  1038. this.openWakaBang = "";
  1039. break;
  1040. case "--":
  1041. this.state = S_COMMENT;
  1042. this.openWakaBang = "";
  1043. break;
  1044. case "DOCTYPE":
  1045. this.state = S_DOCTYPE;
  1046. if (this.doctype || this.sawRoot) {
  1047. this.fail("inappropriately located doctype declaration.");
  1048. }
  1049. this.openWakaBang = "";
  1050. break;
  1051. default:
  1052. // 7 happens to be the maximum length of the string that can possibly
  1053. // match one of the cases above.
  1054. if (this.openWakaBang.length >= 7) {
  1055. this.fail("incorrect syntax.");
  1056. }
  1057. }
  1058. }
  1059. sComment() {
  1060. if (this.captureToChar(MINUS)) {
  1061. this.state = S_COMMENT_ENDING;
  1062. }
  1063. }
  1064. sCommentEnding() {
  1065. var _a;
  1066. const c = this.getCodeNorm();
  1067. if (c === MINUS) {
  1068. this.state = S_COMMENT_ENDED;
  1069. (_a = this.commentHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  1070. this.text = "";
  1071. }
  1072. else {
  1073. this.text += `-${String.fromCodePoint(c)}`;
  1074. this.state = S_COMMENT;
  1075. }
  1076. }
  1077. sCommentEnded() {
  1078. const c = this.getCodeNorm();
  1079. if (c !== GREATER) {
  1080. this.fail("malformed comment.");
  1081. // <!-- blah -- bloo --> will be recorded as
  1082. // a comment of " blah -- bloo "
  1083. this.text += `--${String.fromCodePoint(c)}`;
  1084. this.state = S_COMMENT;
  1085. }
  1086. else {
  1087. this.state = S_TEXT;
  1088. }
  1089. }
  1090. sCData() {
  1091. if (this.captureToChar(CLOSE_BRACKET)) {
  1092. this.state = S_CDATA_ENDING;
  1093. }
  1094. }
  1095. sCDataEnding() {
  1096. const c = this.getCodeNorm();
  1097. if (c === CLOSE_BRACKET) {
  1098. this.state = S_CDATA_ENDING_2;
  1099. }
  1100. else {
  1101. this.text += `]${String.fromCodePoint(c)}`;
  1102. this.state = S_CDATA;
  1103. }
  1104. }
  1105. sCDataEnding2() {
  1106. var _a;
  1107. const c = this.getCodeNorm();
  1108. switch (c) {
  1109. case GREATER: {
  1110. (_a = this.cdataHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
  1111. this.text = "";
  1112. this.state = S_TEXT;
  1113. break;
  1114. }
  1115. case CLOSE_BRACKET:
  1116. this.text += "]";
  1117. break;
  1118. default:
  1119. this.text += `]]${String.fromCodePoint(c)}`;
  1120. this.state = S_CDATA;
  1121. }
  1122. }
  1123. // We need this separate state to check the first character fo the pi target
  1124. // with this.nameStartCheck which allows less characters than this.nameCheck.
  1125. sPIFirstChar() {
  1126. const c = this.getCodeNorm();
  1127. // This is first because in the case where the file is well-formed this is
  1128. // the branch taken. We optimize for well-formedness.
  1129. if (this.nameStartCheck(c)) {
  1130. this.piTarget += String.fromCodePoint(c);
  1131. this.state = S_PI_REST;
  1132. }
  1133. else if (c === QUESTION || isS(c)) {
  1134. this.fail("processing instruction without a target.");
  1135. this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
  1136. }
  1137. else {
  1138. this.fail("disallowed character in processing instruction name.");
  1139. this.piTarget += String.fromCodePoint(c);
  1140. this.state = S_PI_REST;
  1141. }
  1142. }
  1143. sPIRest() {
  1144. // Capture characters into a piTarget while ``this.nameCheck`` run on the
  1145. // character read returns true.
  1146. const { chunk, i: start } = this;
  1147. // eslint-disable-next-line no-constant-condition
  1148. while (true) {
  1149. const c = this.getCodeNorm();
  1150. if (c === EOC) {
  1151. this.piTarget += chunk.slice(start);
  1152. return;
  1153. }
  1154. // NL cannot satisfy this.nameCheck so we don't have to test specifically
  1155. // for it.
  1156. if (!this.nameCheck(c)) {
  1157. this.piTarget += chunk.slice(start, this.prevI);
  1158. const isQuestion = c === QUESTION;
  1159. if (isQuestion || isS(c)) {
  1160. if (this.piTarget === "xml") {
  1161. if (!this.xmlDeclPossible) {
  1162. this.fail("an XML declaration must be at the start of the document.");
  1163. }
  1164. this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START;
  1165. }
  1166. else {
  1167. this.state = isQuestion ? S_PI_ENDING : S_PI_BODY;
  1168. }
  1169. }
  1170. else {
  1171. this.fail("disallowed character in processing instruction name.");
  1172. this.piTarget += String.fromCodePoint(c);
  1173. }
  1174. break;
  1175. }
  1176. }
  1177. }
  1178. sPIBody() {
  1179. if (this.text.length === 0) {
  1180. const c = this.getCodeNorm();
  1181. if (c === QUESTION) {
  1182. this.state = S_PI_ENDING;
  1183. }
  1184. else if (!isS(c)) {
  1185. this.text = String.fromCodePoint(c);
  1186. }
  1187. }
  1188. // The question mark character is not valid inside any of the XML
  1189. // declaration name/value pairs.
  1190. else if (this.captureToChar(QUESTION)) {
  1191. this.state = S_PI_ENDING;
  1192. }
  1193. }
  1194. sPIEnding() {
  1195. var _a;
  1196. const c = this.getCodeNorm();
  1197. if (c === GREATER) {
  1198. const { piTarget } = this;
  1199. if (piTarget.toLowerCase() === "xml") {
  1200. this.fail("the XML declaration must appear at the start of the document.");
  1201. }
  1202. (_a = this.piHandler) === null || _a === void 0 ? void 0 : _a.call(this, {
  1203. target: piTarget,
  1204. body: this.text,
  1205. });
  1206. this.piTarget = this.text = "";
  1207. this.state = S_TEXT;
  1208. }
  1209. else if (c === QUESTION) {
  1210. // We ran into ?? as part of a processing instruction. We initially took
  1211. // the first ? as a sign that the PI was ending, but it is not. So we have
  1212. // to add it to the body but we take the new ? as a sign that the PI is
  1213. // ending.
  1214. this.text += "?";
  1215. }
  1216. else {
  1217. this.text += `?${String.fromCodePoint(c)}`;
  1218. this.state = S_PI_BODY;
  1219. }
  1220. this.xmlDeclPossible = false;
  1221. }
  1222. sXMLDeclNameStart() {
  1223. const c = this.skipSpaces();
  1224. // The question mark character is not valid inside any of the XML
  1225. // declaration name/value pairs.
  1226. if (c === QUESTION) {
  1227. // It is valid to go to S_XML_DECL_ENDING from this state.
  1228. this.state = S_XML_DECL_ENDING;
  1229. return;
  1230. }
  1231. if (c !== EOC) {
  1232. this.state = S_XML_DECL_NAME;
  1233. this.name = String.fromCodePoint(c);
  1234. }
  1235. }
  1236. sXMLDeclName() {
  1237. const c = this.captureTo(XML_DECL_NAME_TERMINATOR);
  1238. // The question mark character is not valid inside any of the XML
  1239. // declaration name/value pairs.
  1240. if (c === QUESTION) {
  1241. this.state = S_XML_DECL_ENDING;
  1242. this.name += this.text;
  1243. this.text = "";
  1244. this.fail("XML declaration is incomplete.");
  1245. return;
  1246. }
  1247. if (!(isS(c) || c === EQUAL)) {
  1248. return;
  1249. }
  1250. this.name += this.text;
  1251. this.text = "";
  1252. if (!this.xmlDeclExpects.includes(this.name)) {
  1253. switch (this.name.length) {
  1254. case 0:
  1255. this.fail("did not expect any more name/value pairs.");
  1256. break;
  1257. case 1:
  1258. this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
  1259. break;
  1260. default:
  1261. this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
  1262. }
  1263. }
  1264. this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ;
  1265. }
  1266. sXMLDeclEq() {
  1267. const c = this.getCodeNorm();
  1268. // The question mark character is not valid inside any of the XML
  1269. // declaration name/value pairs.
  1270. if (c === QUESTION) {
  1271. this.state = S_XML_DECL_ENDING;
  1272. this.fail("XML declaration is incomplete.");
  1273. return;
  1274. }
  1275. if (isS(c)) {
  1276. return;
  1277. }
  1278. if (c !== EQUAL) {
  1279. this.fail("value required.");
  1280. }
  1281. this.state = S_XML_DECL_VALUE_START;
  1282. }
  1283. sXMLDeclValueStart() {
  1284. const c = this.getCodeNorm();
  1285. // The question mark character is not valid inside any of the XML
  1286. // declaration name/value pairs.
  1287. if (c === QUESTION) {
  1288. this.state = S_XML_DECL_ENDING;
  1289. this.fail("XML declaration is incomplete.");
  1290. return;
  1291. }
  1292. if (isS(c)) {
  1293. return;
  1294. }
  1295. if (!isQuote(c)) {
  1296. this.fail("value must be quoted.");
  1297. this.q = SPACE;
  1298. }
  1299. else {
  1300. this.q = c;
  1301. }
  1302. this.state = S_XML_DECL_VALUE;
  1303. }
  1304. sXMLDeclValue() {
  1305. const c = this.captureTo([this.q, QUESTION]);
  1306. // The question mark character is not valid inside any of the XML
  1307. // declaration name/value pairs.
  1308. if (c === QUESTION) {
  1309. this.state = S_XML_DECL_ENDING;
  1310. this.text = "";
  1311. this.fail("XML declaration is incomplete.");
  1312. return;
  1313. }
  1314. if (c === EOC) {
  1315. return;
  1316. }
  1317. const value = this.text;
  1318. this.text = "";
  1319. switch (this.name) {
  1320. case "version": {
  1321. this.xmlDeclExpects = ["encoding", "standalone"];
  1322. const version = value;
  1323. this.xmlDecl.version = version;
  1324. // This is the test specified by XML 1.0 but it is fine for XML 1.1.
  1325. if (!/^1\.[0-9]+$/.test(version)) {
  1326. this.fail("version number must match /^1\\.[0-9]+$/.");
  1327. }
  1328. // When forceXMLVersion is set, the XML declaration is ignored.
  1329. else if (!this.opt.forceXMLVersion) {
  1330. this.setXMLVersion(version);
  1331. }
  1332. break;
  1333. }
  1334. case "encoding":
  1335. if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) {
  1336. this.fail("encoding value must match \
  1337. /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
  1338. }
  1339. this.xmlDeclExpects = ["standalone"];
  1340. this.xmlDecl.encoding = value;
  1341. break;
  1342. case "standalone":
  1343. if (value !== "yes" && value !== "no") {
  1344. this.fail("standalone value must match \"yes\" or \"no\".");
  1345. }
  1346. this.xmlDeclExpects = [];
  1347. this.xmlDecl.standalone = value;
  1348. break;
  1349. default:
  1350. // We don't need to raise an error here since we've already raised one
  1351. // when checking what name was expected.
  1352. }
  1353. this.name = "";
  1354. this.state = S_XML_DECL_SEPARATOR;
  1355. }
  1356. sXMLDeclSeparator() {
  1357. const c = this.getCodeNorm();
  1358. // The question mark character is not valid inside any of the XML
  1359. // declaration name/value pairs.
  1360. if (c === QUESTION) {
  1361. // It is valid to go to S_XML_DECL_ENDING from this state.
  1362. this.state = S_XML_DECL_ENDING;
  1363. return;
  1364. }
  1365. if (!isS(c)) {
  1366. this.fail("whitespace required.");
  1367. this.unget();
  1368. }
  1369. this.state = S_XML_DECL_NAME_START;
  1370. }
  1371. sXMLDeclEnding() {
  1372. var _a;
  1373. const c = this.getCodeNorm();
  1374. if (c === GREATER) {
  1375. if (this.piTarget !== "xml") {
  1376. this.fail("processing instructions are not allowed before root.");
  1377. }
  1378. else if (this.name !== "version" &&
  1379. this.xmlDeclExpects.includes("version")) {
  1380. this.fail("XML declaration must contain a version.");
  1381. }
  1382. (_a = this.xmldeclHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.xmlDecl);
  1383. this.name = "";
  1384. this.piTarget = this.text = "";
  1385. this.state = S_TEXT;
  1386. }
  1387. else {
  1388. // We got here because the previous character was a ?, but the question
  1389. // mark character is not valid inside any of the XML declaration
  1390. // name/value pairs.
  1391. this.fail("The character ? is disallowed anywhere in XML declarations.");
  1392. }
  1393. this.xmlDeclPossible = false;
  1394. }
  1395. sOpenTag() {
  1396. var _a;
  1397. const c = this.captureNameChars();
  1398. if (c === EOC) {
  1399. return;
  1400. }
  1401. const tag = this.tag = {
  1402. name: this.name,
  1403. attributes: Object.create(null),
  1404. };
  1405. this.name = "";
  1406. if (this.xmlnsOpt) {
  1407. this.topNS = tag.ns = Object.create(null);
  1408. }
  1409. (_a = this.openTagStartHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1410. this.sawRoot = true;
  1411. if (!this.fragmentOpt && this.closedRoot) {
  1412. this.fail("documents may contain only one root.");
  1413. }
  1414. switch (c) {
  1415. case GREATER:
  1416. this.openTag();
  1417. break;
  1418. case FORWARD_SLASH:
  1419. this.state = S_OPEN_TAG_SLASH;
  1420. break;
  1421. default:
  1422. if (!isS(c)) {
  1423. this.fail("disallowed character in tag name.");
  1424. }
  1425. this.state = S_ATTRIB;
  1426. }
  1427. }
  1428. sOpenTagSlash() {
  1429. if (this.getCode() === GREATER) {
  1430. this.openSelfClosingTag();
  1431. }
  1432. else {
  1433. this.fail("forward-slash in opening tag not followed by >.");
  1434. this.state = S_ATTRIB;
  1435. }
  1436. }
  1437. sAttrib() {
  1438. const c = this.skipSpaces();
  1439. if (c === EOC) {
  1440. return;
  1441. }
  1442. if (isNameStartChar(c)) {
  1443. this.unget();
  1444. this.state = S_ATTRIB_NAME;
  1445. }
  1446. else if (c === GREATER) {
  1447. this.openTag();
  1448. }
  1449. else if (c === FORWARD_SLASH) {
  1450. this.state = S_OPEN_TAG_SLASH;
  1451. }
  1452. else {
  1453. this.fail("disallowed character in attribute name.");
  1454. }
  1455. }
  1456. sAttribName() {
  1457. const c = this.captureNameChars();
  1458. if (c === EQUAL) {
  1459. this.state = S_ATTRIB_VALUE;
  1460. }
  1461. else if (isS(c)) {
  1462. this.state = S_ATTRIB_NAME_SAW_WHITE;
  1463. }
  1464. else if (c === GREATER) {
  1465. this.fail("attribute without value.");
  1466. this.pushAttrib(this.name, this.name);
  1467. this.name = this.text = "";
  1468. this.openTag();
  1469. }
  1470. else if (c !== EOC) {
  1471. this.fail("disallowed character in attribute name.");
  1472. }
  1473. }
  1474. sAttribNameSawWhite() {
  1475. const c = this.skipSpaces();
  1476. switch (c) {
  1477. case EOC:
  1478. return;
  1479. case EQUAL:
  1480. this.state = S_ATTRIB_VALUE;
  1481. break;
  1482. default:
  1483. this.fail("attribute without value.");
  1484. // Should we do this???
  1485. // this.tag.attributes[this.name] = "";
  1486. this.text = "";
  1487. this.name = "";
  1488. if (c === GREATER) {
  1489. this.openTag();
  1490. }
  1491. else if (isNameStartChar(c)) {
  1492. this.unget();
  1493. this.state = S_ATTRIB_NAME;
  1494. }
  1495. else {
  1496. this.fail("disallowed character in attribute name.");
  1497. this.state = S_ATTRIB;
  1498. }
  1499. }
  1500. }
  1501. sAttribValue() {
  1502. const c = this.getCodeNorm();
  1503. if (isQuote(c)) {
  1504. this.q = c;
  1505. this.state = S_ATTRIB_VALUE_QUOTED;
  1506. }
  1507. else if (!isS(c)) {
  1508. this.fail("unquoted attribute value.");
  1509. this.state = S_ATTRIB_VALUE_UNQUOTED;
  1510. this.unget();
  1511. }
  1512. }
  1513. sAttribValueQuoted() {
  1514. // We deliberately do not use captureTo here. The specialized code we use
  1515. // here is faster than using captureTo.
  1516. const { q, chunk } = this;
  1517. let { i: start } = this;
  1518. // eslint-disable-next-line no-constant-condition
  1519. while (true) {
  1520. switch (this.getCode()) {
  1521. case q:
  1522. this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI));
  1523. this.name = this.text = "";
  1524. this.q = null;
  1525. this.state = S_ATTRIB_VALUE_CLOSED;
  1526. return;
  1527. case AMP:
  1528. this.text += chunk.slice(start, this.prevI);
  1529. this.state = S_ENTITY;
  1530. this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
  1531. return;
  1532. case NL:
  1533. case NL_LIKE:
  1534. case TAB:
  1535. this.text += `${chunk.slice(start, this.prevI)} `;
  1536. start = this.i;
  1537. break;
  1538. case LESS:
  1539. this.text += chunk.slice(start, this.prevI);
  1540. this.fail("disallowed character.");
  1541. return;
  1542. case EOC:
  1543. this.text += chunk.slice(start);
  1544. return;
  1545. default:
  1546. }
  1547. }
  1548. }
  1549. sAttribValueClosed() {
  1550. const c = this.getCodeNorm();
  1551. if (isS(c)) {
  1552. this.state = S_ATTRIB;
  1553. }
  1554. else if (c === GREATER) {
  1555. this.openTag();
  1556. }
  1557. else if (c === FORWARD_SLASH) {
  1558. this.state = S_OPEN_TAG_SLASH;
  1559. }
  1560. else if (isNameStartChar(c)) {
  1561. this.fail("no whitespace between attributes.");
  1562. this.unget();
  1563. this.state = S_ATTRIB_NAME;
  1564. }
  1565. else {
  1566. this.fail("disallowed character in attribute name.");
  1567. }
  1568. }
  1569. sAttribValueUnquoted() {
  1570. // We don't do anything regarding EOL or space handling for unquoted
  1571. // attributes. We already have failed by the time we get here, and the
  1572. // contract that saxes upholds states that upon failure, it is not safe to
  1573. // rely on the data passed to event handlers (other than
  1574. // ``onerror``). Passing "bad" data is not a problem.
  1575. const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR);
  1576. switch (c) {
  1577. case AMP:
  1578. this.state = S_ENTITY;
  1579. this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
  1580. break;
  1581. case LESS:
  1582. this.fail("disallowed character.");
  1583. break;
  1584. case EOC:
  1585. break;
  1586. default:
  1587. if (this.text.includes("]]>")) {
  1588. this.fail("the string \"]]>\" is disallowed in char data.");
  1589. }
  1590. this.pushAttrib(this.name, this.text);
  1591. this.name = this.text = "";
  1592. if (c === GREATER) {
  1593. this.openTag();
  1594. }
  1595. else {
  1596. this.state = S_ATTRIB;
  1597. }
  1598. }
  1599. }
  1600. sCloseTag() {
  1601. const c = this.captureNameChars();
  1602. if (c === GREATER) {
  1603. this.closeTag();
  1604. }
  1605. else if (isS(c)) {
  1606. this.state = S_CLOSE_TAG_SAW_WHITE;
  1607. }
  1608. else if (c !== EOC) {
  1609. this.fail("disallowed character in closing tag.");
  1610. }
  1611. }
  1612. sCloseTagSawWhite() {
  1613. switch (this.skipSpaces()) {
  1614. case GREATER:
  1615. this.closeTag();
  1616. break;
  1617. case EOC:
  1618. break;
  1619. default:
  1620. this.fail("disallowed character in closing tag.");
  1621. }
  1622. }
  1623. // END OF STATE ENGINE METHODS
  1624. handleTextInRoot() {
  1625. // This is essentially a specialized version of captureTo which is optimized
  1626. // for performing the ]]> check. A previous version of this code, checked
  1627. // ``this.text`` for the presence of ]]>. It simplified the code but was
  1628. // very costly when character data contained a lot of entities to be parsed.
  1629. //
  1630. // Since we are using a specialized loop, we also keep track of the presence
  1631. // of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
  1632. //
  1633. let { i: start, forbiddenState } = this;
  1634. const { chunk, textHandler: handler } = this;
  1635. // eslint-disable-next-line no-labels, no-restricted-syntax
  1636. scanLoop:
  1637. // eslint-disable-next-line no-constant-condition
  1638. while (true) {
  1639. switch (this.getCode()) {
  1640. case LESS: {
  1641. this.state = S_OPEN_WAKA;
  1642. if (handler !== undefined) {
  1643. const { text } = this;
  1644. const slice = chunk.slice(start, this.prevI);
  1645. if (text.length !== 0) {
  1646. handler(text + slice);
  1647. this.text = "";
  1648. }
  1649. else if (slice.length !== 0) {
  1650. handler(slice);
  1651. }
  1652. }
  1653. forbiddenState = FORBIDDEN_START;
  1654. // eslint-disable-next-line no-labels
  1655. break scanLoop;
  1656. }
  1657. case AMP:
  1658. this.state = S_ENTITY;
  1659. this.entityReturnState = S_TEXT;
  1660. if (handler !== undefined) {
  1661. this.text += chunk.slice(start, this.prevI);
  1662. }
  1663. forbiddenState = FORBIDDEN_START;
  1664. // eslint-disable-next-line no-labels
  1665. break scanLoop;
  1666. case CLOSE_BRACKET:
  1667. switch (forbiddenState) {
  1668. case FORBIDDEN_START:
  1669. forbiddenState = FORBIDDEN_BRACKET;
  1670. break;
  1671. case FORBIDDEN_BRACKET:
  1672. forbiddenState = FORBIDDEN_BRACKET_BRACKET;
  1673. break;
  1674. case FORBIDDEN_BRACKET_BRACKET:
  1675. break;
  1676. default:
  1677. throw new Error("impossible state");
  1678. }
  1679. break;
  1680. case GREATER:
  1681. if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
  1682. this.fail("the string \"]]>\" is disallowed in char data.");
  1683. }
  1684. forbiddenState = FORBIDDEN_START;
  1685. break;
  1686. case NL_LIKE:
  1687. if (handler !== undefined) {
  1688. this.text += `${chunk.slice(start, this.prevI)}\n`;
  1689. }
  1690. start = this.i;
  1691. forbiddenState = FORBIDDEN_START;
  1692. break;
  1693. case EOC:
  1694. if (handler !== undefined) {
  1695. this.text += chunk.slice(start);
  1696. }
  1697. // eslint-disable-next-line no-labels
  1698. break scanLoop;
  1699. default:
  1700. forbiddenState = FORBIDDEN_START;
  1701. }
  1702. }
  1703. this.forbiddenState = forbiddenState;
  1704. }
  1705. handleTextOutsideRoot() {
  1706. // This is essentially a specialized version of captureTo which is optimized
  1707. // for a specialized task. We keep track of the presence of non-space
  1708. // characters in the text since these are errors when appearing outside the
  1709. // document root element.
  1710. let { i: start } = this;
  1711. const { chunk, textHandler: handler } = this;
  1712. let nonSpace = false;
  1713. // eslint-disable-next-line no-labels, no-restricted-syntax
  1714. outRootLoop:
  1715. // eslint-disable-next-line no-constant-condition
  1716. while (true) {
  1717. const code = this.getCode();
  1718. switch (code) {
  1719. case LESS: {
  1720. this.state = S_OPEN_WAKA;
  1721. if (handler !== undefined) {
  1722. const { text } = this;
  1723. const slice = chunk.slice(start, this.prevI);
  1724. if (text.length !== 0) {
  1725. handler(text + slice);
  1726. this.text = "";
  1727. }
  1728. else if (slice.length !== 0) {
  1729. handler(slice);
  1730. }
  1731. }
  1732. // eslint-disable-next-line no-labels
  1733. break outRootLoop;
  1734. }
  1735. case AMP:
  1736. this.state = S_ENTITY;
  1737. this.entityReturnState = S_TEXT;
  1738. if (handler !== undefined) {
  1739. this.text += chunk.slice(start, this.prevI);
  1740. }
  1741. nonSpace = true;
  1742. // eslint-disable-next-line no-labels
  1743. break outRootLoop;
  1744. case NL_LIKE:
  1745. if (handler !== undefined) {
  1746. this.text += `${chunk.slice(start, this.prevI)}\n`;
  1747. }
  1748. start = this.i;
  1749. break;
  1750. case EOC:
  1751. if (handler !== undefined) {
  1752. this.text += chunk.slice(start);
  1753. }
  1754. // eslint-disable-next-line no-labels
  1755. break outRootLoop;
  1756. default:
  1757. if (!isS(code)) {
  1758. nonSpace = true;
  1759. }
  1760. }
  1761. }
  1762. if (!nonSpace) {
  1763. return;
  1764. }
  1765. // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
  1766. // to avoid reporting errors for every single character that is out of
  1767. // place.
  1768. if (!this.sawRoot && !this.reportedTextBeforeRoot) {
  1769. this.fail("text data outside of root node.");
  1770. this.reportedTextBeforeRoot = true;
  1771. }
  1772. if (this.closedRoot && !this.reportedTextAfterRoot) {
  1773. this.fail("text data outside of root node.");
  1774. this.reportedTextAfterRoot = true;
  1775. }
  1776. }
  1777. pushAttribNS(name, value) {
  1778. var _a;
  1779. const { prefix, local } = this.qname(name);
  1780. const attr = { name, prefix, local, value };
  1781. this.attribList.push(attr);
  1782. (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
  1783. if (prefix === "xmlns") {
  1784. const trimmed = value.trim();
  1785. if (this.currentXMLVersion === "1.0" && trimmed === "") {
  1786. this.fail("invalid attempt to undefine prefix in XML 1.0");
  1787. }
  1788. this.topNS[local] = trimmed;
  1789. nsPairCheck(this, local, trimmed);
  1790. }
  1791. else if (name === "xmlns") {
  1792. const trimmed = value.trim();
  1793. this.topNS[""] = trimmed;
  1794. nsPairCheck(this, "", trimmed);
  1795. }
  1796. }
  1797. pushAttribPlain(name, value) {
  1798. var _a;
  1799. const attr = { name, value };
  1800. this.attribList.push(attr);
  1801. (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
  1802. }
  1803. /**
  1804. * End parsing. This performs final well-formedness checks and resets the
  1805. * parser to a clean state.
  1806. *
  1807. * @returns this
  1808. */
  1809. end() {
  1810. var _a, _b;
  1811. if (!this.sawRoot) {
  1812. this.fail("document must contain a root element.");
  1813. }
  1814. const { tags } = this;
  1815. while (tags.length > 0) {
  1816. const tag = tags.pop();
  1817. this.fail(`unclosed tag: ${tag.name}`);
  1818. }
  1819. if ((this.state !== S_BEGIN) && (this.state !== S_TEXT)) {
  1820. this.fail("unexpected end.");
  1821. }
  1822. const { text } = this;
  1823. if (text.length !== 0) {
  1824. (_a = this.textHandler) === null || _a === void 0 ? void 0 : _a.call(this, text);
  1825. this.text = "";
  1826. }
  1827. this._closed = true;
  1828. (_b = this.endHandler) === null || _b === void 0 ? void 0 : _b.call(this);
  1829. this._init();
  1830. return this;
  1831. }
  1832. /**
  1833. * Resolve a namespace prefix.
  1834. *
  1835. * @param prefix The prefix to resolve.
  1836. *
  1837. * @returns The namespace URI or ``undefined`` if the prefix is not defined.
  1838. */
  1839. resolve(prefix) {
  1840. var _a, _b;
  1841. let uri = this.topNS[prefix];
  1842. if (uri !== undefined) {
  1843. return uri;
  1844. }
  1845. const { tags } = this;
  1846. for (let index = tags.length - 1; index >= 0; index--) {
  1847. uri = tags[index].ns[prefix];
  1848. if (uri !== undefined) {
  1849. return uri;
  1850. }
  1851. }
  1852. uri = this.ns[prefix];
  1853. if (uri !== undefined) {
  1854. return uri;
  1855. }
  1856. return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix);
  1857. }
  1858. /**
  1859. * Parse a qname into its prefix and local name parts.
  1860. *
  1861. * @param name The name to parse
  1862. *
  1863. * @returns
  1864. */
  1865. qname(name) {
  1866. // This is faster than using name.split(":").
  1867. const colon = name.indexOf(":");
  1868. if (colon === -1) {
  1869. return { prefix: "", local: name };
  1870. }
  1871. const local = name.slice(colon + 1);
  1872. const prefix = name.slice(0, colon);
  1873. if (prefix === "" || local === "" || local.includes(":")) {
  1874. this.fail(`malformed name: ${name}.`);
  1875. }
  1876. return { prefix, local };
  1877. }
  1878. processAttribsNS() {
  1879. var _a;
  1880. const { attribList } = this;
  1881. const tag = this.tag;
  1882. {
  1883. // add namespace info to tag
  1884. const { prefix, local } = this.qname(tag.name);
  1885. tag.prefix = prefix;
  1886. tag.local = local;
  1887. const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : "";
  1888. if (prefix !== "") {
  1889. if (prefix === "xmlns") {
  1890. this.fail("tags may not have \"xmlns\" as prefix.");
  1891. }
  1892. if (uri === "") {
  1893. this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
  1894. tag.uri = prefix;
  1895. }
  1896. }
  1897. }
  1898. if (attribList.length === 0) {
  1899. return;
  1900. }
  1901. const { attributes } = tag;
  1902. const seen = new Set();
  1903. // Note: do not apply default ns to attributes:
  1904. // http://www.w3.org/TR/REC-xml-names/#defaulting
  1905. for (const attr of attribList) {
  1906. const { name, prefix, local } = attr;
  1907. let uri;
  1908. let eqname;
  1909. if (prefix === "") {
  1910. uri = name === "xmlns" ? XMLNS_NAMESPACE : "";
  1911. eqname = name;
  1912. }
  1913. else {
  1914. uri = this.resolve(prefix);
  1915. // if there's any attributes with an undefined namespace,
  1916. // then fail on them now.
  1917. if (uri === undefined) {
  1918. this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
  1919. uri = prefix;
  1920. }
  1921. eqname = `{${uri}}${local}`;
  1922. }
  1923. if (seen.has(eqname)) {
  1924. this.fail(`duplicate attribute: ${eqname}.`);
  1925. }
  1926. seen.add(eqname);
  1927. attr.uri = uri;
  1928. attributes[name] = attr;
  1929. }
  1930. this.attribList = [];
  1931. }
  1932. processAttribsPlain() {
  1933. const { attribList } = this;
  1934. // eslint-disable-next-line prefer-destructuring
  1935. const attributes = this.tag.attributes;
  1936. for (const { name, value } of attribList) {
  1937. if (attributes[name] !== undefined) {
  1938. this.fail(`duplicate attribute: ${name}.`);
  1939. }
  1940. attributes[name] = value;
  1941. }
  1942. this.attribList = [];
  1943. }
  1944. /**
  1945. * Handle a complete open tag. This parser code calls this once it has seen
  1946. * the whole tag. This method checks for well-formeness and then emits
  1947. * ``onopentag``.
  1948. */
  1949. openTag() {
  1950. var _a;
  1951. this.processAttribs();
  1952. const { tags } = this;
  1953. const tag = this.tag;
  1954. tag.isSelfClosing = false;
  1955. // There cannot be any pending text here due to the onopentagstart that was
  1956. // necessarily emitted before we get here. So we do not check text.
  1957. (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1958. tags.push(tag);
  1959. this.state = S_TEXT;
  1960. this.name = "";
  1961. }
  1962. /**
  1963. * Handle a complete self-closing tag. This parser code calls this once it has
  1964. * seen the whole tag. This method checks for well-formeness and then emits
  1965. * ``onopentag`` and ``onclosetag``.
  1966. */
  1967. openSelfClosingTag() {
  1968. var _a, _b, _c;
  1969. this.processAttribs();
  1970. const { tags } = this;
  1971. const tag = this.tag;
  1972. tag.isSelfClosing = true;
  1973. // There cannot be any pending text here due to the onopentagstart that was
  1974. // necessarily emitted before we get here. So we do not check text.
  1975. (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
  1976. (_b = this.closeTagHandler) === null || _b === void 0 ? void 0 : _b.call(this, tag);
  1977. const top = this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null;
  1978. if (top === null) {
  1979. this.closedRoot = true;
  1980. }
  1981. this.state = S_TEXT;
  1982. this.name = "";
  1983. }
  1984. /**
  1985. * Handle a complete close tag. This parser code calls this once it has seen
  1986. * the whole tag. This method checks for well-formeness and then emits
  1987. * ``onclosetag``.
  1988. */
  1989. closeTag() {
  1990. const { tags, name } = this;
  1991. // Our state after this will be S_TEXT, no matter what, and we can clear
  1992. // tagName now.
  1993. this.state = S_TEXT;
  1994. this.name = "";
  1995. if (name === "") {
  1996. this.fail("weird empty close tag.");
  1997. this.text += "</>";
  1998. return;
  1999. }
  2000. const handler = this.closeTagHandler;
  2001. let l = tags.length;
  2002. while (l-- > 0) {
  2003. const tag = this.tag = tags.pop();
  2004. this.topNS = tag.ns;
  2005. handler === null || handler === void 0 ? void 0 : handler(tag);
  2006. if (tag.name === name) {
  2007. break;
  2008. }
  2009. this.fail("unexpected close tag.");
  2010. }
  2011. if (l === 0) {
  2012. this.closedRoot = true;
  2013. }
  2014. else if (l < 0) {
  2015. this.fail(`unmatched closing tag: ${name}.`);
  2016. this.text += `</${name}>`;
  2017. }
  2018. }
  2019. /**
  2020. * Resolves an entity. Makes any necessary well-formedness checks.
  2021. *
  2022. * @param entity The entity to resolve.
  2023. *
  2024. * @returns The parsed entity.
  2025. */
  2026. parseEntity(entity) {
  2027. // startsWith would be significantly slower for this test.
  2028. if (entity[0] !== "#") {
  2029. const defined = this.ENTITIES[entity];
  2030. if (defined !== undefined) {
  2031. return defined;
  2032. }
  2033. this.fail(this.isName(entity) ? "undefined entity." :
  2034. "disallowed character in entity name.");
  2035. return `&${entity};`;
  2036. }
  2037. let num = NaN;
  2038. if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
  2039. num = parseInt(entity.slice(2), 16);
  2040. }
  2041. else if (/^#[0-9]+$/.test(entity)) {
  2042. num = parseInt(entity.slice(1), 10);
  2043. }
  2044. // The character reference is required to match the CHAR production.
  2045. if (!this.isChar(num)) {
  2046. this.fail("malformed character entity.");
  2047. return `&${entity};`;
  2048. }
  2049. return String.fromCodePoint(num);
  2050. }
  2051. }
  2052. exports.SaxesParser = SaxesParser;
  2053. //# sourceMappingURL=saxes.js.map