serialize.js 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. "use strict";
  2. const xnv = require("xml-name-validator");
  3. const attributeUtils = require("./attributes");
  4. const { NAMESPACES, VOID_ELEMENTS, NODE_TYPES } = require("./constants");
  // Matches a string (possibly empty) made up only of tab, line feed, carriage
  // return, U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF. Used below to
  // reject text, comment, processing-instruction and systemId data when
  // `requireWellFormed` is set.
  // NOTE(review): the ranges appear to mirror the XML 1.0 `Char` production.
  5. const XML_CHAR = /^(\x09|\x0A|\x0D|[\x20-\uD7FF]|[\uE000-\uFFFD]|[\u{10000}-\u{10FFFF}])*$/u;
  // Matches a string (possibly empty) made up only of space, CR, LF, ASCII
  // letters and digits, and the punctuation -'()+,./:=?;!*#@$_% . Used below to
  // validate a document type's publicId when `requireWellFormed` is set.
  // NOTE(review): the set appears to mirror the XML 1.0 `PubidChar` production.
  6. const PUBID_CHAR = /^(\x20|\x0D|\x0A|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%])*$/u;
  /**
   * Lower-cases a single UTF-16 code unit if, and only if, it is an ASCII
   * uppercase letter (A-Z). Every other code unit is returned unchanged.
   *
   * @param {number} code - UTF-16 code unit.
   * @returns {number} The folded code unit.
   */
  function asciiLowerCode(code) {
    return code >= 0x41 && code <= 0x5a ? code | 0x20 : code;
  }

  /**
   * Tells whether two strings are equal when ASCII letters are compared without
   * regard to case. Non-letter characters must match exactly.
   *
   * Setting bit 5 on every code unit would also conflate unrelated characters
   * such as "@" and "`" or "[" and "{", so only A-Z is folded.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @returns {boolean} `true` when the strings are an ASCII case-insensitive match.
   */
  function asciiCaseInsensitiveMatch(a, b) {
    if (a.length !== b.length) {
      return false;
    }

    for (let i = 0; i < a.length; ++i) {
      if (asciiLowerCode(a.charCodeAt(i)) !== asciiLowerCode(b.charCodeAt(i))) {
        return false;
      }
    }

    return true;
  }
  /**
   * Scans an element's attributes for namespace declarations (attributes in the
   * XMLNS namespace) and records them.
   *
   * - A declaration without a prefix (`xmlns="..."`) is not recorded; its value
   *   is remembered and returned.
   * - A prefixed declaration (`xmlns:p="..."`) adds `p` to the list stored in
   *   `map` under the declared namespace, and sets `prefixMap[p]` to that
   *   namespace. Declarations of the XML namespace are skipped, as are prefixes
   *   already listed for the namespace.
   *
   * @param {object} element - Node exposing an indexable `attributes` collection
   *   with a `length`.
   * @param {object} map - Namespace -> list of prefixes; updated in place.
   * @param {object} prefixMap - Prefix -> namespace for declarations found on
   *   this element; updated in place.
   * @returns {string|null} Value of the default namespace declaration, or `null`
   *   when the element has none.
   */
  function recordNamespaceInformation(element, map, prefixMap) {
    let defaultNamespaceAttrValue = null;

    for (let i = 0; i < element.attributes.length; ++i) {
      const attr = element.attributes[i];
      if (attr.namespaceURI !== NAMESPACES.XMLNS) {
        continue;
      }

      if (attr.prefix === null) {
        defaultNamespaceAttrValue = attr.value;
        continue;
      }

      let namespaceDefinition = attr.value;
      if (namespaceDefinition === NAMESPACES.XML) {
        continue;
      }

      // This is exactly the other way than the spec says, but that's intended.
      // All the maps coalesce null to the empty string (explained in the
      // spec), so instead of doing that every time, just do it once here.
      if (namespaceDefinition === null) {
        namespaceDefinition = "";
      }

      // Only own entries count: `in` would also see inherited members such as
      // "constructor" when `map` is an ordinary object, and then crash on
      // `.includes`.
      const knownPrefixes = Object.prototype.hasOwnProperty.call(map, namespaceDefinition) ?
        map[namespaceDefinition] :
        [];
      if (knownPrefixes.includes(attr.localName)) {
        continue;
      }

      // Store a fresh list instead of pushing: the existing array may be shared
      // with an ancestor's map, and mutating it would leak this declaration to
      // elements outside this subtree.
      map[namespaceDefinition] = [...knownPrefixes, attr.localName];
      prefixMap[attr.localName] = namespaceDefinition;
    }

    return defaultNamespaceAttrValue;
  }
  /**
   * Serializes a document type node as a `<!DOCTYPE ...>` declaration.
   *
   * The publicId is always wrapped in double quotes. The systemId is wrapped in
   * double quotes unless it contains a double quote and no apostrophe, in which
   * case apostrophes are used so the literal is not terminated early.
   *
   * @param {object} node - Node exposing `name`, `publicId` and `systemId` strings.
   * @param {*} namespace - Unused; kept for signature parity with the other serializers.
   * @param {*} prefixMap - Unused; kept for signature parity with the other serializers.
   * @param {boolean} requireWellFormed - When truthy, invalid identifiers throw.
   * @returns {string} The doctype markup.
   * @throws {Error} With `requireWellFormed`, when the publicId fails `PUBID_CHAR`,
   *   or the systemId fails `XML_CHAR` or contains both quote characters.
   */
  function serializeDocumentType(node, namespace, prefixMap, requireWellFormed) {
    const { publicId, systemId } = node;
    const hasDoubleQuote = systemId.includes('"');
    const hasApostrophe = systemId.includes("'");

    if (requireWellFormed && !PUBID_CHAR.test(publicId)) {
      throw new Error("Failed to serialize XML: document type node publicId is not well-formed.");
    }

    if (
      requireWellFormed &&
      (!XML_CHAR.test(systemId) || (hasDoubleQuote && hasApostrophe))
    ) {
      throw new Error("Failed to serialize XML: document type node systemId is not well-formed.");
    }

    let markup = `<!DOCTYPE ${node.name}`;
    if (publicId !== "") {
      markup += ` PUBLIC "${publicId}"`;
    } else if (systemId !== "") {
      markup += " SYSTEM";
    }

    if (systemId !== "") {
      // A literal cannot contain its own delimiter. When both quote kinds are
      // present (only reachable without requireWellFormed) no delimiter works,
      // so the historical double quote is kept.
      const quote = hasDoubleQuote && !hasApostrophe ? "'" : '"';
      markup += ` ${quote}${systemId}${quote}`;
    }

    return `${markup}>`;
  }
  /**
   * Serializes a processing instruction node as `<?target data?>`.
   *
   * @param {object} node - Node exposing `target` and `data` strings.
   * @param {*} namespace - Unused; kept for signature parity with the other serializers.
   * @param {*} prefixMap - Unused; kept for signature parity with the other serializers.
   * @param {boolean} requireWellFormed - When truthy, invalid targets or data throw.
   * @returns {string} The processing instruction markup.
   * @throws {Error} With `requireWellFormed`, when the target contains ":" or is an
   *   ASCII case-insensitive match for "xml", or when the data fails `XML_CHAR`
   *   or contains "?>".
   */
  function serializeProcessingInstruction(
    node,
    namespace,
    prefixMap,
    requireWellFormed
  ) {
    if (requireWellFormed) {
      const targetIsInvalid =
        node.target.includes(":") || asciiCaseInsensitiveMatch(node.target, "xml");
      if (targetIsInvalid) {
        throw new Error("Failed to serialize XML: processing instruction node target is not well-formed.");
      }

      const dataIsInvalid = !XML_CHAR.test(node.data) || node.data.includes("?>");
      if (dataIsInvalid) {
        throw new Error("Failed to serialize XML: processing instruction node data is not well-formed.");
      }
    }

    return "<?" + node.target + " " + node.data + "?>";
  }
  /**
   * Serializes a document node by concatenating the serialization of each of
   * its children, in order.
   *
   * @param {object} node - Node exposing `documentElement` and an iterable `childNodes`.
   * @param {*} namespace - Context namespace, forwarded to every child.
   * @param {object} prefixMap - Namespace prefix map, forwarded to every child.
   * @param {boolean} requireWellFormed - When truthy, a missing document element throws.
   * @param {{prefixIndex: number}} refs - Shared counter state, forwarded to every child.
   * @returns {string} The concatenated markup of all children.
   * @throws {Error} With `requireWellFormed`, when `node.documentElement` is `null`.
   */
  function serializeDocument(
    node,
    namespace,
    prefixMap,
    requireWellFormed,
    refs
  ) {
    const lacksRoot = node.documentElement === null;
    if (requireWellFormed && lacksRoot) {
      throw new Error("Failed to serialize XML: document does not have a document element.");
    }

    return [...node.childNodes].reduce(
      (soFar, child) =>
        soFar + xmlSerialization(child, namespace, prefixMap, requireWellFormed, refs),
      ""
    );
  }
  /**
   * Serializes a document fragment by concatenating the serialization of each
   * of its children, in order.
   *
   * @param {object} node - Node exposing an iterable `childNodes`.
   * @param {*} namespace - Context namespace, forwarded to every child.
   * @param {object} prefixMap - Namespace prefix map, forwarded to every child.
   * @param {boolean} requireWellFormed - Forwarded to every child.
   * @param {{prefixIndex: number}} refs - Shared counter state, forwarded to every child.
   * @returns {string} The concatenated markup of all children ("" when there are none).
   */
  function serializeDocumentFragment(
    node,
    namespace,
    prefixMap,
    requireWellFormed,
    refs
  ) {
    const serializeChild = child =>
      xmlSerialization(child, namespace, prefixMap, requireWellFormed, refs);

    let result = "";
    for (const child of node.childNodes) {
      result = result + serializeChild(child);
    }
    return result;
  }
  /**
   * Serializes a text node, escaping `&`, `<` and `>` (in that order, so the
   * ampersands introduced by the later replacements are not escaped again).
   *
   * @param {object} node - Node exposing a `data` string.
   * @param {*} namespace - Unused; kept for signature parity with the other serializers.
   * @param {*} prefixMap - Unused; kept for signature parity with the other serializers.
   * @param {boolean} requireWellFormed - When truthy, data failing `XML_CHAR` throws.
   * @returns {string} The escaped text.
   * @throws {Error} With `requireWellFormed`, when the data fails `XML_CHAR`.
   */
  function serializeText(node, namespace, prefixMap, requireWellFormed) {
    const { data } = node;
    if (requireWellFormed && !XML_CHAR.test(data)) {
      throw new Error("Failed to serialize XML: text node data is not well-formed.");
    }

    const withAmpersands = data.replace(/&/ug, "&amp;");
    const withLessThan = withAmpersands.replace(/</ug, "&lt;");
    return withLessThan.replace(/>/ug, "&gt;");
  }
  /**
   * Serializes a comment node as `<!--data-->`.
   *
   * @param {object} node - Node exposing a `data` string.
   * @param {*} namespace - Unused; kept for signature parity with the other serializers.
   * @param {*} prefixMap - Unused; kept for signature parity with the other serializers.
   * @param {boolean} requireWellFormed - When truthy, invalid comment data throws.
   * @returns {string} The comment markup.
   * @throws {Error} With `requireWellFormed`, when the data fails `XML_CHAR`, or
   *   contains "--" or ends with "-".
   */
  function serializeComment(node, namespace, prefixMap, requireWellFormed) {
    if (requireWellFormed) {
      if (!XML_CHAR.test(node.data)) {
        throw new Error("Failed to serialize XML: comment node data is not well-formed.");
      }

      const hasIllegalHyphens = node.data.includes("--") || node.data.endsWith("-");
      if (hasIllegalHyphens) {
        throw new Error("Failed to serialize XML: found hyphens in illegal places in comment node data.");
      }
    }

    return "<!--" + node.data + "-->";
  }
  /**
   * Serializes an element node: start tag (with any namespace declaration that
   * has to be synthesized), attributes, children and end tag.
   *
   * The inherited prefix map is never modified: the element works on its own
   * copy, including copies of the per-namespace prefix lists, so declarations
   * recorded for this element are only visible to its own subtree.
   *
   * @param {object} node - Element exposing `localName`, `prefix`, `namespaceURI`,
   *   `attributes`, `childNodes` (and `content` for HTML `template` elements).
   * @param {string|null} namespace - Default namespace inherited from the parent.
   * @param {object} prefixMap - Inherited namespace -> list of prefixes map.
   * @param {boolean} requireWellFormed - When truthy, invalid names throw.
   * @param {{prefixIndex: number}} refs - Shared counter used when a prefix has
   *   to be generated.
   * @returns {string} The element markup.
   * @throws {Error} With `requireWellFormed`, when the local name contains ":" or
   *   is rejected by `xnv.name`, or when the element's prefix is "xmlns".
   */
  function serializeElement(node, namespace, prefixMap, requireWellFormed, refs) {
    if (
      requireWellFormed &&
      (node.localName.includes(":") || !xnv.name(node.localName))
    ) {
      throw new Error("Failed to serialize XML: element node localName is not a valid XML name.");
    }

    let markup = "<";
    let qualifiedName = "";
    let skipEndTag = false;
    let ignoreNamespaceDefinitionAttr = false;

    // Copy the inherited map *and* each prefix list. A plain spread would share
    // the arrays with the parent, so prefixes pushed while processing this
    // element would leak into sibling subtrees where they are not declared.
    const map = { ...prefixMap };
    for (const key of Object.keys(map)) {
      map[key] = [...map[key]];
    }

    const localPrefixesMap = Object.create(null);
    const localDefaultNamespace = recordNamespaceInformation(
      node,
      map,
      localPrefixesMap
    );

    let inheritedNs = namespace;
    const ns = node.namespaceURI;

    if (inheritedNs === ns) {
      // Same namespace as the parent's default: no prefix and no new declaration.
      if (localDefaultNamespace !== null) {
        ignoreNamespaceDefinitionAttr = true;
      }
      if (ns === NAMESPACES.XML) {
        qualifiedName = `xml:${node.localName}`;
      } else {
        qualifiedName = node.localName;
      }
      markup += qualifiedName;
    } else {
      let { prefix } = node;
      let candidatePrefix = attributeUtils.preferredPrefixString(map, ns, prefix);
      if (prefix === "xmlns") {
        if (requireWellFormed) {
          throw new Error("Failed to serialize XML: element nodes can't have a prefix of \"xmlns\".");
        }
        candidatePrefix = "xmlns";
      }

      if (candidatePrefix !== null) {
        // A prefix is already available for this namespace.
        qualifiedName = `${candidatePrefix}:${node.localName}`;
        if (
          localDefaultNamespace !== null &&
          localDefaultNamespace !== NAMESPACES.XML
        ) {
          inheritedNs =
            localDefaultNamespace === "" ? null : localDefaultNamespace;
        }
        markup += qualifiedName;
      } else if (prefix !== null) {
        // The element has a prefix with no usable declaration: declare it here,
        // generating a new one if the element itself rebinds that prefix.
        if (prefix in localPrefixesMap) {
          prefix = attributeUtils.generatePrefix(map, ns, refs.prefixIndex++);
        }
        if (map[ns]) {
          map[ns].push(prefix);
        } else {
          map[ns] = [prefix];
        }
        qualifiedName = `${prefix}:${node.localName}`;
        markup += `${qualifiedName} xmlns:${prefix}="${attributeUtils.serializeAttributeValue(ns, requireWellFormed)}"`;
        if (localDefaultNamespace !== null) {
          inheritedNs =
            localDefaultNamespace === "" ? null : localDefaultNamespace;
        }
      } else if (localDefaultNamespace === null || localDefaultNamespace !== ns) {
        // Unprefixed element whose namespace is not declared as the default:
        // emit an xmlns declaration and suppress the element's own one.
        ignoreNamespaceDefinitionAttr = true;
        qualifiedName = node.localName;
        inheritedNs = ns;
        markup += `${qualifiedName} xmlns="${attributeUtils.serializeAttributeValue(ns, requireWellFormed)}"`;
      } else {
        // The element's own default namespace declaration already matches.
        qualifiedName = node.localName;
        inheritedNs = ns;
        markup += qualifiedName;
      }
    }

    markup += attributeUtils.serializeAttributes(
      node,
      map,
      localPrefixesMap,
      ignoreNamespaceDefinitionAttr,
      requireWellFormed,
      refs
    );

    if (
      ns === NAMESPACES.HTML &&
      node.childNodes.length === 0 &&
      VOID_ELEMENTS.has(node.localName)
    ) {
      markup += " /";
      skipEndTag = true;
    } else if (ns !== NAMESPACES.HTML && node.childNodes.length === 0) {
      markup += "/";
      skipEndTag = true;
    }
    markup += ">";

    if (skipEndTag) {
      return markup;
    }

    if (ns === NAMESPACES.HTML && node.localName === "template") {
      // HTML templates serialize their `content` rather than their child nodes.
      markup += xmlSerialization(
        node.content,
        inheritedNs,
        map,
        requireWellFormed,
        refs
      );
    } else {
      for (const child of node.childNodes) {
        markup += xmlSerialization(
          child,
          inheritedNs,
          map,
          requireWellFormed,
          refs
        );
      }
    }

    markup += `</${qualifiedName}>`;
    return markup;
  }
  /**
   * Serializes a CDATA section node as `<![CDATA[data]]>`.
   *
   * The sequence "]]>" cannot appear inside a CDATA section because it is the
   * terminator. Each occurrence is therefore split across two adjacent
   * sections ("]]" closes the first, ">" opens the next), which keeps the
   * output parseable and preserves the character data.
   *
   * @param {object} node - Node exposing a `data` string.
   * @returns {string} The CDATA markup.
   */
  function serializeCDATASection(node) {
    const safeData = node.data.split("]]>").join("]]]]><![CDATA[>");
    return `<![CDATA[${safeData}]]>`;
  }
  /**
   * Dispatches a node to the serializer matching its `nodeType`.
   *
   * Attribute nodes serialize to the empty string. Comment, text, document type
   * and processing instruction serializers do not receive `refs`; the CDATA
   * serializer receives only the node.
   *
   * @param {object} node - Node exposing a `nodeType`.
   * @param {string|null} namespace - Context (inherited default) namespace.
   * @param {object} prefixMap - Namespace -> list of prefixes map.
   * @param {boolean} requireWellFormed - Whether serializers must validate their input.
   * @param {{prefixIndex: number}} refs
   * @returns {string} The serialized markup.
   * @throws {TypeError} When `nodeType` is none of the handled node types.
   */
  function xmlSerialization(node, namespace, prefixMap, requireWellFormed, refs) {
    const type = node.nodeType;

    if (type === NODE_TYPES.ELEMENT_NODE) {
      return serializeElement(node, namespace, prefixMap, requireWellFormed, refs);
    }
    if (type === NODE_TYPES.DOCUMENT_NODE) {
      return serializeDocument(node, namespace, prefixMap, requireWellFormed, refs);
    }
    if (type === NODE_TYPES.COMMENT_NODE) {
      return serializeComment(node, namespace, prefixMap, requireWellFormed);
    }
    if (type === NODE_TYPES.TEXT_NODE) {
      return serializeText(node, namespace, prefixMap, requireWellFormed);
    }
    if (type === NODE_TYPES.DOCUMENT_FRAGMENT_NODE) {
      return serializeDocumentFragment(node, namespace, prefixMap, requireWellFormed, refs);
    }
    if (type === NODE_TYPES.DOCUMENT_TYPE_NODE) {
      return serializeDocumentType(node, namespace, prefixMap, requireWellFormed);
    }
    if (type === NODE_TYPES.PROCESSING_INSTRUCTION_NODE) {
      return serializeProcessingInstruction(node, namespace, prefixMap, requireWellFormed);
    }
    if (type === NODE_TYPES.ATTRIBUTE_NODE) {
      return "";
    }
    if (type === NODE_TYPES.CDATA_SECTION_NODE) {
      return serializeCDATASection(node);
    }

    throw new TypeError("Failed to serialize XML: only Nodes can be serialized.");
  }
  334. module.exports = (root, { requireWellFormed = false } = {}) => {
  335. const namespacePrefixMap = Object.create(null);
  336. namespacePrefixMap["http://www.w3.org/XML/1998/namespace"] = ["xml"];
  337. return xmlSerialization(root, null, namespacePrefixMap, requireWellFormed, {
  338. prefixIndex: 1
  339. });
  340. };