// html-text.js
/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */
import {factorySpace} from 'micromark-factory-space'
import {
  asciiAlpha,
  asciiAlphanumeric,
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
  16. /** @type {Construct} */
  17. export const htmlText = {
  18. name: 'htmlText',
  19. tokenize: tokenizeHtmlText
  20. }
  21. /**
  22. * @this {TokenizeContext}
  23. * @type {Tokenizer}
  24. */
  25. function tokenizeHtmlText(effects, ok, nok) {
  26. const self = this
  27. /** @type {NonNullable<Code> | undefined} */
  28. let marker
  29. /** @type {number} */
  30. let index
  31. /** @type {State} */
  32. let returnState
  33. return start
  34. /**
  35. * Start of HTML (text).
  36. *
  37. * ```markdown
  38. * > | a <b> c
  39. * ^
  40. * ```
  41. *
  42. * @type {State}
  43. */
  44. function start(code) {
  45. effects.enter('htmlText')
  46. effects.enter('htmlTextData')
  47. effects.consume(code)
  48. return open
  49. }
  50. /**
  51. * After `<`, at tag name or other stuff.
  52. *
  53. * ```markdown
  54. * > | a <b> c
  55. * ^
  56. * > | a <!doctype> c
  57. * ^
  58. * > | a <!--b--> c
  59. * ^
  60. * ```
  61. *
  62. * @type {State}
  63. */
  64. function open(code) {
  65. if (code === 33) {
  66. effects.consume(code)
  67. return declarationOpen
  68. }
  69. if (code === 47) {
  70. effects.consume(code)
  71. return tagCloseStart
  72. }
  73. if (code === 63) {
  74. effects.consume(code)
  75. return instruction
  76. }
  77. // ASCII alphabetical.
  78. if (asciiAlpha(code)) {
  79. effects.consume(code)
  80. return tagOpen
  81. }
  82. return nok(code)
  83. }
  84. /**
  85. * After `<!`, at declaration, comment, or CDATA.
  86. *
  87. * ```markdown
  88. * > | a <!doctype> c
  89. * ^
  90. * > | a <!--b--> c
  91. * ^
  92. * > | a <![CDATA[>&<]]> c
  93. * ^
  94. * ```
  95. *
  96. * @type {State}
  97. */
  98. function declarationOpen(code) {
  99. if (code === 45) {
  100. effects.consume(code)
  101. return commentOpenInside
  102. }
  103. if (code === 91) {
  104. effects.consume(code)
  105. index = 0
  106. return cdataOpenInside
  107. }
  108. if (asciiAlpha(code)) {
  109. effects.consume(code)
  110. return declaration
  111. }
  112. return nok(code)
  113. }
  114. /**
  115. * In a comment, after `<!-`, at another `-`.
  116. *
  117. * ```markdown
  118. * > | a <!--b--> c
  119. * ^
  120. * ```
  121. *
  122. * @type {State}
  123. */
  124. function commentOpenInside(code) {
  125. if (code === 45) {
  126. effects.consume(code)
  127. return commentEnd
  128. }
  129. return nok(code)
  130. }
  131. /**
  132. * In comment.
  133. *
  134. * ```markdown
  135. * > | a <!--b--> c
  136. * ^
  137. * ```
  138. *
  139. * @type {State}
  140. */
  141. function comment(code) {
  142. if (code === null) {
  143. return nok(code)
  144. }
  145. if (code === 45) {
  146. effects.consume(code)
  147. return commentClose
  148. }
  149. if (markdownLineEnding(code)) {
  150. returnState = comment
  151. return lineEndingBefore(code)
  152. }
  153. effects.consume(code)
  154. return comment
  155. }
  156. /**
  157. * In comment, after `-`.
  158. *
  159. * ```markdown
  160. * > | a <!--b--> c
  161. * ^
  162. * ```
  163. *
  164. * @type {State}
  165. */
  166. function commentClose(code) {
  167. if (code === 45) {
  168. effects.consume(code)
  169. return commentEnd
  170. }
  171. return comment(code)
  172. }
  173. /**
  174. * In comment, after `--`.
  175. *
  176. * ```markdown
  177. * > | a <!--b--> c
  178. * ^
  179. * ```
  180. *
  181. * @type {State}
  182. */
  183. function commentEnd(code) {
  184. return code === 62
  185. ? end(code)
  186. : code === 45
  187. ? commentClose(code)
  188. : comment(code)
  189. }
  190. /**
  191. * After `<![`, in CDATA, expecting `CDATA[`.
  192. *
  193. * ```markdown
  194. * > | a <![CDATA[>&<]]> b
  195. * ^^^^^^
  196. * ```
  197. *
  198. * @type {State}
  199. */
  200. function cdataOpenInside(code) {
  201. const value = 'CDATA['
  202. if (code === value.charCodeAt(index++)) {
  203. effects.consume(code)
  204. return index === value.length ? cdata : cdataOpenInside
  205. }
  206. return nok(code)
  207. }
  208. /**
  209. * In CDATA.
  210. *
  211. * ```markdown
  212. * > | a <![CDATA[>&<]]> b
  213. * ^^^
  214. * ```
  215. *
  216. * @type {State}
  217. */
  218. function cdata(code) {
  219. if (code === null) {
  220. return nok(code)
  221. }
  222. if (code === 93) {
  223. effects.consume(code)
  224. return cdataClose
  225. }
  226. if (markdownLineEnding(code)) {
  227. returnState = cdata
  228. return lineEndingBefore(code)
  229. }
  230. effects.consume(code)
  231. return cdata
  232. }
  233. /**
  234. * In CDATA, after `]`, at another `]`.
  235. *
  236. * ```markdown
  237. * > | a <![CDATA[>&<]]> b
  238. * ^
  239. * ```
  240. *
  241. * @type {State}
  242. */
  243. function cdataClose(code) {
  244. if (code === 93) {
  245. effects.consume(code)
  246. return cdataEnd
  247. }
  248. return cdata(code)
  249. }
  250. /**
  251. * In CDATA, after `]]`, at `>`.
  252. *
  253. * ```markdown
  254. * > | a <![CDATA[>&<]]> b
  255. * ^
  256. * ```
  257. *
  258. * @type {State}
  259. */
  260. function cdataEnd(code) {
  261. if (code === 62) {
  262. return end(code)
  263. }
  264. if (code === 93) {
  265. effects.consume(code)
  266. return cdataEnd
  267. }
  268. return cdata(code)
  269. }
  270. /**
  271. * In declaration.
  272. *
  273. * ```markdown
  274. * > | a <!b> c
  275. * ^
  276. * ```
  277. *
  278. * @type {State}
  279. */
  280. function declaration(code) {
  281. if (code === null || code === 62) {
  282. return end(code)
  283. }
  284. if (markdownLineEnding(code)) {
  285. returnState = declaration
  286. return lineEndingBefore(code)
  287. }
  288. effects.consume(code)
  289. return declaration
  290. }
  291. /**
  292. * In instruction.
  293. *
  294. * ```markdown
  295. * > | a <?b?> c
  296. * ^
  297. * ```
  298. *
  299. * @type {State}
  300. */
  301. function instruction(code) {
  302. if (code === null) {
  303. return nok(code)
  304. }
  305. if (code === 63) {
  306. effects.consume(code)
  307. return instructionClose
  308. }
  309. if (markdownLineEnding(code)) {
  310. returnState = instruction
  311. return lineEndingBefore(code)
  312. }
  313. effects.consume(code)
  314. return instruction
  315. }
  316. /**
  317. * In instruction, after `?`, at `>`.
  318. *
  319. * ```markdown
  320. * > | a <?b?> c
  321. * ^
  322. * ```
  323. *
  324. * @type {State}
  325. */
  326. function instructionClose(code) {
  327. return code === 62 ? end(code) : instruction(code)
  328. }
  329. /**
  330. * After `</`, in closing tag, at tag name.
  331. *
  332. * ```markdown
  333. * > | a </b> c
  334. * ^
  335. * ```
  336. *
  337. * @type {State}
  338. */
  339. function tagCloseStart(code) {
  340. // ASCII alphabetical.
  341. if (asciiAlpha(code)) {
  342. effects.consume(code)
  343. return tagClose
  344. }
  345. return nok(code)
  346. }
  347. /**
  348. * After `</x`, in a tag name.
  349. *
  350. * ```markdown
  351. * > | a </b> c
  352. * ^
  353. * ```
  354. *
  355. * @type {State}
  356. */
  357. function tagClose(code) {
  358. // ASCII alphanumerical and `-`.
  359. if (code === 45 || asciiAlphanumeric(code)) {
  360. effects.consume(code)
  361. return tagClose
  362. }
  363. return tagCloseBetween(code)
  364. }
  365. /**
  366. * In closing tag, after tag name.
  367. *
  368. * ```markdown
  369. * > | a </b> c
  370. * ^
  371. * ```
  372. *
  373. * @type {State}
  374. */
  375. function tagCloseBetween(code) {
  376. if (markdownLineEnding(code)) {
  377. returnState = tagCloseBetween
  378. return lineEndingBefore(code)
  379. }
  380. if (markdownSpace(code)) {
  381. effects.consume(code)
  382. return tagCloseBetween
  383. }
  384. return end(code)
  385. }
  386. /**
  387. * After `<x`, in opening tag name.
  388. *
  389. * ```markdown
  390. * > | a <b> c
  391. * ^
  392. * ```
  393. *
  394. * @type {State}
  395. */
  396. function tagOpen(code) {
  397. // ASCII alphanumerical and `-`.
  398. if (code === 45 || asciiAlphanumeric(code)) {
  399. effects.consume(code)
  400. return tagOpen
  401. }
  402. if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
  403. return tagOpenBetween(code)
  404. }
  405. return nok(code)
  406. }
  407. /**
  408. * In opening tag, after tag name.
  409. *
  410. * ```markdown
  411. * > | a <b> c
  412. * ^
  413. * ```
  414. *
  415. * @type {State}
  416. */
  417. function tagOpenBetween(code) {
  418. if (code === 47) {
  419. effects.consume(code)
  420. return end
  421. }
  422. // ASCII alphabetical and `:` and `_`.
  423. if (code === 58 || code === 95 || asciiAlpha(code)) {
  424. effects.consume(code)
  425. return tagOpenAttributeName
  426. }
  427. if (markdownLineEnding(code)) {
  428. returnState = tagOpenBetween
  429. return lineEndingBefore(code)
  430. }
  431. if (markdownSpace(code)) {
  432. effects.consume(code)
  433. return tagOpenBetween
  434. }
  435. return end(code)
  436. }
  437. /**
  438. * In attribute name.
  439. *
  440. * ```markdown
  441. * > | a <b c> d
  442. * ^
  443. * ```
  444. *
  445. * @type {State}
  446. */
  447. function tagOpenAttributeName(code) {
  448. // ASCII alphabetical and `-`, `.`, `:`, and `_`.
  449. if (
  450. code === 45 ||
  451. code === 46 ||
  452. code === 58 ||
  453. code === 95 ||
  454. asciiAlphanumeric(code)
  455. ) {
  456. effects.consume(code)
  457. return tagOpenAttributeName
  458. }
  459. return tagOpenAttributeNameAfter(code)
  460. }
  461. /**
  462. * After attribute name, before initializer, the end of the tag, or
  463. * whitespace.
  464. *
  465. * ```markdown
  466. * > | a <b c> d
  467. * ^
  468. * ```
  469. *
  470. * @type {State}
  471. */
  472. function tagOpenAttributeNameAfter(code) {
  473. if (code === 61) {
  474. effects.consume(code)
  475. return tagOpenAttributeValueBefore
  476. }
  477. if (markdownLineEnding(code)) {
  478. returnState = tagOpenAttributeNameAfter
  479. return lineEndingBefore(code)
  480. }
  481. if (markdownSpace(code)) {
  482. effects.consume(code)
  483. return tagOpenAttributeNameAfter
  484. }
  485. return tagOpenBetween(code)
  486. }
  487. /**
  488. * Before unquoted, double quoted, or single quoted attribute value, allowing
  489. * whitespace.
  490. *
  491. * ```markdown
  492. * > | a <b c=d> e
  493. * ^
  494. * ```
  495. *
  496. * @type {State}
  497. */
  498. function tagOpenAttributeValueBefore(code) {
  499. if (
  500. code === null ||
  501. code === 60 ||
  502. code === 61 ||
  503. code === 62 ||
  504. code === 96
  505. ) {
  506. return nok(code)
  507. }
  508. if (code === 34 || code === 39) {
  509. effects.consume(code)
  510. marker = code
  511. return tagOpenAttributeValueQuoted
  512. }
  513. if (markdownLineEnding(code)) {
  514. returnState = tagOpenAttributeValueBefore
  515. return lineEndingBefore(code)
  516. }
  517. if (markdownSpace(code)) {
  518. effects.consume(code)
  519. return tagOpenAttributeValueBefore
  520. }
  521. effects.consume(code)
  522. return tagOpenAttributeValueUnquoted
  523. }
  524. /**
  525. * In double or single quoted attribute value.
  526. *
  527. * ```markdown
  528. * > | a <b c="d"> e
  529. * ^
  530. * ```
  531. *
  532. * @type {State}
  533. */
  534. function tagOpenAttributeValueQuoted(code) {
  535. if (code === marker) {
  536. effects.consume(code)
  537. marker = undefined
  538. return tagOpenAttributeValueQuotedAfter
  539. }
  540. if (code === null) {
  541. return nok(code)
  542. }
  543. if (markdownLineEnding(code)) {
  544. returnState = tagOpenAttributeValueQuoted
  545. return lineEndingBefore(code)
  546. }
  547. effects.consume(code)
  548. return tagOpenAttributeValueQuoted
  549. }
  550. /**
  551. * In unquoted attribute value.
  552. *
  553. * ```markdown
  554. * > | a <b c=d> e
  555. * ^
  556. * ```
  557. *
  558. * @type {State}
  559. */
  560. function tagOpenAttributeValueUnquoted(code) {
  561. if (
  562. code === null ||
  563. code === 34 ||
  564. code === 39 ||
  565. code === 60 ||
  566. code === 61 ||
  567. code === 96
  568. ) {
  569. return nok(code)
  570. }
  571. if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
  572. return tagOpenBetween(code)
  573. }
  574. effects.consume(code)
  575. return tagOpenAttributeValueUnquoted
  576. }
  577. /**
  578. * After double or single quoted attribute value, before whitespace or the end
  579. * of the tag.
  580. *
  581. * ```markdown
  582. * > | a <b c="d"> e
  583. * ^
  584. * ```
  585. *
  586. * @type {State}
  587. */
  588. function tagOpenAttributeValueQuotedAfter(code) {
  589. if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
  590. return tagOpenBetween(code)
  591. }
  592. return nok(code)
  593. }
  594. /**
  595. * In certain circumstances of a tag where only an `>` is allowed.
  596. *
  597. * ```markdown
  598. * > | a <b c="d"> e
  599. * ^
  600. * ```
  601. *
  602. * @type {State}
  603. */
  604. function end(code) {
  605. if (code === 62) {
  606. effects.consume(code)
  607. effects.exit('htmlTextData')
  608. effects.exit('htmlText')
  609. return ok
  610. }
  611. return nok(code)
  612. }
  613. /**
  614. * At eol.
  615. *
  616. * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
  617. * > empty tokens.
  618. *
  619. * ```markdown
  620. * > | a <!--a
  621. * ^
  622. * | b-->
  623. * ```
  624. *
  625. * @type {State}
  626. */
  627. function lineEndingBefore(code) {
  628. effects.exit('htmlTextData')
  629. effects.enter('lineEnding')
  630. effects.consume(code)
  631. effects.exit('lineEnding')
  632. return lineEndingAfter
  633. }
  634. /**
  635. * After eol, at optional whitespace.
  636. *
  637. * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
  638. * > empty tokens.
  639. *
  640. * ```markdown
  641. * | a <!--a
  642. * > | b-->
  643. * ^
  644. * ```
  645. *
  646. * @type {State}
  647. */
  648. function lineEndingAfter(code) {
  649. // Always populated by defaults.
  650. return markdownSpace(code)
  651. ? factorySpace(
  652. effects,
  653. lineEndingAfterPrefix,
  654. 'linePrefix',
  655. self.parser.constructs.disable.null.includes('codeIndented')
  656. ? undefined
  657. : 4
  658. )(code)
  659. : lineEndingAfterPrefix(code)
  660. }
  661. /**
  662. * After eol, after optional whitespace.
  663. *
  664. * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
  665. * > empty tokens.
  666. *
  667. * ```markdown
  668. * | a <!--a
  669. * > | b-->
  670. * ^
  671. * ```
  672. *
  673. * @type {State}
  674. */
  675. function lineEndingAfterPrefix(code) {
  676. effects.enter('htmlTextData')
  677. return returnState(code)
  678. }
  679. }