html-text.js 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. /**
  2. * @typedef {import('micromark-util-types').Code} Code
  3. * @typedef {import('micromark-util-types').Construct} Construct
  4. * @typedef {import('micromark-util-types').State} State
  5. * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
  6. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  7. */
  8. import {factorySpace} from 'micromark-factory-space'
  9. import {
  10. asciiAlpha,
  11. asciiAlphanumeric,
  12. markdownLineEnding,
  13. markdownLineEndingOrSpace,
  14. markdownSpace
  15. } from 'micromark-util-character'
  16. import {codes, constants, types} from 'micromark-util-symbol'
  17. import {ok as assert} from 'devlop'
  /**
   * Construct object for HTML (text).
   *
   * Exposes the tokenizer `tokenizeHtmlText` under the construct name
   * `htmlText`.
   *
   * @type {Construct}
   */
  export const htmlText = {name: 'htmlText', tokenize: tokenizeHtmlText}
  /**
   * Tokenizer for HTML (text): a state machine that starts at `<` and
   * recognizes opening tags, closing tags, comments, processing instructions,
   * declarations, and CDATA sections.
   *
   * Each nested function is one state: it receives the current character code
   * and returns the next state.
   * On success the final `>` is consumed, the `htmlTextData` and `htmlText`
   * tokens are exited, and `ok` is returned as the next state.
   * On failure the result of `nok(code)` is returned.
   * Line endings inside the construct close the current `htmlTextData` token,
   * are emitted as their own `lineEnding` token, and a new `htmlTextData`
   * token is opened before continuing in the state that was interrupted.
   *
   * @this {TokenizeContext}
   *   Context; `parser.constructs.disable.null` is read after a line ending.
   * @type {Tokenizer}
   */
  function tokenizeHtmlText(effects, ok, nok) {
    const self = this
    /**
     * Quote character that opened the current attribute value, if any.
     *
     * @type {NonNullable<Code> | undefined}
     */
    let marker
    /**
     * Position in `constants.cdataOpeningString` while matching it.
     *
     * @type {number}
     */
    let index
    /**
     * State to continue in after a line ending (and optional prefix).
     *
     * @type {State}
     */
    let returnState

    return start

    /**
     * Start of HTML (text).
     *
     * Opens the `htmlText` and `htmlTextData` tokens and consumes `<`.
     *
     * ```markdown
     * > | a <b> c
     *       ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      assert(code === codes.lessThan, 'expected `<`')
      effects.enter(types.htmlText)
      effects.enter(types.htmlTextData)
      effects.consume(code)
      return open
    }

    /**
     * After `<`, at tag name or other stuff.
     *
     * Dispatches on `!` (declaration, comment, CDATA), `/` (closing tag),
     * `?` (instruction), or an ASCII letter (opening tag).
     *
     * ```markdown
     * > | a <b> c
     *        ^
     * > | a <!doctype> c
     *        ^
     * > | a <!--b--> c
     *        ^
     * ```
     *
     * @type {State}
     */
    function open(code) {
      if (code === codes.exclamationMark) {
        effects.consume(code)
        return declarationOpen
      }

      if (code === codes.slash) {
        effects.consume(code)
        return tagCloseStart
      }

      if (code === codes.questionMark) {
        effects.consume(code)
        return instruction
      }

      // ASCII alphabetical.
      if (asciiAlpha(code)) {
        effects.consume(code)
        return tagOpen
      }

      return nok(code)
    }

    /**
     * After `<!`, at declaration, comment, or CDATA.
     *
     * ```markdown
     * > | a <!doctype> c
     *         ^
     * > | a <!--b--> c
     *         ^
     * > | a <![CDATA[>&<]]> c
     *         ^
     * ```
     *
     * @type {State}
     */
    function declarationOpen(code) {
      if (code === codes.dash) {
        effects.consume(code)
        return commentOpenInside
      }

      if (code === codes.leftSquareBracket) {
        effects.consume(code)
        // Start matching the CDATA opening string from its first character.
        index = 0
        return cdataOpenInside
      }

      if (asciiAlpha(code)) {
        effects.consume(code)
        return declaration
      }

      return nok(code)
    }

    /**
     * In a comment, after `<!-`, at another `-`.
     *
     * Continues in `commentEnd`, so a `>` directly after `<!--` already
     * closes the comment.
     *
     * ```markdown
     * > | a <!--b--> c
     *          ^
     * ```
     *
     * @type {State}
     */
    function commentOpenInside(code) {
      if (code === codes.dash) {
        effects.consume(code)
        return commentEnd
      }

      return nok(code)
    }

    /**
     * In comment.
     *
     * ```markdown
     * > | a <!--b--> c
     *           ^
     * ```
     *
     * @type {State}
     */
    function comment(code) {
      if (code === codes.eof) {
        return nok(code)
      }

      if (code === codes.dash) {
        effects.consume(code)
        return commentClose
      }

      if (markdownLineEnding(code)) {
        returnState = comment
        return lineEndingBefore(code)
      }

      effects.consume(code)
      return comment
    }

    /**
     * In comment, after `-`.
     *
     * ```markdown
     * > | a <!--b--> c
     *             ^
     * ```
     *
     * @type {State}
     */
    function commentClose(code) {
      if (code === codes.dash) {
        effects.consume(code)
        return commentEnd
      }

      return comment(code)
    }

    /**
     * In comment, after `--`.
     *
     * `>` ends the comment, another `-` is handled as a possible closing
     * dash, anything else continues the comment.
     *
     * ```markdown
     * > | a <!--b--> c
     *              ^
     * ```
     *
     * @type {State}
     */
    function commentEnd(code) {
      return code === codes.greaterThan
        ? end(code)
        : code === codes.dash
          ? commentClose(code)
          : comment(code)
    }

    /**
     * After `<![`, in CDATA, expecting `CDATA[`.
     *
     * Matches `constants.cdataOpeningString` one character per call, using
     * `index` as the position.
     *
     * ```markdown
     * > | a <![CDATA[>&<]]> b
     *          ^^^^^^
     * ```
     *
     * @type {State}
     */
    function cdataOpenInside(code) {
      const value = constants.cdataOpeningString

      if (code === value.charCodeAt(index++)) {
        effects.consume(code)
        return index === value.length ? cdata : cdataOpenInside
      }

      return nok(code)
    }

    /**
     * In CDATA.
     *
     * ```markdown
     * > | a <![CDATA[>&<]]> b
     *                ^^^
     * ```
     *
     * @type {State}
     */
    function cdata(code) {
      if (code === codes.eof) {
        return nok(code)
      }

      if (code === codes.rightSquareBracket) {
        effects.consume(code)
        return cdataClose
      }

      if (markdownLineEnding(code)) {
        returnState = cdata
        return lineEndingBefore(code)
      }

      effects.consume(code)
      return cdata
    }

    /**
     * In CDATA, after `]`, at another `]`.
     *
     * ```markdown
     * > | a <![CDATA[>&<]]> b
     *                    ^
     * ```
     *
     * @type {State}
     */
    function cdataClose(code) {
      if (code === codes.rightSquareBracket) {
        effects.consume(code)
        return cdataEnd
      }

      return cdata(code)
    }

    /**
     * In CDATA, after `]]`, at `>`.
     *
     * Further `]` characters are consumed while staying in this state.
     *
     * ```markdown
     * > | a <![CDATA[>&<]]> b
     *                     ^
     * ```
     *
     * @type {State}
     */
    function cdataEnd(code) {
      if (code === codes.greaterThan) {
        return end(code)
      }

      if (code === codes.rightSquareBracket) {
        effects.consume(code)
        return cdataEnd
      }

      return cdata(code)
    }

    /**
     * In declaration.
     *
     * ```markdown
     * > | a <!b> c
     *          ^
     * ```
     *
     * @type {State}
     */
    function declaration(code) {
      // `end` accepts only `>`, so EOF is rejected there.
      if (code === codes.eof || code === codes.greaterThan) {
        return end(code)
      }

      if (markdownLineEnding(code)) {
        returnState = declaration
        return lineEndingBefore(code)
      }

      effects.consume(code)
      return declaration
    }

    /**
     * In instruction.
     *
     * ```markdown
     * > | a <?b?> c
     *         ^
     * ```
     *
     * @type {State}
     */
    function instruction(code) {
      if (code === codes.eof) {
        return nok(code)
      }

      if (code === codes.questionMark) {
        effects.consume(code)
        return instructionClose
      }

      if (markdownLineEnding(code)) {
        returnState = instruction
        return lineEndingBefore(code)
      }

      effects.consume(code)
      return instruction
    }

    /**
     * In instruction, after `?`, at `>`.
     *
     * ```markdown
     * > | a <?b?> c
     *           ^
     * ```
     *
     * @type {State}
     */
    function instructionClose(code) {
      return code === codes.greaterThan ? end(code) : instruction(code)
    }

    /**
     * After `</`, in closing tag, at tag name.
     *
     * ```markdown
     * > | a </b> c
     *         ^
     * ```
     *
     * @type {State}
     */
    function tagCloseStart(code) {
      // ASCII alphabetical.
      if (asciiAlpha(code)) {
        effects.consume(code)
        return tagClose
      }

      return nok(code)
    }

    /**
     * After `</x`, in a tag name.
     *
     * ```markdown
     * > | a </b> c
     *          ^
     * ```
     *
     * @type {State}
     */
    function tagClose(code) {
      // ASCII alphanumerical and `-`.
      if (code === codes.dash || asciiAlphanumeric(code)) {
        effects.consume(code)
        return tagClose
      }

      return tagCloseBetween(code)
    }

    /**
     * In closing tag, after tag name.
     *
     * Only line endings and spaces may precede the final `>`.
     *
     * ```markdown
     * > | a </b> c
     *          ^
     * ```
     *
     * @type {State}
     */
    function tagCloseBetween(code) {
      if (markdownLineEnding(code)) {
        returnState = tagCloseBetween
        return lineEndingBefore(code)
      }

      if (markdownSpace(code)) {
        effects.consume(code)
        return tagCloseBetween
      }

      return end(code)
    }

    /**
     * After `<x`, in opening tag name.
     *
     * ```markdown
     * > | a <b> c
     *         ^
     * ```
     *
     * @type {State}
     */
    function tagOpen(code) {
      // ASCII alphanumerical and `-`.
      if (code === codes.dash || asciiAlphanumeric(code)) {
        effects.consume(code)
        return tagOpen
      }

      if (
        code === codes.slash ||
        code === codes.greaterThan ||
        markdownLineEndingOrSpace(code)
      ) {
        return tagOpenBetween(code)
      }

      return nok(code)
    }

    /**
     * In opening tag, after tag name.
     *
     * Handles `/` (self-closing, after which only `>` is accepted), the start
     * of an attribute name, whitespace, and the end of the tag.
     *
     * ```markdown
     * > | a <b> c
     *         ^
     * ```
     *
     * @type {State}
     */
    function tagOpenBetween(code) {
      if (code === codes.slash) {
        effects.consume(code)
        return end
      }

      // ASCII alphabetical and `:` and `_`.
      if (
        code === codes.colon ||
        code === codes.underscore ||
        asciiAlpha(code)
      ) {
        effects.consume(code)
        return tagOpenAttributeName
      }

      if (markdownLineEnding(code)) {
        returnState = tagOpenBetween
        return lineEndingBefore(code)
      }

      if (markdownSpace(code)) {
        effects.consume(code)
        return tagOpenBetween
      }

      return end(code)
    }

    /**
     * In attribute name.
     *
     * ```markdown
     * > | a <b c> d
     *           ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeName(code) {
      // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
      if (
        code === codes.dash ||
        code === codes.dot ||
        code === codes.colon ||
        code === codes.underscore ||
        asciiAlphanumeric(code)
      ) {
        effects.consume(code)
        return tagOpenAttributeName
      }

      return tagOpenAttributeNameAfter(code)
    }

    /**
     * After attribute name, before initializer, the end of the tag, or
     * whitespace.
     *
     * ```markdown
     * > | a <b c> d
     *           ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeNameAfter(code) {
      if (code === codes.equalsTo) {
        effects.consume(code)
        return tagOpenAttributeValueBefore
      }

      if (markdownLineEnding(code)) {
        returnState = tagOpenAttributeNameAfter
        return lineEndingBefore(code)
      }

      if (markdownSpace(code)) {
        effects.consume(code)
        return tagOpenAttributeNameAfter
      }

      return tagOpenBetween(code)
    }

    /**
     * Before unquoted, double quoted, or single quoted attribute value,
     * allowing whitespace.
     *
     * ```markdown
     * > | a <b c=d> e
     *            ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeValueBefore(code) {
      if (
        code === codes.eof ||
        code === codes.lessThan ||
        code === codes.equalsTo ||
        code === codes.greaterThan ||
        code === codes.graveAccent
      ) {
        return nok(code)
      }

      if (code === codes.quotationMark || code === codes.apostrophe) {
        effects.consume(code)
        // Remember which quote must close this value.
        marker = code
        return tagOpenAttributeValueQuoted
      }

      if (markdownLineEnding(code)) {
        returnState = tagOpenAttributeValueBefore
        return lineEndingBefore(code)
      }

      if (markdownSpace(code)) {
        effects.consume(code)
        return tagOpenAttributeValueBefore
      }

      effects.consume(code)
      return tagOpenAttributeValueUnquoted
    }

    /**
     * In double or single quoted attribute value.
     *
     * ```markdown
     * > | a <b c="d"> e
     *             ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeValueQuoted(code) {
      if (code === marker) {
        effects.consume(code)
        marker = undefined
        return tagOpenAttributeValueQuotedAfter
      }

      if (code === codes.eof) {
        return nok(code)
      }

      if (markdownLineEnding(code)) {
        returnState = tagOpenAttributeValueQuoted
        return lineEndingBefore(code)
      }

      effects.consume(code)
      return tagOpenAttributeValueQuoted
    }

    /**
     * In unquoted attribute value.
     *
     * ```markdown
     * > | a <b c=d> e
     *             ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeValueUnquoted(code) {
      if (
        code === codes.eof ||
        code === codes.quotationMark ||
        code === codes.apostrophe ||
        code === codes.lessThan ||
        code === codes.equalsTo ||
        code === codes.graveAccent
      ) {
        return nok(code)
      }

      if (
        code === codes.slash ||
        code === codes.greaterThan ||
        markdownLineEndingOrSpace(code)
      ) {
        return tagOpenBetween(code)
      }

      effects.consume(code)
      return tagOpenAttributeValueUnquoted
    }

    /**
     * After double or single quoted attribute value, before whitespace or the
     * end of the tag.
     *
     * ```markdown
     * > | a <b c="d"> e
     *               ^
     * ```
     *
     * @type {State}
     */
    function tagOpenAttributeValueQuotedAfter(code) {
      if (
        code === codes.slash ||
        code === codes.greaterThan ||
        markdownLineEndingOrSpace(code)
      ) {
        return tagOpenBetween(code)
      }

      return nok(code)
    }

    /**
     * In certain circumstances of a tag where only an `>` is allowed.
     *
     * Consumes `>`, exits the `htmlTextData` and `htmlText` tokens, and hands
     * over to `ok`; any other code is rejected through `nok`.
     *
     * ```markdown
     * > | a <b c="d"> e
     *               ^
     * ```
     *
     * @type {State}
     */
    function end(code) {
      if (code === codes.greaterThan) {
        effects.consume(code)
        effects.exit(types.htmlTextData)
        effects.exit(types.htmlText)
        return ok
      }

      return nok(code)
    }

    /**
     * At eol.
     *
     * Closes the current `htmlTextData` token and emits the line ending as
     * its own `lineEnding` token.
     * Callers must set `returnState` before entering this state.
     *
     * > 👉 **Note**: we can’t have blank lines in text, so no need to worry
     * > about empty tokens.
     *
     * ```markdown
     * > | a <!--a
     *            ^
     *   | b-->
     * ```
     *
     * @type {State}
     */
    function lineEndingBefore(code) {
      assert(returnState, 'expected return state')
      assert(markdownLineEnding(code), 'expected eol')
      effects.exit(types.htmlTextData)
      effects.enter(types.lineEnding)
      effects.consume(code)
      effects.exit(types.lineEnding)
      return lineEndingAfter
    }

    /**
     * After eol, at optional whitespace.
     *
     * Leading whitespace is tokenized as `linePrefix` through `factorySpace`.
     * The size limit passed to it is `constants.tabSize`, or `undefined` when
     * `codeIndented` is listed in `disable.null`.
     *
     * > 👉 **Note**: we can’t have blank lines in text, so no need to worry
     * > about empty tokens.
     *
     * ```markdown
     *   | a <!--a
     * > | b-->
     *     ^
     * ```
     *
     * @type {State}
     */
    function lineEndingAfter(code) {
      // Always populated by defaults.
      assert(
        self.parser.constructs.disable.null,
        'expected `disable.null` to be populated'
      )
      return markdownSpace(code)
        ? factorySpace(
            effects,
            lineEndingAfterPrefix,
            types.linePrefix,
            self.parser.constructs.disable.null.includes('codeIndented')
              ? undefined
              : constants.tabSize
          )(code)
        : lineEndingAfterPrefix(code)
    }

    /**
     * After eol, after optional whitespace.
     *
     * Opens a new `htmlTextData` token and continues in the state stored in
     * `returnState`.
     *
     * > 👉 **Note**: we can’t have blank lines in text, so no need to worry
     * > about empty tokens.
     *
     * ```markdown
     *   | a <!--a
     * > | b-->
     *     ^
     * ```
     *
     * @type {State}
     */
    function lineEndingAfterPrefix(code) {
      effects.enter(types.htmlTextData)
      return returnState(code)
    }
  }