// character-reference.js
/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */
import {decodeNamedCharacterReference} from 'decode-named-character-reference'
import {
  asciiAlphanumeric,
  asciiDigit,
  asciiHexDigit
} from 'micromark-util-character'
  14. /** @type {Construct} */
  15. export const characterReference = {
  16. name: 'characterReference',
  17. tokenize: tokenizeCharacterReference
  18. }
  19. /**
  20. * @this {TokenizeContext}
  21. * @type {Tokenizer}
  22. */
  23. function tokenizeCharacterReference(effects, ok, nok) {
  24. const self = this
  25. let size = 0
  26. /** @type {number} */
  27. let max
  28. /** @type {(code: Code) => boolean} */
  29. let test
  30. return start
  31. /**
  32. * Start of character reference.
  33. *
  34. * ```markdown
  35. * > | a&b
  36. * ^
  37. * > | a{b
  38. * ^
  39. * > | a	b
  40. * ^
  41. * ```
  42. *
  43. * @type {State}
  44. */
  45. function start(code) {
  46. effects.enter('characterReference')
  47. effects.enter('characterReferenceMarker')
  48. effects.consume(code)
  49. effects.exit('characterReferenceMarker')
  50. return open
  51. }
  52. /**
  53. * After `&`, at `#` for numeric references or alphanumeric for named
  54. * references.
  55. *
  56. * ```markdown
  57. * > | a&b
  58. * ^
  59. * > | a{b
  60. * ^
  61. * > | a	b
  62. * ^
  63. * ```
  64. *
  65. * @type {State}
  66. */
  67. function open(code) {
  68. if (code === 35) {
  69. effects.enter('characterReferenceMarkerNumeric')
  70. effects.consume(code)
  71. effects.exit('characterReferenceMarkerNumeric')
  72. return numeric
  73. }
  74. effects.enter('characterReferenceValue')
  75. max = 31
  76. test = asciiAlphanumeric
  77. return value(code)
  78. }
  79. /**
  80. * After `#`, at `x` for hexadecimals or digit for decimals.
  81. *
  82. * ```markdown
  83. * > | a{b
  84. * ^
  85. * > | a	b
  86. * ^
  87. * ```
  88. *
  89. * @type {State}
  90. */
  91. function numeric(code) {
  92. if (code === 88 || code === 120) {
  93. effects.enter('characterReferenceMarkerHexadecimal')
  94. effects.consume(code)
  95. effects.exit('characterReferenceMarkerHexadecimal')
  96. effects.enter('characterReferenceValue')
  97. max = 6
  98. test = asciiHexDigit
  99. return value
  100. }
  101. effects.enter('characterReferenceValue')
  102. max = 7
  103. test = asciiDigit
  104. return value(code)
  105. }
  106. /**
  107. * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
  108. *
  109. * The character reference kind defines what and how many characters are
  110. * allowed.
  111. *
  112. * ```markdown
  113. * > | a&b
  114. * ^^^
  115. * > | a{b
  116. * ^^^
  117. * > | a	b
  118. * ^
  119. * ```
  120. *
  121. * @type {State}
  122. */
  123. function value(code) {
  124. if (code === 59 && size) {
  125. const token = effects.exit('characterReferenceValue')
  126. if (
  127. test === asciiAlphanumeric &&
  128. !decodeNamedCharacterReference(self.sliceSerialize(token))
  129. ) {
  130. return nok(code)
  131. }
  132. // To do: `markdown-rs` uses a different name:
  133. // `CharacterReferenceMarkerSemi`.
  134. effects.enter('characterReferenceMarker')
  135. effects.consume(code)
  136. effects.exit('characterReferenceMarker')
  137. effects.exit('characterReference')
  138. return ok
  139. }
  140. if (test(code) && size++ < max) {
  141. effects.consume(code)
  142. return value
  143. }
  144. return nok(code)
  145. }
  146. }