attention.js 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /**
  2. * @typedef {import('micromark-util-types').Code} Code
  3. * @typedef {import('micromark-util-types').Construct} Construct
  4. * @typedef {import('micromark-util-types').Event} Event
  5. * @typedef {import('micromark-util-types').Point} Point
  6. * @typedef {import('micromark-util-types').Resolver} Resolver
  7. * @typedef {import('micromark-util-types').State} State
  8. * @typedef {import('micromark-util-types').Token} Token
  9. * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
  10. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  11. */
  12. import {push, splice} from 'micromark-util-chunked'
  13. import {classifyCharacter} from 'micromark-util-classify-character'
  14. import {resolveAll} from 'micromark-util-resolve-all'
  15. import {codes, constants, types} from 'micromark-util-symbol'
  16. import {ok as assert} from 'devlop'
  /**
   * Attention construct: pairs the tokenizer that captures runs of attention
   * markers with the resolver that later rewrites those runs.
   *
   * @type {Construct}
   */
  export const attention = {
    name: 'attention',
    tokenize: tokenizeAttention,
    resolveAll: resolveAllAttention
  }
  /**
   * Resolve attention sequences into emphasis and strong.
   *
   * Scans forward for a sequence that may close, then backward for the nearest
   * sequence with the same marker that may open, and replaces that pair (and
   * everything between them) with group, sequence, and text events.
   * Sequences that are still unmatched at the end are relabelled as `data`.
   *
   * @type {Resolver}
   */
  // eslint-disable-next-line complexity
  function resolveAllAttention(events, context) {
    /**
     * Number of markers a sequence token currently spans.
     *
     * @param {Token} token
     * @returns {number}
     */
    const sizeOf = (token) => token.end.offset - token.start.offset

    /**
     * Shallow copy of a point, so tokens never share point objects.
     *
     * @param {Point} point
     * @returns {Point}
     */
    const clone = (point) => Object.assign({}, point)

    // Note: this is fine on a megabyte of normal markdown, but it is a
    // bottleneck for malicious input.
    for (let closeIndex = 0; closeIndex < events.length; closeIndex++) {
      const closer = events[closeIndex][1]

      // Only the `enter` of a sequence that may close starts a search.
      if (
        events[closeIndex][0] !== 'enter' ||
        closer.type !== 'attentionSequence' ||
        !closer._close
      ) {
        continue
      }

      // Walk back towards the start, looking for an opener.
      for (let openIndex = closeIndex - 1; openIndex >= 0; openIndex--) {
        const opener = events[openIndex][1]

        // A candidate is the `exit` of a sequence that may open and that
        // starts with the same marker as the closer.
        if (
          events[openIndex][0] !== 'exit' ||
          opener.type !== 'attentionSequence' ||
          !opener._open ||
          context.sliceSerialize(opener).charCodeAt(0) !==
            context.sliceSerialize(closer).charCodeAt(0)
        ) {
          continue
        }

        // Do not match when the opener can also close or the closer can also
        // open, the closer size is *not* a multiple of three, but the sum of
        // both sizes *is* a multiple of three.
        if (
          (opener._close || closer._open) &&
          sizeOf(closer) % 3 &&
          !((sizeOf(opener) + sizeOf(closer)) % 3)
        ) {
          continue
        }

        // Markers to take from each side: two when both have at least two,
        // otherwise one.
        const use = sizeOf(opener) > 1 && sizeOf(closer) > 1 ? 2 : 1
        const strong = use > 1

        // Outer edges of the new sequences: `use` markers before the end of
        // the opener and `use` markers after the start of the closer.
        const openingStart = clone(opener.end)
        const closingEnd = clone(closer.start)
        movePoint(openingStart, -use)
        movePoint(closingEnd, use)

        /** @type {Token} */
        const openingSequence = {
          type: strong ? types.strongSequence : types.emphasisSequence,
          start: openingStart,
          end: clone(opener.end)
        }
        /** @type {Token} */
        const closingSequence = {
          type: strong ? types.strongSequence : types.emphasisSequence,
          start: clone(closer.start),
          end: closingEnd
        }
        /** @type {Token} */
        const text = {
          type: strong ? types.strongText : types.emphasisText,
          start: clone(opener.end),
          end: clone(closer.start)
        }
        /** @type {Token} */
        const group = {
          type: strong ? types.strong : types.emphasis,
          start: clone(openingStart),
          end: clone(closingEnd)
        }

        // Shrink the original sequences to the markers that were not used.
        opener.end = clone(openingStart)
        closer.start = clone(closingEnd)

        /** @type {Array<Event>} */
        let replacement = []

        // Markers left in the opener stay in front of the group.
        if (sizeOf(opener)) {
          replacement = push(replacement, [
            ['enter', opener, context],
            ['exit', opener, context]
          ])
        }

        // Opening.
        replacement = push(replacement, [
          ['enter', group, context],
          ['enter', openingSequence, context],
          ['exit', openingSequence, context],
          ['enter', text, context]
        ])

        // Always populated by defaults.
        const insideSpan = context.parser.constructs.insideSpan.null
        assert(insideSpan, 'expected `insideSpan` to be populated')

        // Between: resolve whatever sits between opener and closer.
        replacement = push(
          replacement,
          resolveAll(
            insideSpan,
            events.slice(openIndex + 1, closeIndex),
            context
          )
        )

        // Closing.
        replacement = push(replacement, [
          ['exit', text, context],
          ['enter', closingSequence, context],
          ['exit', closingSequence, context],
          ['exit', group, context]
        ])

        // Markers left in the closer go after the group; remember that two
        // events were appended so the scan can step back onto them.
        let trailing = 0

        if (sizeOf(closer)) {
          trailing = 2
          replacement = push(replacement, [
            ['enter', closer, context],
            ['exit', closer, context]
          ])
        }

        // Replace everything from the opener's `enter` (`openIndex - 1`)
        // through the closer's `exit` (`closeIndex + 1`).
        splice(events, openIndex - 1, closeIndex - openIndex + 3, replacement)

        // Continue right after the replacement, or on the leftover closer
        // (if there is one) so it can close again.
        closeIndex = openIndex + replacement.length - trailing - 2
        break
      }
    }

    // Remove remaining sequences.
    for (const event of events) {
      if (event[1].type === 'attentionSequence') {
        event[1].type = 'data'
      }
    }

    return events
  }
  /**
   * Tokenize a run of identical attention markers as one `attentionSequence`
   * and record on the token whether it may open and/or close.
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeAttention(effects, ok) {
    const markers = this.parser.constructs.attentionMarkers.null
    // The character before the run is captured once, up front.
    const previous = this.previous
    const before = classifyCharacter(previous)
    /** @type {NonNullable<Code>} */
    let marker

    /**
     * Before a sequence.
     *
     * ```markdown
     * > | **
     *     ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      assert(
        code === codes.asterisk || code === codes.underscore,
        'expected asterisk or underscore'
      )
      marker = code
      effects.enter('attentionSequence')
      return inside(code)
    }

    /**
     * In a sequence.
     *
     * ```markdown
     * > | **
     *     ^^
     * ```
     *
     * @type {State}
     */
    function inside(code) {
      // Keep eating for as long as the same marker repeats.
      if (code === marker) {
        effects.consume(code)
        return inside
      }

      const token = effects.exit('attentionSequence')

      // To do: next major: move this to resolver, just like `markdown-rs`.
      const after = classifyCharacter(code)

      // Always populated by defaults.
      assert(markers, 'expected `attentionMarkers` to be populated')

      // Provisional flags, based on the classification of the characters on
      // either side of the run and on the configured attention markers.
      const mayOpen =
        !after ||
        (after === constants.characterGroupPunctuation && before) ||
        markers.includes(code)
      const mayClose =
        !before ||
        (before === constants.characterGroupPunctuation && after) ||
        markers.includes(previous)

      if (marker === codes.asterisk) {
        // Asterisks use the provisional flags as they are.
        token._open = Boolean(mayOpen)
        token._close = Boolean(mayClose)
      } else {
        // Underscores add an extra condition on each side.
        token._open = Boolean(mayOpen && (before || !mayClose))
        token._close = Boolean(mayClose && (after || !mayOpen))
      }

      return ok(code)
    }

    return start
  }
  /**
   * Shift a point in place by `offset` positions.
   *
   * The column, the offset, and the buffer index are all moved by the same
   * amount; other fields are left alone.
   *
   * Note: this only works inside lines! It is not possible to move past other
   * chunks (replacement characters, tabs, or line endings).
   *
   * @param {Point} point
   *   Point to change.
   * @param {number} offset
   *   Amount to move by (may be negative).
   * @returns {undefined}
   *   Nothing.
   */
  function movePoint(point, offset) {
    // Read the current values first, then write the shifted ones back.
    const {column, offset: position, _bufferIndex: bufferIndex} = point

    point.column = column + offset
    point.offset = position + offset
    point._bufferIndex = bufferIndex + offset
  }