index.js
/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Token} Token
 */

import {splice} from 'micromark-util-chunked'
import {codes, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'
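
// Note on shapes (per `micromark-util-types`, and relied on by the
// `event[0]`/`event[1]`/`event[2]` accesses below): each event is a
// `[kind, token, context]` tuple, where `kind` is `'enter'` or `'exit'`,
// so every token yields a balanced enter/exit pair.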

/**
 * Tokenize subcontent.
 *
 * @param {Array<Event>} events
 *   List of events.
 * @returns {boolean}
 *   Whether subtokens were found.
 */
// eslint-disable-next-line complexity
export function subtokenize(events) {
  /** @type {Record<string, number>} */
  const jumps = {}
  let index = -1
  /** @type {Event} */
  let event
  /** @type {number | undefined} */
  let lineIndex
  /** @type {number} */
  let otherIndex
  /** @type {Event} */
  let otherEvent
  /** @type {Array<Event>} */
  let parameters
  /** @type {Array<Event>} */
  let subevents
  /** @type {boolean | undefined} */
  let more
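
  // A note on `jumps` (a reading of how `subcontent` builds it below): it
  // maps the first index of a spliced-in run of child events to the index of
  // that run’s last event, so the loop below can jump to the end of a region
  // that was already expanded instead of walking into it again.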

  while (++index < events.length) {
    while (index in jumps) {
      index = jumps[index]
    }

    event = events[index]

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events[index - 1][1].type === types.listItemPrefix
    ) {
      assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
      subevents = event[1]._tokenizer.events
      otherIndex = 0

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container) {
      otherIndex = index
      lineIndex = undefined

      while (otherIndex--) {
        otherEvent = events[otherIndex]

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            if (lineIndex) {
              events[lineIndex][1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else {
          break
        }
      }
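
      // If line endings were found, the container exit (which currently sits
      // after them) is moved to just before the first one; illustratively:
      // `[enter lineEnding] [exit lineEnding] [exit blockQuote]` becomes
      // `[exit blockQuote] [enter lineEnding] [exit lineEnding]`.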
      if (lineIndex) {
        // Fix position.
        event[1].end = Object.assign({}, events[lineIndex][1].start)

        // Switch container exit w/ line endings.
        parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        splice(events, lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  return !more
}
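
// A minimal usage sketch. The `postprocess` name below is an assumption: it
// mirrors how `micromark` itself drives this utility after parsing, calling
// `subtokenize` until a pass finds nothing left to expand:
//
//   /** @param {Array<Event>} events */
//   function postprocess(events) {
//     while (!subtokenize(events)) {
//       // Empty.
//     }
//
//     return events
//   }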

/**
 * Tokenize embedded tokens.
 *
 * @param {Array<Event>} events
 *   List of events.
 * @param {number} eventIndex
 *   Index of the enter event of the token whose content is tokenized.
 * @returns {Record<string, number>}
 *   Map of event indices to jump from, to the indices to jump to.
 */
function subcontent(events, eventIndex) {
  const token = events[eventIndex][1]
  const context = events[eventIndex][2]
  let startPosition = eventIndex - 1
  /** @type {Array<number>} */
  const startPositions = []
  assert(token.contentType, 'expected `contentType` on subtokens')
  const tokenizer =
    token._tokenizer || context.parser[token.contentType](token.start)
  const childEvents = tokenizer.events
  /** @type {Array<[number, number]>} */
  const jumps = []
  /** @type {Record<string, number>} */
  const gaps = {}

  /** @type {Array<Chunk>} */
  let stream
  /** @type {Token | undefined} */
  let previous
  let index = -1
  /** @type {Token | undefined} */
  let current = token
  let adjust = 0
  let start = 0
  const breaks = [start]

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
    while (events[++startPosition][1] !== current) {
      // Empty.
    }

    assert(
      !previous || current.previous === previous,
      'expected previous to match'
    )
    assert(!previous || previous.next === current, 'expected next to match')

    startPositions.push(startPosition)

    if (!current._tokenizer) {
      stream = context.sliceStream(current)

      if (!current.next) {
        stream.push(codes.eof)
      }

      if (previous) {
        tokenizer.defineSkip(current.start)
      }

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = current
    current = current.next
  }
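
  // A note on `breaks` (a reading of the check below): each void token that
  // spans a line break marks where one linked token’s events end and the
  // next one’s begin in `childEvents`.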

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      assert(current, 'expected a current token')
      start = index + 1
      breaks.push(start)
      // Help GC.
      current._tokenizer = undefined
      current.previous = undefined
      current = current.next
    }
  }

  // Help GC.
  tokenizer.events = []

  // If there’s one more token (which is the case for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t, make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined
    current.previous = undefined
    assert(!current.next, 'expected no next token')
  } else {
    breaks.pop()
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length

  while (index--) {
    const slice = childEvents.slice(breaks[index], breaks[index + 1])
    const start = startPositions.pop()
    assert(start !== undefined, 'expected a start position when splicing')
    jumps.unshift([start, start + slice.length - 1])
    splice(events, start, 2, slice)
  }
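
  // Finally, convert `jumps` (positions before the splices above) into
  // `gaps` (positions after them): each splice replaced two events with
  // `slice.length` events, so every later entry shifts by `end - start - 1`.
  // A worked example with hypothetical numbers: slices of lengths 5 and 3
  // spliced at indices 4 and 9 give `jumps` of `[[4, 8], [9, 11]]`; the
  // first splice grows the array by 3, so `gaps` is `{4: 8, 12: 14}`.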

  index = -1

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps
}