// index.js
  1. /**
  2. * @typedef {import('micromark-util-types').Chunk} Chunk
  3. * @typedef {import('micromark-util-types').Event} Event
  4. * @typedef {import('micromark-util-types').Token} Token
  5. */
  6. import {splice} from 'micromark-util-chunked'
  7. /**
  8. * Tokenize subcontent.
  9. *
  10. * @param {Array<Event>} events
  11. * List of events.
  12. * @returns {boolean}
  13. * Whether subtokens were found.
  14. */ // eslint-disable-next-line complexity
  15. export function subtokenize(events) {
  16. /** @type {Record<string, number>} */
  17. const jumps = {}
  18. let index = -1
  19. /** @type {Event} */
  20. let event
  21. /** @type {number | undefined} */
  22. let lineIndex
  23. /** @type {number} */
  24. let otherIndex
  25. /** @type {Event} */
  26. let otherEvent
  27. /** @type {Array<Event>} */
  28. let parameters
  29. /** @type {Array<Event>} */
  30. let subevents
  31. /** @type {boolean | undefined} */
  32. let more
  33. while (++index < events.length) {
  34. while (index in jumps) {
  35. index = jumps[index]
  36. }
  37. event = events[index]
  38. // Add a hook for the GFM tasklist extension, which needs to know if text
  39. // is in the first content of a list item.
  40. if (
  41. index &&
  42. event[1].type === 'chunkFlow' &&
  43. events[index - 1][1].type === 'listItemPrefix'
  44. ) {
  45. subevents = event[1]._tokenizer.events
  46. otherIndex = 0
  47. if (
  48. otherIndex < subevents.length &&
  49. subevents[otherIndex][1].type === 'lineEndingBlank'
  50. ) {
  51. otherIndex += 2
  52. }
  53. if (
  54. otherIndex < subevents.length &&
  55. subevents[otherIndex][1].type === 'content'
  56. ) {
  57. while (++otherIndex < subevents.length) {
  58. if (subevents[otherIndex][1].type === 'content') {
  59. break
  60. }
  61. if (subevents[otherIndex][1].type === 'chunkText') {
  62. subevents[otherIndex][1]._isInFirstContentOfListItem = true
  63. otherIndex++
  64. }
  65. }
  66. }
  67. }
  68. // Enter.
  69. if (event[0] === 'enter') {
  70. if (event[1].contentType) {
  71. Object.assign(jumps, subcontent(events, index))
  72. index = jumps[index]
  73. more = true
  74. }
  75. }
  76. // Exit.
  77. else if (event[1]._container) {
  78. otherIndex = index
  79. lineIndex = undefined
  80. while (otherIndex--) {
  81. otherEvent = events[otherIndex]
  82. if (
  83. otherEvent[1].type === 'lineEnding' ||
  84. otherEvent[1].type === 'lineEndingBlank'
  85. ) {
  86. if (otherEvent[0] === 'enter') {
  87. if (lineIndex) {
  88. events[lineIndex][1].type = 'lineEndingBlank'
  89. }
  90. otherEvent[1].type = 'lineEnding'
  91. lineIndex = otherIndex
  92. }
  93. } else {
  94. break
  95. }
  96. }
  97. if (lineIndex) {
  98. // Fix position.
  99. event[1].end = Object.assign({}, events[lineIndex][1].start)
  100. // Switch container exit w/ line endings.
  101. parameters = events.slice(lineIndex, index)
  102. parameters.unshift(event)
  103. splice(events, lineIndex, index - lineIndex + 1, parameters)
  104. }
  105. }
  106. }
  107. return !more
  108. }
  109. /**
  110. * Tokenize embedded tokens.
  111. *
  112. * @param {Array<Event>} events
  113. * @param {number} eventIndex
  114. * @returns {Record<string, number>}
  115. */
  116. function subcontent(events, eventIndex) {
  117. const token = events[eventIndex][1]
  118. const context = events[eventIndex][2]
  119. let startPosition = eventIndex - 1
  120. /** @type {Array<number>} */
  121. const startPositions = []
  122. const tokenizer =
  123. token._tokenizer || context.parser[token.contentType](token.start)
  124. const childEvents = tokenizer.events
  125. /** @type {Array<[number, number]>} */
  126. const jumps = []
  127. /** @type {Record<string, number>} */
  128. const gaps = {}
  129. /** @type {Array<Chunk>} */
  130. let stream
  131. /** @type {Token | undefined} */
  132. let previous
  133. let index = -1
  134. /** @type {Token | undefined} */
  135. let current = token
  136. let adjust = 0
  137. let start = 0
  138. const breaks = [start]
  139. // Loop forward through the linked tokens to pass them in order to the
  140. // subtokenizer.
  141. while (current) {
  142. // Find the position of the event for this token.
  143. while (events[++startPosition][1] !== current) {
  144. // Empty.
  145. }
  146. startPositions.push(startPosition)
  147. if (!current._tokenizer) {
  148. stream = context.sliceStream(current)
  149. if (!current.next) {
  150. stream.push(null)
  151. }
  152. if (previous) {
  153. tokenizer.defineSkip(current.start)
  154. }
  155. if (current._isInFirstContentOfListItem) {
  156. tokenizer._gfmTasklistFirstContentOfListItem = true
  157. }
  158. tokenizer.write(stream)
  159. if (current._isInFirstContentOfListItem) {
  160. tokenizer._gfmTasklistFirstContentOfListItem = undefined
  161. }
  162. }
  163. // Unravel the next token.
  164. previous = current
  165. current = current.next
  166. }
  167. // Now, loop back through all events (and linked tokens), to figure out which
  168. // parts belong where.
  169. current = token
  170. while (++index < childEvents.length) {
  171. if (
  172. // Find a void token that includes a break.
  173. childEvents[index][0] === 'exit' &&
  174. childEvents[index - 1][0] === 'enter' &&
  175. childEvents[index][1].type === childEvents[index - 1][1].type &&
  176. childEvents[index][1].start.line !== childEvents[index][1].end.line
  177. ) {
  178. start = index + 1
  179. breaks.push(start)
  180. // Help GC.
  181. current._tokenizer = undefined
  182. current.previous = undefined
  183. current = current.next
  184. }
  185. }
  186. // Help GC.
  187. tokenizer.events = []
  188. // If there’s one more token (which is the cases for lines that end in an
  189. // EOF), that’s perfect: the last point we found starts it.
  190. // If there isn’t then make sure any remaining content is added to it.
  191. if (current) {
  192. // Help GC.
  193. current._tokenizer = undefined
  194. current.previous = undefined
  195. } else {
  196. breaks.pop()
  197. }
  198. // Now splice the events from the subtokenizer into the current events,
  199. // moving back to front so that splice indices aren’t affected.
  200. index = breaks.length
  201. while (index--) {
  202. const slice = childEvents.slice(breaks[index], breaks[index + 1])
  203. const start = startPositions.pop()
  204. jumps.unshift([start, start + slice.length - 1])
  205. splice(events, start, 2, slice)
  206. }
  207. index = -1
  208. while (++index < jumps.length) {
  209. gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
  210. adjust += jumps[index][1] - jumps[index][0] - 1
  211. }
  212. return gaps
  213. }