document.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
/**
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').ContainerState} ContainerState
 * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
 * @typedef {import('micromark-util-types').Initializer} Initializer
 * @typedef {import('micromark-util-types').Point} Point
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Token} Token
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */
/**
 * @typedef {[Construct, ContainerState]} StackItem
 */
  15. import {factorySpace} from 'micromark-factory-space'
  16. import {markdownLineEnding} from 'micromark-util-character'
  17. import {splice} from 'micromark-util-chunked'
  18. /** @type {InitialConstruct} */
  19. export const document = {
  20. tokenize: initializeDocument
  21. }
  22. /** @type {Construct} */
  23. const containerConstruct = {
  24. tokenize: tokenizeContainer
  25. }
  26. /**
  27. * @this {TokenizeContext}
  28. * @type {Initializer}
  29. */
  30. function initializeDocument(effects) {
  31. const self = this
  32. /** @type {Array<StackItem>} */
  33. const stack = []
  34. let continued = 0
  35. /** @type {TokenizeContext | undefined} */
  36. let childFlow
  37. /** @type {Token | undefined} */
  38. let childToken
  39. /** @type {number} */
  40. let lineStartOffset
  41. return start
  42. /** @type {State} */
  43. function start(code) {
  44. // First we iterate through the open blocks, starting with the root
  45. // document, and descending through last children down to the last open
  46. // block.
  47. // Each block imposes a condition that the line must satisfy if the block is
  48. // to remain open.
  49. // For example, a block quote requires a `>` character.
  50. // A paragraph requires a non-blank line.
  51. // In this phase we may match all or just some of the open blocks.
  52. // But we cannot close unmatched blocks yet, because we may have a lazy
  53. // continuation line.
  54. if (continued < stack.length) {
  55. const item = stack[continued]
  56. self.containerState = item[1]
  57. return effects.attempt(
  58. item[0].continuation,
  59. documentContinue,
  60. checkNewContainers
  61. )(code)
  62. }
  63. // Done.
  64. return checkNewContainers(code)
  65. }
  66. /** @type {State} */
  67. function documentContinue(code) {
  68. continued++
  69. // Note: this field is called `_closeFlow` but it also closes containers.
  70. // Perhaps a good idea to rename it but it’s already used in the wild by
  71. // extensions.
  72. if (self.containerState._closeFlow) {
  73. self.containerState._closeFlow = undefined
  74. if (childFlow) {
  75. closeFlow()
  76. }
  77. // Note: this algorithm for moving events around is similar to the
  78. // algorithm when dealing with lazy lines in `writeToChild`.
  79. const indexBeforeExits = self.events.length
  80. let indexBeforeFlow = indexBeforeExits
  81. /** @type {Point | undefined} */
  82. let point
  83. // Find the flow chunk.
  84. while (indexBeforeFlow--) {
  85. if (
  86. self.events[indexBeforeFlow][0] === 'exit' &&
  87. self.events[indexBeforeFlow][1].type === 'chunkFlow'
  88. ) {
  89. point = self.events[indexBeforeFlow][1].end
  90. break
  91. }
  92. }
  93. exitContainers(continued)
  94. // Fix positions.
  95. let index = indexBeforeExits
  96. while (index < self.events.length) {
  97. self.events[index][1].end = Object.assign({}, point)
  98. index++
  99. }
  100. // Inject the exits earlier (they’re still also at the end).
  101. splice(
  102. self.events,
  103. indexBeforeFlow + 1,
  104. 0,
  105. self.events.slice(indexBeforeExits)
  106. )
  107. // Discard the duplicate exits.
  108. self.events.length = index
  109. return checkNewContainers(code)
  110. }
  111. return start(code)
  112. }
  113. /** @type {State} */
  114. function checkNewContainers(code) {
  115. // Next, after consuming the continuation markers for existing blocks, we
  116. // look for new block starts (e.g. `>` for a block quote).
  117. // If we encounter a new block start, we close any blocks unmatched in
  118. // step 1 before creating the new block as a child of the last matched
  119. // block.
  120. if (continued === stack.length) {
  121. // No need to `check` whether there’s a container, of `exitContainers`
  122. // would be moot.
  123. // We can instead immediately `attempt` to parse one.
  124. if (!childFlow) {
  125. return documentContinued(code)
  126. }
  127. // If we have concrete content, such as block HTML or fenced code,
  128. // we can’t have containers “pierce” into them, so we can immediately
  129. // start.
  130. if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
  131. return flowStart(code)
  132. }
  133. // If we do have flow, it could still be a blank line,
  134. // but we’d be interrupting it w/ a new container if there’s a current
  135. // construct.
  136. // To do: next major: remove `_gfmTableDynamicInterruptHack` (no longer
  137. // needed in micromark-extension-gfm-table@1.0.6).
  138. self.interrupt = Boolean(
  139. childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
  140. )
  141. }
  142. // Check if there is a new container.
  143. self.containerState = {}
  144. return effects.check(
  145. containerConstruct,
  146. thereIsANewContainer,
  147. thereIsNoNewContainer
  148. )(code)
  149. }
  150. /** @type {State} */
  151. function thereIsANewContainer(code) {
  152. if (childFlow) closeFlow()
  153. exitContainers(continued)
  154. return documentContinued(code)
  155. }
  156. /** @type {State} */
  157. function thereIsNoNewContainer(code) {
  158. self.parser.lazy[self.now().line] = continued !== stack.length
  159. lineStartOffset = self.now().offset
  160. return flowStart(code)
  161. }
  162. /** @type {State} */
  163. function documentContinued(code) {
  164. // Try new containers.
  165. self.containerState = {}
  166. return effects.attempt(
  167. containerConstruct,
  168. containerContinue,
  169. flowStart
  170. )(code)
  171. }
  172. /** @type {State} */
  173. function containerContinue(code) {
  174. continued++
  175. stack.push([self.currentConstruct, self.containerState])
  176. // Try another.
  177. return documentContinued(code)
  178. }
  179. /** @type {State} */
  180. function flowStart(code) {
  181. if (code === null) {
  182. if (childFlow) closeFlow()
  183. exitContainers(0)
  184. effects.consume(code)
  185. return
  186. }
  187. childFlow = childFlow || self.parser.flow(self.now())
  188. effects.enter('chunkFlow', {
  189. contentType: 'flow',
  190. previous: childToken,
  191. _tokenizer: childFlow
  192. })
  193. return flowContinue(code)
  194. }
  195. /** @type {State} */
  196. function flowContinue(code) {
  197. if (code === null) {
  198. writeToChild(effects.exit('chunkFlow'), true)
  199. exitContainers(0)
  200. effects.consume(code)
  201. return
  202. }
  203. if (markdownLineEnding(code)) {
  204. effects.consume(code)
  205. writeToChild(effects.exit('chunkFlow'))
  206. // Get ready for the next line.
  207. continued = 0
  208. self.interrupt = undefined
  209. return start
  210. }
  211. effects.consume(code)
  212. return flowContinue
  213. }
  214. /**
  215. * @param {Token} token
  216. * @param {boolean | undefined} [eof]
  217. * @returns {undefined}
  218. */
  219. function writeToChild(token, eof) {
  220. const stream = self.sliceStream(token)
  221. if (eof) stream.push(null)
  222. token.previous = childToken
  223. if (childToken) childToken.next = token
  224. childToken = token
  225. childFlow.defineSkip(token.start)
  226. childFlow.write(stream)
  227. // Alright, so we just added a lazy line:
  228. //
  229. // ```markdown
  230. // > a
  231. // b.
  232. //
  233. // Or:
  234. //
  235. // > ~~~c
  236. // d
  237. //
  238. // Or:
  239. //
  240. // > | e |
  241. // f
  242. // ```
  243. //
  244. // The construct in the second example (fenced code) does not accept lazy
  245. // lines, so it marked itself as done at the end of its first line, and
  246. // then the content construct parses `d`.
  247. // Most constructs in markdown match on the first line: if the first line
  248. // forms a construct, a non-lazy line can’t “unmake” it.
  249. //
  250. // The construct in the third example is potentially a GFM table, and
  251. // those are *weird*.
  252. // It *could* be a table, from the first line, if the following line
  253. // matches a condition.
  254. // In this case, that second line is lazy, which “unmakes” the first line
  255. // and turns the whole into one content block.
  256. //
  257. // We’ve now parsed the non-lazy and the lazy line, and can figure out
  258. // whether the lazy line started a new flow block.
  259. // If it did, we exit the current containers between the two flow blocks.
  260. if (self.parser.lazy[token.start.line]) {
  261. let index = childFlow.events.length
  262. while (index--) {
  263. if (
  264. // The token starts before the line ending…
  265. childFlow.events[index][1].start.offset < lineStartOffset &&
  266. // …and either is not ended yet…
  267. (!childFlow.events[index][1].end ||
  268. // …or ends after it.
  269. childFlow.events[index][1].end.offset > lineStartOffset)
  270. ) {
  271. // Exit: there’s still something open, which means it’s a lazy line
  272. // part of something.
  273. return
  274. }
  275. }
  276. // Note: this algorithm for moving events around is similar to the
  277. // algorithm when closing flow in `documentContinue`.
  278. const indexBeforeExits = self.events.length
  279. let indexBeforeFlow = indexBeforeExits
  280. /** @type {boolean | undefined} */
  281. let seen
  282. /** @type {Point | undefined} */
  283. let point
  284. // Find the previous chunk (the one before the lazy line).
  285. while (indexBeforeFlow--) {
  286. if (
  287. self.events[indexBeforeFlow][0] === 'exit' &&
  288. self.events[indexBeforeFlow][1].type === 'chunkFlow'
  289. ) {
  290. if (seen) {
  291. point = self.events[indexBeforeFlow][1].end
  292. break
  293. }
  294. seen = true
  295. }
  296. }
  297. exitContainers(continued)
  298. // Fix positions.
  299. index = indexBeforeExits
  300. while (index < self.events.length) {
  301. self.events[index][1].end = Object.assign({}, point)
  302. index++
  303. }
  304. // Inject the exits earlier (they’re still also at the end).
  305. splice(
  306. self.events,
  307. indexBeforeFlow + 1,
  308. 0,
  309. self.events.slice(indexBeforeExits)
  310. )
  311. // Discard the duplicate exits.
  312. self.events.length = index
  313. }
  314. }
  315. /**
  316. * @param {number} size
  317. * @returns {undefined}
  318. */
  319. function exitContainers(size) {
  320. let index = stack.length
  321. // Exit open containers.
  322. while (index-- > size) {
  323. const entry = stack[index]
  324. self.containerState = entry[1]
  325. entry[0].exit.call(self, effects)
  326. }
  327. stack.length = size
  328. }
  329. function closeFlow() {
  330. childFlow.write([null])
  331. childToken = undefined
  332. childFlow = undefined
  333. self.containerState._closeFlow = undefined
  334. }
  335. }
  336. /**
  337. * @this {TokenizeContext}
  338. * @type {Tokenizer}
  339. */
  340. function tokenizeContainer(effects, ok, nok) {
  341. // Always populated by defaults.
  342. return factorySpace(
  343. effects,
  344. effects.attempt(this.parser.constructs.document, ok, nok),
  345. 'linePrefix',
  346. this.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4
  347. )
  348. }