// document.js 12 KB
  /**
   * @typedef {import('micromark-util-types').Construct} Construct
   * @typedef {import('micromark-util-types').ContainerState} ContainerState
   * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
   * @typedef {import('micromark-util-types').Initializer} Initializer
   * @typedef {import('micromark-util-types').Point} Point
   * @typedef {import('micromark-util-types').State} State
   * @typedef {import('micromark-util-types').Token} Token
   * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
   * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
   */
  /**
   * @typedef {[Construct, ContainerState]} StackItem
   */
  15. import {factorySpace} from 'micromark-factory-space'
  16. import {markdownLineEnding} from 'micromark-util-character'
  17. import {splice} from 'micromark-util-chunked'
  18. import {codes, constants, types} from 'micromark-util-symbol'
  19. import {ok as assert} from 'devlop'
  /**
   * The exported document construct: its `tokenize` field is the document
   * initializer (`initializeDocument`).
   *
   * @type {InitialConstruct}
   */
  export const document = {tokenize: initializeDocument}
  /**
   * Construct that the document tokenizer `check`s and `attempt`s when it looks
   * for a new container at the start of a line; its tokenizer is
   * `tokenizeContainer`.
   *
   * @type {Construct}
   */
  const containerConstruct = {tokenize: tokenizeContainer}
  /**
   * Create the document tokenizer: it keeps track of the open containers and
   * writes what is left of every line to a child flow tokenizer.
   *
   * Every line goes through the same steps:
   *
   * 1. `start` attempts the `continuation` of each construct on `stack`, in
   *    order;
   * 2. `checkNewContainers` and `documentContinued` look for new containers;
   * 3. `flowStart` and `flowContinue` consume the rest of the line as a
   *    `chunkFlow` token, which `writeToChild` passes to the child flow
   *    tokenizer.
   *
   * @this {TokenizeContext}
   * @type {Initializer}
   */
  function initializeDocument(effects) {
    const self = this
    /**
     * Open containers, outermost first, each paired with its own state.
     *
     * @type {Array<StackItem>}
     */
    const stack = []
    // Number of entries of `stack` that were continued on the current line.
    let continued = 0
    /** @type {TokenizeContext | undefined} */
    let childFlow
    /** @type {Token | undefined} */
    let childToken
    /** @type {number} */
    let lineStartOffset

    return start

    /** @type {State} */
    function start(code) {
      // First we iterate through the open blocks, starting with the root
      // document, and descending through last children down to the last open
      // block.
      // Each block imposes a condition that the line must satisfy if the block
      // is to remain open.
      // For example, a block quote requires a `>` character.
      // A paragraph requires a non-blank line.
      // In this phase we may match all or just some of the open blocks.
      // But we cannot close unmatched blocks yet, because we may have a lazy
      // continuation line.
      if (continued < stack.length) {
        const item = stack[continued]
        self.containerState = item[1]
        assert(
          item[0].continuation,
          'expected `continuation` to be defined on container construct'
        )
        return effects.attempt(
          item[0].continuation,
          documentContinue,
          checkNewContainers
        )(code)
      }

      // Done.
      return checkNewContainers(code)
    }

    /** @type {State} */
    function documentContinue(code) {
      assert(
        self.containerState,
        'expected `containerState` to be defined after continuation'
      )
      continued++

      // Note: this field is called `_closeFlow` but it also closes containers.
      // Perhaps a good idea to rename it but it's already used in the wild by
      // extensions.
      if (self.containerState._closeFlow) {
        self.containerState._closeFlow = undefined

        if (childFlow) {
          closeFlow()
        }

        // Exit the containers deeper than the one that just continued, and
        // place their exits directly after the most recent flow chunk.
        exitContainersAfterFlowChunk(0)
        return checkNewContainers(code)
      }

      return start(code)
    }

    /** @type {State} */
    function checkNewContainers(code) {
      // Next, after consuming the continuation markers for existing blocks, we
      // look for new block starts (e.g. `>` for a block quote).
      // If we encounter a new block start, we close any blocks unmatched in
      // step 1 before creating the new block as a child of the last matched
      // block.
      if (continued === stack.length) {
        // No need to `check` whether there's a container first: every open
        // container was continued, so `exitContainers` would have nothing to
        // exit.
        // We can instead immediately `attempt` to parse one.
        if (!childFlow) {
          return documentContinued(code)
        }

        // If we have concrete content, such as block HTML or fenced code,
        // we can't have containers "pierce" into them, so we can immediately
        // start.
        if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
          return flowStart(code)
        }

        // If we do have flow, it could still be a blank line,
        // but we'd be interrupting it w/ a new container if there's a current
        // construct.
        // To do: next major: remove `_gfmTableDynamicInterruptHack` (no longer
        // needed in micromark-extension-gfm-table@1.0.6).
        self.interrupt = Boolean(
          childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
        )
      }

      // Check if there is a new container.
      self.containerState = {}
      return effects.check(
        containerConstruct,
        thereIsANewContainer,
        thereIsNoNewContainer
      )(code)
    }

    /** @type {State} */
    function thereIsANewContainer(code) {
      // A new container starts here: whatever was not continued is closed
      // before the new container is parsed for real.
      if (childFlow) closeFlow()
      exitContainers(continued)
      return documentContinued(code)
    }

    /** @type {State} */
    function thereIsNoNewContainer(code) {
      // The line is marked as lazy when not every open container was continued.
      self.parser.lazy[self.now().line] = continued !== stack.length
      // Remembered for `writeToChild`, which compares token offsets against it.
      lineStartOffset = self.now().offset
      return flowStart(code)
    }

    /** @type {State} */
    function documentContinued(code) {
      // Try new containers.
      self.containerState = {}
      return effects.attempt(
        containerConstruct,
        containerContinue,
        flowStart
      )(code)
    }

    /** @type {State} */
    function containerContinue(code) {
      assert(
        self.currentConstruct,
        'expected `currentConstruct` to be defined on tokenizer'
      )
      assert(
        self.containerState,
        'expected `containerState` to be defined on tokenizer'
      )
      // The new container counts as continued on the line that opens it.
      continued++
      stack.push([self.currentConstruct, self.containerState])
      // Try another.
      return documentContinued(code)
    }

    /** @type {State} */
    function flowStart(code) {
      if (code === codes.eof) {
        if (childFlow) closeFlow()
        exitContainers(0)
        effects.consume(code)
        return
      }

      // One child flow tokenizer is reused until `closeFlow` discards it.
      childFlow = childFlow || self.parser.flow(self.now())
      effects.enter(types.chunkFlow, {
        contentType: constants.contentTypeFlow,
        previous: childToken,
        _tokenizer: childFlow
      })

      return flowContinue(code)
    }

    /** @type {State} */
    function flowContinue(code) {
      if (code === codes.eof) {
        writeToChild(effects.exit(types.chunkFlow), true)
        exitContainers(0)
        effects.consume(code)
        return
      }

      if (markdownLineEnding(code)) {
        // The line ending is part of the chunk that is written to the child.
        effects.consume(code)
        writeToChild(effects.exit(types.chunkFlow))
        // Get ready for the next line.
        continued = 0
        self.interrupt = undefined
        return start
      }

      effects.consume(code)
      return flowContinue
    }

    /**
     * Pass a finished `chunkFlow` token to the child flow tokenizer and, when
     * the line was lazy and turned out to start a new flow block, exit the
     * containers that were not continued in front of that line.
     *
     * @param {Token} token
     *   Chunk token that was just exited.
     * @param {boolean | undefined} [eof]
     *   Whether to append an end-of-file marker (`null`) to the stream.
     * @returns {undefined}
     */
    function writeToChild(token, eof) {
      assert(childFlow, 'expected `childFlow` to be defined when continuing')
      const stream = self.sliceStream(token)
      if (eof) stream.push(null)
      // Link the chunk tokens to each other.
      token.previous = childToken
      if (childToken) childToken.next = token
      childToken = token
      childFlow.defineSkip(token.start)
      childFlow.write(stream)

      // Alright, so we just added a lazy line:
      //
      // ```markdown
      // > a
      // b.
      //
      // Or:
      //
      // > ~~~c
      // d
      //
      // Or:
      //
      // > | e |
      // f
      // ```
      //
      // The construct in the second example (fenced code) does not accept lazy
      // lines, so it marked itself as done at the end of its first line, and
      // then the content construct parses `d`.
      // Most constructs in markdown match on the first line: if the first line
      // forms a construct, a non-lazy line can't "unmake" it.
      //
      // The construct in the third example is potentially a GFM table, and
      // those are *weird*.
      // It *could* be a table, from the first line, if the following line
      // matches a condition.
      // In this case, that second line is lazy, which "unmakes" the first line
      // and turns the whole into one content block.
      //
      // We've now parsed the non-lazy and the lazy line, and can figure out
      // whether the lazy line started a new flow block.
      // If it did, we exit the current containers between the two flow blocks.
      if (self.parser.lazy[token.start.line]) {
        let index = childFlow.events.length

        while (index--) {
          if (
            // The token starts before the line ending...
            childFlow.events[index][1].start.offset < lineStartOffset &&
            // ...and either is not ended yet...
            (!childFlow.events[index][1].end ||
              // ...or ends after it.
              childFlow.events[index][1].end.offset > lineStartOffset)
          ) {
            // Exit: there's still something open, which means it's a lazy line
            // part of something.
            return
          }
        }

        // Skip the chunk of the lazy line itself: the exits belong after the
        // chunk before it.
        exitContainersAfterFlowChunk(1)
      }
    }

    /**
     * Exit the containers that were not continued (the entries of `stack` from
     * `continued` on) and move their exit events so that they come directly
     * after the exit of an earlier flow chunk, ending where that chunk ends.
     *
     * Shared by `documentContinue` (closing flow) and `writeToChild` (lazy
     * lines).
     *
     * @param {number} skip
     *   Number of `chunkFlow` exits to pass over, counting back from the end of
     *   `self.events`, before the chunk to anchor on is picked.
     * @returns {undefined}
     */
    function exitContainersAfterFlowChunk(skip) {
      const indexBeforeExits = self.events.length
      let indexBeforeFlow = indexBeforeExits
      let remaining = skip
      /** @type {Point | undefined} */
      let point

      // Find the flow chunk.
      while (indexBeforeFlow--) {
        if (
          self.events[indexBeforeFlow][0] === 'exit' &&
          self.events[indexBeforeFlow][1].type === types.chunkFlow
        ) {
          if (remaining === 0) {
            point = self.events[indexBeforeFlow][1].end
            break
          }

          remaining--
        }
      }

      assert(point, 'could not find previous flow chunk')

      exitContainers(continued)

      // Fix positions: the exits end where the flow chunk ended.
      let index = indexBeforeExits

      while (index < self.events.length) {
        self.events[index][1].end = Object.assign({}, point)
        index++
      }

      // Inject the exits earlier (they're still also at the end).
      splice(
        self.events,
        indexBeforeFlow + 1,
        0,
        self.events.slice(indexBeforeExits)
      )

      // Discard the duplicate exits.
      self.events.length = index
    }

    /**
     * Call `exit` on the constructs of all open containers from the innermost
     * one down to (not including) index `size`, then shrink `stack` to `size`.
     *
     * @param {number} size
     *   Number of containers to keep open.
     * @returns {undefined}
     */
    function exitContainers(size) {
      let index = stack.length

      // Exit open containers.
      while (index-- > size) {
        const entry = stack[index]
        // `exit` runs with the state of the container it closes.
        self.containerState = entry[1]
        assert(
          entry[0].exit,
          'expected `exit` to be defined on container construct'
        )
        entry[0].exit.call(self, effects)
      }

      stack.length = size
    }

    /**
     * End the child flow tokenizer by writing an end-of-file code to it, and
     * forget it together with the last chunk token.
     *
     * @returns {undefined}
     */
    function closeFlow() {
      assert(
        self.containerState,
        'expected `containerState` to be defined when closing flow'
      )
      assert(childFlow, 'expected `childFlow` to be defined when closing it')
      childFlow.write([codes.eof])
      childToken = undefined
      childFlow = undefined
      self.containerState._closeFlow = undefined
    }
  }
  /**
   * Tokenize the start of a container: `factorySpace` is run first (with the
   * `linePrefix` token type), and continues into an attempt at the parser's
   * `document` constructs.
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeContainer(effects, ok, nok) {
    const constructs = this.parser.constructs

    // Always populated by defaults.
    assert(constructs.disable.null, 'expected `disable.null` to be populated')

    // The last argument is left out (`undefined`) when indented code is
    // disabled, and is the tab size otherwise.
    // NOTE(review): presumably this is the size limit for the prefix; confirm
    // against `micromark-factory-space`.
    return factorySpace(
      effects,
      effects.attempt(constructs.document, ok, nok),
      types.linePrefix,
      constructs.disable.null.includes('codeIndented')
        ? undefined
        : constants.tabSize
    )
  }
  380. }