123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433 |
/**
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').ContainerState} ContainerState
 * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
 * @typedef {import('micromark-util-types').Initializer} Initializer
 * @typedef {import('micromark-util-types').Point} Point
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Token} Token
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

/**
 * @typedef {[Construct, ContainerState]} StackItem
 */
- import {factorySpace} from 'micromark-factory-space'
- import {markdownLineEnding} from 'micromark-util-character'
- import {splice} from 'micromark-util-chunked'
- import {codes, constants, types} from 'micromark-util-symbol'
- import {ok as assert} from 'devlop'
/**
 * Initial construct for the document content type; its tokenizer is
 * `initializeDocument`.
 *
 * @type {InitialConstruct}
 */
export const document = {
  tokenize: initializeDocument
}
/**
 * Construct wrapping `tokenizeContainer`; the document tokenizer passes it to
 * `effects.check` and `effects.attempt` when looking for a new container.
 *
 * @type {Construct}
 */
const containerConstruct = {
  tokenize: tokenizeContainer
}
/**
 * Tokenize the document content type.
 *
 * Works line by line: at the start of each line it first tries to continue
 * the containers that are already open, then looks for new containers, and
 * finally hands the rest of the line, as a `chunkFlow` token, to a child
 * flow tokenizer.
 *
 * @this {TokenizeContext}
 * @type {Initializer}
 */
function initializeDocument(effects) {
  const self = this
  /**
   * Open containers, outermost first, each paired with its own state object.
   *
   * @type {Array<StackItem>}
   */
  const stack = []
  // How many entries of `stack` have continued on the current line so far.
  let continued = 0
  /**
   * Tokenizer that receives the flow chunks; created lazily in `flowStart`
   * and dropped again in `closeFlow`.
   *
   * @type {TokenizeContext | undefined}
   */
  let childFlow
  /**
   * Most recent chunk written to `childFlow`, used to link chunks together.
   *
   * @type {Token | undefined}
   */
  let childToken
  /**
   * Offset at which the flow part of the current line starts; set in
   * `thereIsNoNewContainer` and read when handling lazy lines.
   *
   * @type {number}
   */
  let lineStartOffset

  return start

  /**
   * Start of a line: try to continue the next open container, if any.
   *
   * @type {State}
   */
  function start(code) {
    // First we iterate through the open blocks, starting with the root
    // document, and descending through last children down to the last open
    // block.
    // Each block imposes a condition that the line must satisfy if the block is
    // to remain open.
    // For example, a block quote requires a `>` character.
    // A paragraph requires a non-blank line.
    // In this phase we may match all or just some of the open blocks.
    // But we cannot close unmatched blocks yet, because we may have a lazy
    // continuation line.
    if (continued < stack.length) {
      const item = stack[continued]
      self.containerState = item[1]
      assert(
        item[0].continuation,
        'expected `continuation` to be defined on container construct'
      )
      return effects.attempt(
        item[0].continuation,
        documentContinue,
        checkNewContainers
      )(code)
    }

    // Done.
    return checkNewContainers(code)
  }

  /**
   * After an open container matched its continuation on this line.
   *
   * @type {State}
   */
  function documentContinue(code) {
    assert(
      self.containerState,
      'expected `containerState` to be defined after continuation'
    )
    continued++

    // Note: this field is called `_closeFlow` but it also closes containers.
    // Perhaps a good idea to rename it but it’s already used in the wild by
    // extensions.
    if (self.containerState._closeFlow) {
      self.containerState._closeFlow = undefined

      if (childFlow) {
        closeFlow()
      }

      // Exit the deeper containers right after the last flow chunk, so that
      // the events of the continuation that just matched stay outside them.
      exitContainersAfterFlowChunk(0)
      return checkNewContainers(code)
    }

    return start(code)
  }

  /**
   * After the continuation phase: figure out whether a new container could
   * start here.
   *
   * @type {State}
   */
  function checkNewContainers(code) {
    // Next, after consuming the continuation markers for existing blocks, we
    // look for new block starts (e.g. `>` for a block quote).
    // If we encounter a new block start, we close any blocks unmatched in
    // step 1 before creating the new block as a child of the last matched
    // block.
    if (continued === stack.length) {
      // No need to `check` whether there’s a container, or `exitContainers`
      // would be moot.
      // We can instead immediately `attempt` to parse one.
      if (!childFlow) {
        return documentContinued(code)
      }

      // If we have concrete content, such as block HTML or fenced code,
      // we can’t have containers “pierce” into them, so we can immediately
      // start.
      if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
        return flowStart(code)
      }

      // If we do have flow, it could still be a blank line,
      // but we’d be interrupting it w/ a new container if there’s a current
      // construct.
      // To do: next major: remove `_gfmTableDynamicInterruptHack` (no longer
      // needed in micromark-extension-gfm-table@1.0.6).
      self.interrupt = Boolean(
        childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
      )
    }

    // Check if there is a new container.
    self.containerState = {}
    return effects.check(
      containerConstruct,
      thereIsANewContainer,
      thereIsNoNewContainer
    )(code)
  }

  /**
   * The check found a new container: close the current flow and the
   * containers that did not continue, then actually parse the new one.
   *
   * @type {State}
   */
  function thereIsANewContainer(code) {
    if (childFlow) closeFlow()
    exitContainers(continued)
    return documentContinued(code)
  }

  /**
   * The check found no new container: the rest of the line is flow.
   *
   * @type {State}
   */
  function thereIsNoNewContainer(code) {
    // The line is lazy when some open containers did not continue on it.
    self.parser.lazy[self.now().line] = continued !== stack.length
    lineStartOffset = self.now().offset
    return flowStart(code)
  }

  /**
   * Attempt to open a new container; fall through to flow when there is none.
   *
   * @type {State}
   */
  function documentContinued(code) {
    // Try new containers.
    self.containerState = {}
    return effects.attempt(
      containerConstruct,
      containerContinue,
      flowStart
    )(code)
  }

  /**
   * After a new container was opened: remember it and try to open another.
   *
   * @type {State}
   */
  function containerContinue(code) {
    assert(
      self.currentConstruct,
      'expected `currentConstruct` to be defined on tokenizer'
    )
    assert(
      self.containerState,
      'expected `containerState` to be defined on tokenizer'
    )
    continued++
    stack.push([self.currentConstruct, self.containerState])
    // Try another.
    return documentContinued(code)
  }

  /**
   * Start of the flow part of a line.
   *
   * @type {State}
   */
  function flowStart(code) {
    if (code === codes.eof) {
      if (childFlow) closeFlow()
      exitContainers(0)
      effects.consume(code)
      return
    }

    childFlow = childFlow || self.parser.flow(self.now())
    effects.enter(types.chunkFlow, {
      contentType: constants.contentTypeFlow,
      previous: childToken,
      _tokenizer: childFlow
    })

    return flowContinue(code)
  }

  /**
   * Inside the flow part of a line: consume up to and including the line
   * ending (or up to the end of the file) and pass the chunk to the child.
   *
   * @type {State}
   */
  function flowContinue(code) {
    if (code === codes.eof) {
      writeToChild(effects.exit(types.chunkFlow), true)
      exitContainers(0)
      effects.consume(code)
      return
    }

    if (markdownLineEnding(code)) {
      effects.consume(code)
      writeToChild(effects.exit(types.chunkFlow))
      // Get ready for the next line.
      continued = 0
      self.interrupt = undefined
      return start
    }

    effects.consume(code)
    return flowContinue
  }

  /**
   * Link a finished flow chunk to the previous one and feed its content to
   * the child flow tokenizer.
   *
   * @param {Token} token
   *   The `chunkFlow` token that was just exited.
   * @param {boolean | undefined} [eof]
   *   Whether to append an end-of-file (`null`) to the written stream.
   * @returns {undefined}
   */
  function writeToChild(token, eof) {
    assert(childFlow, 'expected `childFlow` to be defined when continuing')
    const stream = self.sliceStream(token)
    if (eof) stream.push(null)
    token.previous = childToken
    if (childToken) childToken.next = token
    childToken = token
    childFlow.defineSkip(token.start)
    childFlow.write(stream)

    // Alright, so we just added a lazy line:
    //
    // ```markdown
    // > a
    // b.
    //
    // Or:
    //
    // > ~~~c
    // d
    //
    // Or:
    //
    // > | e |
    // f
    // ```
    //
    // The construct in the second example (fenced code) does not accept lazy
    // lines, so it marked itself as done at the end of its first line, and
    // then the content construct parses `d`.
    // Most constructs in markdown match on the first line: if the first line
    // forms a construct, a non-lazy line can’t “unmake” it.
    //
    // The construct in the third example is potentially a GFM table, and
    // those are *weird*.
    // It *could* be a table, from the first line, if the following line
    // matches a condition.
    // In this case, that second line is lazy, which “unmakes” the first line
    // and turns the whole into one content block.
    //
    // We’ve now parsed the non-lazy and the lazy line, and can figure out
    // whether the lazy line started a new flow block.
    // If it did, we exit the current containers between the two flow blocks.
    if (self.parser.lazy[token.start.line]) {
      let index = childFlow.events.length

      while (index--) {
        if (
          // The token starts before the line ending…
          childFlow.events[index][1].start.offset < lineStartOffset &&
          // …and either is not ended yet…
          (!childFlow.events[index][1].end ||
            // …or ends after it.
            childFlow.events[index][1].end.offset > lineStartOffset)
        ) {
          // Exit: there’s still something open, which means it’s a lazy line
          // part of something.
          return
        }
      }

      // Skip the chunk of the lazy line itself: the exits belong after the
      // chunk before it.
      exitContainersAfterFlowChunk(1)
    }
  }

  /**
   * Exit every container that did not continue (everything in `stack` past
   * `continued`) and move the resulting exit events so that they directly
   * follow an earlier `chunkFlow` exit instead of sitting at the end of
   * `self.events`.
   * The moved exits get the end point of that chunk.
   *
   * Shared by `documentContinue` (`skip` of `0`) and the lazy line handling
   * in `writeToChild` (`skip` of `1`).
   *
   * @param {number} skip
   *   How many `chunkFlow` exits to step over, counting back from the end of
   *   `self.events`, before picking the one to inject after.
   * @returns {undefined}
   */
  function exitContainersAfterFlowChunk(skip) {
    const indexBeforeExits = self.events.length
    let indexBeforeFlow = indexBeforeExits
    let remaining = skip
    /** @type {Point | undefined} */
    let point

    // Find the flow chunk.
    while (indexBeforeFlow--) {
      const event = self.events[indexBeforeFlow]

      if (event[0] === 'exit' && event[1].type === types.chunkFlow) {
        if (remaining === 0) {
          point = event[1].end
          break
        }

        remaining--
      }
    }

    assert(point, 'could not find previous flow chunk')

    exitContainers(continued)

    // Fix positions.
    let index = indexBeforeExits

    while (index < self.events.length) {
      self.events[index][1].end = Object.assign({}, point)
      index++
    }

    // Inject the exits earlier (they’re still also at the end).
    splice(
      self.events,
      indexBeforeFlow + 1,
      0,
      self.events.slice(indexBeforeExits)
    )

    // Discard the duplicate exits.
    self.events.length = index
  }

  /**
   * Exit open containers, innermost first, until only `size` remain.
   *
   * @param {number} size
   *   Number of containers to keep open.
   * @returns {undefined}
   */
  function exitContainers(size) {
    let index = stack.length

    // Exit open containers.
    while (index-- > size) {
      const entry = stack[index]
      // The `exit` hook runs with the state of the container being closed.
      self.containerState = entry[1]
      assert(
        entry[0].exit,
        'expected `exit` to be defined on container construct'
      )
      entry[0].exit.call(self, effects)
    }

    stack.length = size
  }

  /**
   * Finish the current child flow tokenizer by writing an end-of-file to it,
   * and forget it together with the chunk chain.
   *
   * @returns {undefined}
   */
  function closeFlow() {
    assert(
      self.containerState,
      'expected `containerState` to be defined when closing flow'
    )
    assert(childFlow, 'expected `childFlow` to be defined when closing it')
    childFlow.write([codes.eof])
    childToken = undefined
    childFlow = undefined
    self.containerState._closeFlow = undefined
  }
}
/**
 * Tokenize the start of a container: a `linePrefix` handled by
 * `factorySpace`, followed by an attempt at the parser’s document constructs.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeContainer(effects, ok, nok) {
  const {constructs} = this.parser
  const disabled = constructs.disable.null

  // Always populated by defaults.
  assert(disabled, 'expected `disable.null` to be populated')

  // State to continue with once the prefix has been handled.
  const afterPrefix = effects.attempt(constructs.document, ok, nok)

  // With indented code turned off no limit is passed along; otherwise the
  // limit is the tab size.
  const limit = disabled.includes('codeIndented')
    ? undefined
    : constants.tabSize

  return factorySpace(effects, afterPrefix, types.linePrefix, limit)
}
|