text.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. /**
  2. * @typedef {import('micromark-util-types').Code} Code
  3. * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
  4. * @typedef {import('micromark-util-types').Initializer} Initializer
  5. * @typedef {import('micromark-util-types').Resolver} Resolver
  6. * @typedef {import('micromark-util-types').State} State
  7. * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
  8. */
  9. import {codes, constants, types} from 'micromark-util-symbol'
  10. import {ok as assert} from 'devlop'
  /**
   * Construct-like object whose `resolveAll` is the resolver built by
   * `createResolver()` with no extra resolver.
   */
  const resolver = {resolveAll: createResolver()}

  /** Initial construct built by `initializeFactory` for the `string` field. */
  const string = initializeFactory('string')

  /** Initial construct built by `initializeFactory` for the `text` field. */
  const text = initializeFactory('text')

  export {resolver, string, text}
  /**
   * Build the initial construct for one kind of inline content.
   *
   * The returned construct tokenizes with `initializeText` and resolves with
   * the resolver from `createResolver`; only the `text` field additionally
   * gets `resolveAllLineSuffixes` as its extra resolver.
   *
   * @param {'string' | 'text'} field
   *   Key used to look up the constructs on `this.parser.constructs`.
   * @returns {InitialConstruct}
   *   Construct with `tokenize` and `resolveAll`.
   */
  function initializeFactory(field) {
    const extraResolver = field === 'text' ? resolveAllLineSuffixes : undefined

    return {
      tokenize: initializeText,
      resolveAll: createResolver(extraResolver)
    }

    /**
     * Set up the state machine: try the constructs for `field`, and treat
     * everything between two possible construct starts as `data`.
     *
     * @this {TokenizeContext}
     * @type {Initializer}
     */
    function initializeText(effects) {
      const tokenizer = this
      const table = tokenizer.parser.constructs[field]
      // State returned by `attempt`; `notText` is passed as its third argument.
      const text = effects.attempt(table, start, notText)

      return start

      /**
       * Entry state: hand over to the constructs at a possible break,
       * otherwise treat the code as plain data.
       *
       * @type {State}
       */
      function start(code) {
        if (atBreak(code)) {
          return text(code)
        }

        return notText(code)
      }

      /**
       * Open a `data` token with `code`, or consume the EOF and stop.
       *
       * @type {State}
       */
      function notText(code) {
        if (code !== codes.eof) {
          effects.enter(types.data)
          effects.consume(code)
          return data
        }

        effects.consume(code)
        return undefined
      }

      /**
       * Inside a `data` token: keep consuming until a possible break.
       *
       * @type {State}
       */
      function data(code) {
        if (!atBreak(code)) {
          effects.consume(code)
          return data
        }

        effects.exit(types.data)
        return text(code)
      }

      /**
       * Check whether a construct could start at `code`.
       *
       * That is the case at EOF, or when a construct registered for `code`
       * has no `previous` check or its `previous` check accepts the
       * tokenizer's previous code.
       *
       * @param {Code} code
       * @returns {boolean}
       */
      function atBreak(code) {
        if (code === codes.eof) {
          return true
        }

        const candidates = table[code]

        if (!candidates) {
          return false
        }

        // Always populated by defaults.
        assert(
          Array.isArray(candidates),
          'expected `disable.null` to be populated'
        )

        for (let position = 0; position < candidates.length; position++) {
          const {previous} = candidates[position]

          if (!previous || previous.call(tokenizer, tokenizer.previous)) {
            return true
          }
        }

        return false
      }
    }
  }
  /**
   * Create a resolver that merges every run of adjacent `data` tokens into a
   * single token and then, when one is given, passes the events on to
   * `extraResolver`.
   *
   * @param {Resolver | undefined} [extraResolver]
   *   Optional resolver called with the merged events and the context; its
   *   return value becomes the result.
   * @returns {Resolver}
   *   Resolver that edits `events` in place.
   */
  function createResolver(extraResolver) {
    return resolveAllText

    /** @type {Resolver} */
    function resolveAllText(events, context) {
      /**
       * Index of the `enter` event that opens the current run of data
       * tokens, or `undefined` while outside a run.
       *
       * @type {number | undefined}
       */
      let runStart

      // Merging adjacent `data` events is dull work, but the original note
      // on this loop credits it with a 29% performance improvement.
      // The scan deliberately goes one past the end so that a run reaching
      // the last event is closed as well.
      for (let cursor = 0; cursor <= events.length; cursor++) {
        const event = events[cursor]
        const isData = Boolean(event) && event[1].type === types.data

        if (runStart === undefined) {
          if (isData) {
            runStart = cursor
            // Step over the matching exit event.
            cursor++
          }

          continue
        }

        if (isData) {
          continue
        }

        // A run of exactly one token (enter + exit) needs no merging.
        if (cursor !== runStart + 2) {
          events[runStart][1].end = events[cursor - 1][1].end
          events.splice(runStart + 2, cursor - runStart - 2)
          cursor = runStart + 2
        }

        runStart = undefined
      }

      if (extraResolver) {
        return extraResolver(events, context)
      }

      return events
    }
  }
  /**
   * Split trailing spaces and tabs off `data` tokens that sit directly before
   * a line ending or at the end of the events.
   *
   * The split-off part becomes a `lineSuffix` token, or a `hardBreakTrailing`
   * token when it is followed by a line ending, contains no tab, and is at
   * least `constants.hardBreakPrefixSizeMin` long.
   * When the whole data token is whitespace, the token itself is turned into
   * the new token instead of inserting new events.
   *
   * This works on the raw chunks because, per the original note, parsing in
   * reverse is much faster and `null` cannot be hooked into to split the line
   * suffix before an EOF.
   * To do: figure out if this can become a clean utility, or move into core,
   * as it would be useful for the GFM literal autolink extension (and maybe
   * even tables?).
   *
   * @type {Resolver}
   */
  function resolveAllLineSuffixes(events, context) {
    // Start at 1: every check looks at the event before the current one.
    for (let eventIndex = 1; eventIndex <= events.length; eventIndex++) {
      const atLineEnd =
        eventIndex === events.length ||
        events[eventIndex][1].type === types.lineEnding

      if (!atLineEnd || events[eventIndex - 1][1].type !== types.data) {
        continue
      }

      const data = events[eventIndex - 1][1]
      const {index, bufferIndex, size, tabs} = measureTrailingWhitespace(
        context.sliceStream(data)
      )

      if (size) {
        const isSuffix =
          eventIndex === events.length ||
          tabs ||
          size < constants.hardBreakPrefixSizeMin
        const token = {
          type: isSuffix ? types.lineSuffix : types.hardBreakTrailing,
          start: {
            line: data.end.line,
            column: data.end.column - size,
            offset: data.end.offset - size,
            _index: data.start._index + index,
            // When the scan stopped in the first chunk (`index` is 0) the
            // buffer position is added to the data token's own start.
            _bufferIndex: index
              ? bufferIndex
              : data.start._bufferIndex + bufferIndex
          },
          end: Object.assign({}, data.end)
        }

        data.end = Object.assign({}, token.start)

        if (data.start.offset === data.end.offset) {
          // Nothing but whitespace: reuse the data token as the new token.
          Object.assign(data, token)
        } else {
          events.splice(
            eventIndex,
            0,
            ['enter', token, context],
            ['exit', token, context]
          )
          eventIndex += 2
        }
      }

      // Skip one extra event before resuming the scan.
      eventIndex++
    }

    return events
  }

  /**
   * Walk the chunks of a data token from the end and count its trailing
   * whitespace.
   *
   * Spaces inside string chunks and horizontal tab codes each add one to
   * `size`; virtual space codes are passed over without counting.
   * The walk stops at the first string chunk that still has other content,
   * or at any other numeric chunk.
   *
   * @param {ReturnType<TokenizeContext['sliceStream']>} chunks
   *   Chunks of the data token.
   * @returns {{index: number, bufferIndex: number, size: number, tabs: boolean | undefined}}
   *   `index` is the chunk position where the walk stopped (`-1` when every
   *   chunk was consumed), `bufferIndex` the position inside a string chunk
   *   (`-1` when not in one), `size` the counted whitespace, and `tabs`
   *   whether a tab was seen.
   */
  function measureTrailingWhitespace(chunks) {
    let index = chunks.length - 1
    let bufferIndex = -1
    let size = 0
    /** @type {boolean | undefined} */
    let tabs

    for (; index >= 0; index--) {
      const chunk = chunks[index]

      if (typeof chunk === 'string') {
        bufferIndex = chunk.length

        while (chunk.charCodeAt(bufferIndex - 1) === codes.space) {
          size++
          bufferIndex--
        }

        // Content left in this chunk: the suffix starts here.
        if (bufferIndex) break
        bufferIndex = -1
        continue
      }

      // Virtual spaces do not add to the size.
      if (chunk === codes.virtualSpace) {
        continue
      }

      if (chunk !== codes.horizontalTab) {
        // Replacement character, exit.
        index++
        break
      }

      tabs = true
      size++
    }

    return {index, bufferIndex, size, tabs}
  }