// text.js (5.5 KB)

  /**
   * @typedef {import('micromark-util-types').Code} Code
   * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
   * @typedef {import('micromark-util-types').Initializer} Initializer
   * @typedef {import('micromark-util-types').Resolver} Resolver
   * @typedef {import('micromark-util-types').State} State
   * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
   */
  /**
   * Stand-alone resolver: `createResolver` is called without an extra
   * resolver, so `resolveAll` only merges adjacent `data` events.
   */
  export const resolver = {
    resolveAll: createResolver()
  }

  /** Initial construct produced by `initializeFactory` for the `string` field. */
  export const string = initializeFactory('string')

  /** Initial construct produced by `initializeFactory` for the `text` field. */
  export const text = initializeFactory('text')
  /**
   * Build the initial construct for one content type.
   *
   * @param {'string' | 'text'} field
   *   Key used to look up the constructs on `this.parser.constructs`.
   * @returns {InitialConstruct}
   *   Object with a `tokenize` initializer and a `resolveAll` resolver.
   */
  function initializeFactory(field) {
    return {
      tokenize: initializeText,
      // Only the `text` field additionally runs the line-suffix resolver.
      resolveAll: createResolver(
        field === 'text' ? resolveAllLineSuffixes : undefined
      )
    }

    /**
     * Set up the state machine and hand back its first state.
     *
     * @this {TokenizeContext}
     * @type {Initializer}
     */
    function initializeText(effects) {
      // Nested state functions have their own `this`, so keep the context.
      const self = this
      const constructs = this.parser.constructs[field]
      // `start` and `notText` are passed as the 2nd and 3rd arguments;
      // presumably the "ok" and "not ok" continuations (confirm against the
      // `effects.attempt` contract).
      const text = effects.attempt(constructs, start, notText)

      return start

      /**
       * Try the constructs when `code` is a break, otherwise treat it as data.
       *
       * @type {State}
       */
      function start(code) {
        return atBreak(code) ? text(code) : notText(code)
      }

      /**
       * Open a `data` token and consume `code` into it; a `null` code is
       * consumed without opening a token and no next state is returned.
       *
       * @type {State}
       */
      function notText(code) {
        if (code === null) {
          effects.consume(code)
          return
        }

        effects.enter('data')
        effects.consume(code)
        return data
      }

      /**
       * Keep consuming into the open `data` token until a break is reached,
       * then close the token and hand `code` to the constructs.
       *
       * @type {State}
       */
      function data(code) {
        if (atBreak(code)) {
          effects.exit('data')
          return text(code)
        }

        // Data.
        effects.consume(code)
        return data
      }

      /**
       * Check whether `code` ends the current run of data: it is `null`, or
       * at least one construct registered for it either has no `previous`
       * check or has one that accepts `self.previous`.
       *
       * @param {Code} code
       * @returns {boolean}
       */
      function atBreak(code) {
        if (code === null) {
          return true
        }

        const list = constructs[code]

        if (!list) {
          return false
        }

        let index = -1

        while (++index < list.length) {
          const item = list[index]

          if (!item.previous || item.previous.call(self, self.previous)) {
            return true
          }
        }

        return false
      }
    }
  }
  /**
   * Create a resolver that merges runs of adjacent `data` tokens into one
   * and then optionally delegates to another resolver.
   *
   * @param {Resolver | undefined} [extraResolver]
   *   Called with `(events, context)` after merging; its return value is
   *   returned as-is.
   * @returns {Resolver}
   *   Resolver that mutates `events` in place.
   */
  function createResolver(extraResolver) {
    return resolveAllText

    /** @type {Resolver} */
    function resolveAllText(events, context) {
      let index = -1
      /**
       * Index of the `enter` event that opened the current `data` run.
       *
       * @type {number | undefined}
       */
      let enter

      // A rather boring computation (to merge adjacent `data` events) which
      // improves mm performance by 29%.
      // The loop deliberately runs one step past the last event so that a
      // run reaching the end of `events` is closed as well.
      while (++index <= events.length) {
        if (enter === undefined) {
          if (events[index] && events[index][1].type === 'data') {
            enter = index
            // Skip the matching exit event of this first `data` token.
            index++
          }
        } else if (!events[index] || events[index][1].type !== 'data') {
          // Don’t do anything if there is one data token.
          if (index !== enter + 2) {
            // Stretch the first token to the end of the last one, then drop
            // every event after the first enter/exit pair.
            events[enter][1].end = events[index - 1][1].end
            events.splice(enter + 2, index - enter - 2)
            index = enter + 2
          }

          enter = undefined
        }
      }

      return extraResolver ? extraResolver(events, context) : events
    }
  }
  /**
   * A rather ugly set of instructions which again looks at chunks in the input
   * stream.
   * The reason to do this here is that it is *much* faster to parse in reverse.
   * And that we can’t hook into `null` to split the line suffix before an EOF.
   * To do: figure out if we can make this into a clean utility, or even in core.
   * As it will be useful for GFMs literal autolink extension (and maybe even
   * tables?)
   *
   * For every `data` token that is directly followed by a `lineEnding` event
   * or by the end of `events`, trailing spaces (char code 32) and `-2` chunks
   * are split off into a separate `lineSuffix` or `hardBreakTrailing` token.
   * `events` is mutated in place and returned.
   *
   * @type {Resolver}
   */
  function resolveAllLineSuffixes(events, context) {
    let eventIndex = 0 // Skip first.

    while (++eventIndex <= events.length) {
      if (
        (eventIndex === events.length ||
          events[eventIndex][1].type === 'lineEnding') &&
        events[eventIndex - 1][1].type === 'data'
      ) {
        const data = events[eventIndex - 1][1]
        const chunks = context.sliceStream(data)
        let index = chunks.length
        let bufferIndex = -1
        let size = 0
        /** @type {boolean | undefined} */
        let tabs

        // Walk the chunks from the end, counting trailing whitespace.
        while (index--) {
          const chunk = chunks[index]

          if (typeof chunk === 'string') {
            bufferIndex = chunk.length

            // 32 is the char code of a space.
            while (chunk.charCodeAt(bufferIndex - 1) === 32) {
              size++
              bufferIndex--
            }

            // Stop when this chunk has non-space content left; otherwise the
            // whole chunk was spaces, so continue with the chunk before it.
            if (bufferIndex) break
            bufferIndex = -1
          }
          // Number
          else if (chunk === -2) {
            // Counted as one unit and remembered as a tab (presumably the
            // tokenizer's horizontal-tab code; confirm against the code
            // table).
            tabs = true
            size++
          } else if (chunk === -1) {
            // Empty
          } else {
            // Replacement character, exit.
            index++
            break
          }
        }

        if (size) {
          const token = {
            // At the end of the events, with a tab involved, or with fewer
            // than two units, the whitespace is a plain line suffix.
            type:
              eventIndex === events.length || tabs || size < 2
                ? 'lineSuffix'
                : 'hardBreakTrailing',
            start: {
              line: data.end.line,
              column: data.end.column - size,
              offset: data.end.offset - size,
              _index: data.start._index + index,
              // `bufferIndex` is used as-is unless the scan stopped in the
              // first chunk, where it is added to the data token's own
              // `_bufferIndex`.
              _bufferIndex: index
                ? bufferIndex
                : data.start._bufferIndex + bufferIndex
            },
            end: Object.assign({}, data.end)
          }

          // The data token now stops where the suffix starts.
          data.end = Object.assign({}, token.start)

          if (data.start.offset === data.end.offset) {
            // Nothing but whitespace: turn the data token into the suffix.
            Object.assign(data, token)
          } else {
            events.splice(
              eventIndex,
              0,
              ['enter', token, context],
              ['exit', token, context]
            )
            // Step over the two events that were just inserted.
            eventIndex += 2
          }
        }

        eventIndex++
      }
    }

    return events
  }