// preprocess.js

/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Encoding} Encoding
 * @typedef {import('micromark-util-types').Value} Value
 */

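/**
 * @callback Preprocessor
 *   Turn one chunk of input into a list of micromark chunks.
 * @param {Value} value
 *   Chunk of input; strings are used as is, anything else is decoded with
 *   `TextDecoder`.
 * @param {Encoding | null | undefined} [encoding]
 *   Encoding label handed to `TextDecoder` when `value` is not a string.
 * @param {boolean | null | undefined} [end=false]
 *   Whether this is the last chunk; when truthy, any remaining text and an
 *   EOF code are emitted.
 * @returns {Array<Chunk>}
 *   Chunks produced from the input seen so far.
 */
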
import {codes, constants} from 'micromark-util-symbol'

  15. const search = /[\0\t\n\r]/g
  16. /**
  17. * @returns {Preprocessor}
  18. */
  19. export function preprocess() {
  20. let column = 1
  21. let buffer = ''
  22. /** @type {boolean | undefined} */
  23. let start = true
  24. /** @type {boolean | undefined} */
  25. let atCarriageReturn
  26. return preprocessor
  27. /** @type {Preprocessor} */
  28. // eslint-disable-next-line complexity
  29. function preprocessor(value, encoding, end) {
  30. /** @type {Array<Chunk>} */
  31. const chunks = []
  32. /** @type {RegExpMatchArray | null} */
  33. let match
  34. /** @type {number} */
  35. let next
  36. /** @type {number} */
  37. let startPosition
  38. /** @type {number} */
  39. let endPosition
  40. /** @type {Code} */
  41. let code
  42. value =
  43. buffer +
  44. (typeof value === 'string'
  45. ? value.toString()
  46. : new TextDecoder(encoding || undefined).decode(value))
  47. startPosition = 0
  48. buffer = ''
  49. if (start) {
  50. // To do: `markdown-rs` actually parses BOMs (byte order mark).
  51. if (value.charCodeAt(0) === codes.byteOrderMarker) {
  52. startPosition++
  53. }
  54. start = undefined
  55. }
  56. while (startPosition < value.length) {
  57. search.lastIndex = startPosition
  58. match = search.exec(value)
  59. endPosition =
  60. match && match.index !== undefined ? match.index : value.length
  61. code = value.charCodeAt(endPosition)
  62. if (!match) {
  63. buffer = value.slice(startPosition)
  64. break
  65. }
  66. if (
  67. code === codes.lf &&
  68. startPosition === endPosition &&
  69. atCarriageReturn
  70. ) {
  71. chunks.push(codes.carriageReturnLineFeed)
  72. atCarriageReturn = undefined
  73. } else {
  74. if (atCarriageReturn) {
  75. chunks.push(codes.carriageReturn)
  76. atCarriageReturn = undefined
  77. }
  78. if (startPosition < endPosition) {
  79. chunks.push(value.slice(startPosition, endPosition))
  80. column += endPosition - startPosition
  81. }
  82. switch (code) {
  83. case codes.nul: {
  84. chunks.push(codes.replacementCharacter)
  85. column++
  86. break
  87. }
  88. case codes.ht: {
  89. next = Math.ceil(column / constants.tabSize) * constants.tabSize
  90. chunks.push(codes.horizontalTab)
  91. while (column++ < next) chunks.push(codes.virtualSpace)
  92. break
  93. }
  94. case codes.lf: {
  95. chunks.push(codes.lineFeed)
  96. column = 1
  97. break
  98. }
  99. default: {
  100. atCarriageReturn = true
  101. column = 1
  102. }
  103. }
  104. }
  105. startPosition = endPosition + 1
  106. }
  107. if (end) {
  108. if (atCarriageReturn) chunks.push(codes.carriageReturn)
  109. if (buffer) chunks.push(buffer)
  110. chunks.push(codes.eof)
  111. }
  112. return chunks
  113. }
  114. }