// preprocess.js
/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Encoding} Encoding
 * @typedef {import('micromark-util-types').Value} Value
 */

/**
 * @callback Preprocessor
 * @param {Value} value
 * @param {Encoding | null | undefined} [encoding]
 * @param {boolean | null | undefined} [end=false]
 * @returns {Array<Chunk>}
 */
  14. const search = /[\0\t\n\r]/g
  15. /**
  16. * @returns {Preprocessor}
  17. */
  18. export function preprocess() {
  19. let column = 1
  20. let buffer = ''
  21. /** @type {boolean | undefined} */
  22. let start = true
  23. /** @type {boolean | undefined} */
  24. let atCarriageReturn
  25. return preprocessor
  26. /** @type {Preprocessor} */
  27. // eslint-disable-next-line complexity
  28. function preprocessor(value, encoding, end) {
  29. /** @type {Array<Chunk>} */
  30. const chunks = []
  31. /** @type {RegExpMatchArray | null} */
  32. let match
  33. /** @type {number} */
  34. let next
  35. /** @type {number} */
  36. let startPosition
  37. /** @type {number} */
  38. let endPosition
  39. /** @type {Code} */
  40. let code
  41. value =
  42. buffer +
  43. (typeof value === 'string'
  44. ? value.toString()
  45. : new TextDecoder(encoding || undefined).decode(value))
  46. startPosition = 0
  47. buffer = ''
  48. if (start) {
  49. // To do: `markdown-rs` actually parses BOMs (byte order mark).
  50. if (value.charCodeAt(0) === 65279) {
  51. startPosition++
  52. }
  53. start = undefined
  54. }
  55. while (startPosition < value.length) {
  56. search.lastIndex = startPosition
  57. match = search.exec(value)
  58. endPosition =
  59. match && match.index !== undefined ? match.index : value.length
  60. code = value.charCodeAt(endPosition)
  61. if (!match) {
  62. buffer = value.slice(startPosition)
  63. break
  64. }
  65. if (code === 10 && startPosition === endPosition && atCarriageReturn) {
  66. chunks.push(-3)
  67. atCarriageReturn = undefined
  68. } else {
  69. if (atCarriageReturn) {
  70. chunks.push(-5)
  71. atCarriageReturn = undefined
  72. }
  73. if (startPosition < endPosition) {
  74. chunks.push(value.slice(startPosition, endPosition))
  75. column += endPosition - startPosition
  76. }
  77. switch (code) {
  78. case 0: {
  79. chunks.push(65533)
  80. column++
  81. break
  82. }
  83. case 9: {
  84. next = Math.ceil(column / 4) * 4
  85. chunks.push(-2)
  86. while (column++ < next) chunks.push(-1)
  87. break
  88. }
  89. case 10: {
  90. chunks.push(-4)
  91. column = 1
  92. break
  93. }
  94. default: {
  95. atCarriageReturn = true
  96. column = 1
  97. }
  98. }
  99. }
  100. startPosition = endPosition + 1
  101. }
  102. if (end) {
  103. if (atCarriageReturn) chunks.push(-5)
  104. if (buffer) chunks.push(buffer)
  105. chunks.push(null)
  106. }
  107. return chunks
  108. }
  109. }