123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- /**
- * @typedef {import('micromark-util-types').Chunk} Chunk
- * @typedef {import('micromark-util-types').Code} Code
- * @typedef {import('micromark-util-types').Encoding} Encoding
- * @typedef {import('micromark-util-types').Value} Value
- */
- /**
- * @callback Preprocessor
- * @param {Value} value
- * @param {Encoding | null | undefined} [encoding]
- * @param {boolean | null | undefined} [end=false]
- * @returns {Array<Chunk>}
- */
- import {codes, constants} from 'micromark-util-symbol'
// Matches the four characters the preprocessor treats specially: NUL, tab,
// line feed, and carriage return. The `g` flag is what allows `lastIndex` to
// be set before each `exec` so scanning resumes from a chosen offset.
- const search = /[\0\t\n\r]/g
/**
 * Create a stateful preprocessor that turns a document, fed in one or more
 * pieces, into chunks: plain strings for ordinary text and numeric codes for
 * NUL, tabs (plus virtual spaces up to the next tab stop), and line endings.
 *
 * State kept between calls: the current column, the unterminated trailing
 * text, whether a carriage return is still waiting to see if a line feed
 * follows, whether the start of the document has been seen, and a streaming
 * text decoder for byte input.
 *
 * @returns {Preprocessor}
 *   Preprocessor to call with each piece of input; pass `end: true` on the
 *   last call to flush pending state and append the EOF code.
 */
export function preprocess() {
  let column = 1
  let buffer = ''
  /** @type {boolean | undefined} */
  let start = true
  /** @type {boolean | undefined} */
  let atCarriageReturn
  /** @type {TextDecoder | undefined} */
  let decoder
  /** @type {string | undefined} */
  let decoderLabel

  return preprocessor

  /**
   * Flush whatever bytes the streaming decoder is still holding.
   *
   * @returns {string}
   *   Decoded remainder (empty when there is no decoder or nothing pending).
   */
  function flushDecoder() {
    return decoder ? decoder.decode() : ''
  }

  /**
   * Turn one piece of input into text.
   *
   * Bytes are decoded in streaming mode so that a character whose bytes are
   * split across two calls is decoded once, intact, instead of becoming
   * replacement characters on both sides of the split.
   *
   * @param {Value} value
   * @param {Encoding | null | undefined} encoding
   * @param {boolean | null | undefined} end
   * @returns {string}
   */
  function toText(value, encoding, end) {
    if (typeof value === 'string') {
      // Text interrupts any byte sequence still pending in the decoder: flush
      // it first so nothing is dropped or reordered.
      return flushDecoder() + value
    }

    const label = encoding || undefined
    let pending = ''

    if (!decoder || decoderLabel !== label) {
      // A different encoding needs a different decoder; emit what the old one
      // was holding before replacing it.
      pending = flushDecoder()
      decoder = new TextDecoder(label)
      decoderLabel = label
    }

    // On the final call, decode without `stream` so the decoder is flushed.
    return pending + decoder.decode(value, {stream: !end})
  }

  /** @type {Preprocessor} */
  // eslint-disable-next-line complexity
  function preprocessor(value, encoding, end) {
    /** @type {Array<Chunk>} */
    const chunks = []
    const text = buffer + toText(value, encoding, end)
    let startPosition = 0

    buffer = ''

    // Decide about the byte order mark only once there is text to look at, so
    // an empty first piece does not cause a BOM in the next one to be kept.
    if (start && text.length > 0) {
      // To do: `markdown-rs` actually parses BOMs (byte order mark).
      if (text.charCodeAt(0) === codes.byteOrderMarker) {
        startPosition++
      }

      start = undefined
    }

    while (startPosition < text.length) {
      search.lastIndex = startPosition
      const match = search.exec(text)

      if (!match) {
        // No special character left: hold the tail back, as the next piece
        // may continue it.
        buffer = text.slice(startPosition)
        break
      }

      const endPosition = match.index
      const code = text.charCodeAt(endPosition)

      if (
        code === codes.lf &&
        startPosition === endPosition &&
        atCarriageReturn
      ) {
        // A line feed directly after a pending carriage return: one CRLF.
        chunks.push(codes.carriageReturnLineFeed)
        atCarriageReturn = undefined
      } else {
        if (atCarriageReturn) {
          // The pending carriage return turned out to stand alone.
          chunks.push(codes.carriageReturn)
          atCarriageReturn = undefined
        }

        if (startPosition < endPosition) {
          chunks.push(text.slice(startPosition, endPosition))
          column += endPosition - startPosition
        }

        switch (code) {
          case codes.nul: {
            chunks.push(codes.replacementCharacter)
            column++
            break
          }

          case codes.ht: {
            // Pad with virtual spaces up to the next tab stop.
            const next = Math.ceil(column / constants.tabSize) * constants.tabSize
            chunks.push(codes.horizontalTab)
            while (column++ < next) chunks.push(codes.virtualSpace)
            break
          }

          case codes.lf: {
            chunks.push(codes.lineFeed)
            column = 1
            break
          }

          default: {
            // Carriage return: wait to see whether a line feed follows.
            atCarriageReturn = true
            column = 1
          }
        }
      }

      startPosition = endPosition + 1
    }

    if (end) {
      if (atCarriageReturn) chunks.push(codes.carriageReturn)
      if (buffer) chunks.push(buffer)
      chunks.push(codes.eof)
    }

    return chunks
  }
}
|