123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 |
- /**
- * @typedef {import('../types.js').SafeConfig} SafeConfig
- * @typedef {import('../types.js').State} State
- */
- import {patternInScope} from './pattern-in-scope.js'
/**
 * Make a string safe for embedding in markdown constructs.
 *
 * In markdown, almost all punctuation characters can, in certain cases,
 * result in something.
 * Whether they do depends heavily on where they occur and on what surrounds
 * them.
 *
 * To solve this, `mdast-util-to-markdown` tracks:
 *
 * * Characters before and after something;
 * * What “constructs” we are in.
 *
 * This information is then used by this function to escape or encode
 * special characters.
 *
 * @param {State} state
 *   Info passed around about the current state; this function reads
 *   `state.unsafe`, `state.stack`, and calls `state.compilePattern`.
 * @param {string | null | undefined} input
 *   Raw value to make safe; nullish is treated as an empty string.
 * @param {SafeConfig} config
 *   Configuration; `before` and `after` are the surrounding text (used for
 *   matching only, never emitted), `encode` lists characters that must be
 *   written as character references instead of backslash escapes.
 * @returns {string}
 *   Serialized markdown safe for embedding.
 */
export function safe(state, input, config) {
  const value = (config.before || '') + (input || '') + (config.after || '')
  /** @type {Array<number>} */
  const positions = []
  /** @type {Array<string>} */
  const result = []
  // Keyed by position so that duplicates are detected in constant time
  // instead of scanning `positions` for every match.
  /** @type {Map<number, {before: boolean, after: boolean}>} */
  const infos = new Map()
  let index = -1

  while (++index < state.unsafe.length) {
    const pattern = state.unsafe[index]

    if (!patternInScope(state.stack, pattern)) {
      continue
    }

    const expression = state.compilePattern(pattern)
    /** @type {RegExpExecArray | null} */
    let match

    while ((match = expression.exec(value))) {
      const before = 'before' in pattern || Boolean(pattern.atBreak)
      const after = 'after' in pattern
      // NOTE(review): presumably the compiled expression captures the
      // preceding context in group 1 whenever the pattern has a `before` or
      // `atBreak` condition; its length is skipped to land on the character.
      const position = match.index + (before ? match[1].length : 0)
      const info = infos.get(position)

      if (info) {
        // A flag only survives if every pattern matching here sets it.
        info.before = info.before && before
        info.after = info.after && after
      } else {
        positions.push(position)
        infos.set(position, {before, after})
      }
    }
  }

  positions.sort(numerical)

  let start = config.before ? config.before.length : 0
  const end = value.length - (config.after ? config.after.length : 0)
  index = -1

  while (++index < positions.length) {
    const position = positions[index]

    // Character before or after matched (or a code unit that was already
    // consumed as part of a previously encoded surrogate pair):
    if (position < start || position >= end) {
      continue
    }

    const current = infos.get(position)

    // If this character is supposed to be escaped because it has a condition on
    // the next character, and the next character is definitely being escaped,
    // then skip this escape.
    // The same holds for a condition on the previous character.
    if (
      (position + 1 < end &&
        positions[index + 1] === position + 1 &&
        current.after &&
        !infos.get(position + 1).before &&
        !infos.get(position + 1).after) ||
      (positions[index - 1] === position - 1 &&
        current.before &&
        !infos.get(position - 1).before &&
        !infos.get(position - 1).after)
    ) {
      continue
    }

    if (start !== position) {
      // If we have to use a character reference, an ampersand would be more
      // correct, but as backslashes only care about punctuation, either will
      // do the trick
      result.push(escapeBackslashes(value.slice(start, position), '\\'))
    }

    start = position

    if (
      /[!-/:-@[-`{-~]/.test(value.charAt(position)) &&
      (!config.encode || !config.encode.includes(value.charAt(position)))
    ) {
      // Character escape: the character itself is emitted with the next slice.
      result.push('\\')
    } else {
      // Character reference.
      // Read a whole code point so that a surrogate pair becomes one valid
      // reference, but only when the pair lies entirely inside the input.
      const code =
        position + 1 < end
          ? value.codePointAt(position)
          : value.charCodeAt(position)

      result.push('&#x' + code.toString(16).toUpperCase() + ';')
      // Skip every UTF-16 code unit that the reference replaces.
      start += code > 0xffff ? 2 : 1
    }
  }

  result.push(escapeBackslashes(value.slice(start, end), config.after || ''))

  return result.join('')
}
/**
 * Comparator that orders numbers ascending.
 *
 * @param {number} a
 *   Left operand.
 * @param {number} b
 *   Right operand.
 * @returns {number}
 *   Difference `a - b`: negative when `a` is smaller, positive when it is
 *   larger, `0` when both are equal.
 */
function numerical(a, b) {
  const difference = a - b
  return difference
}
/**
 * Escape every backslash in `value` that is directly followed by an ASCII
 * punctuation character, by putting another backslash in front of it.
 *
 * `after` is only used as lookahead, so that a backslash at the very end of
 * `value` is checked against the text that follows; it is never part of the
 * result, and backslashes inside it are left alone.
 *
 * @param {string} value
 *   Text to escape backslashes in.
 * @param {string | null | undefined} after
 *   Text that follows `value`; nullish is treated as an empty string.
 * @returns {string}
 *   `value` with the relevant backslashes doubled.
 */
function escapeBackslashes(value, after) {
  const expression = /\\(?=[!-/:-@[-`{-~])/g
  /** @type {Array<string>} */
  const results = []
  // Avoid concatenating the text `undefined`/`null` when nothing follows.
  const whole = value + (after || '')
  let start = 0
  /** @type {RegExpExecArray | null} */
  let match

  while ((match = expression.exec(whole))) {
    // Matches are found left to right, so once one falls inside `after`
    // all remaining ones do too; they are not ours to escape.
    if (match.index >= value.length) {
      break
    }

    // Emit the text up to the backslash, then the extra backslash; the
    // original backslash is emitted with the next slice.
    results.push(value.slice(start, match.index), '\\')
    start = match.index
  }

  results.push(value.slice(start))

  return results.join('')
}
|