safe.js 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. /**
  2. * @typedef {import('../types.js').SafeConfig} SafeConfig
  3. * @typedef {import('../types.js').State} State
  4. */
  5. import {patternInScope} from './pattern-in-scope.js'
  6. /**
  7. * Make a string safe for embedding in markdown constructs.
  8. *
  9. * In markdown, almost all punctuation characters can, in certain cases,
  10. * result in something.
  11. * Whether they do is highly subjective to where they happen and in what
  12. * they happen.
  13. *
  14. * To solve this, `mdast-util-to-markdown` tracks:
  15. *
  16. * * Characters before and after something;
  17. * * What “constructs” we are in.
  18. *
  19. * This information is then used by this function to escape or encode
  20. * special characters.
  21. *
  22. * @param {State} state
  23. * Info passed around about the current state.
  24. * @param {string | null | undefined} input
  25. * Raw value to make safe.
  26. * @param {SafeConfig} config
  27. * Configuration.
  28. * @returns {string}
  29. * Serialized markdown safe for embedding.
  30. */
  31. export function safe(state, input, config) {
  32. const value = (config.before || '') + (input || '') + (config.after || '')
  33. /** @type {Array<number>} */
  34. const positions = []
  35. /** @type {Array<string>} */
  36. const result = []
  37. /** @type {Record<number, {before: boolean, after: boolean}>} */
  38. const infos = {}
  39. let index = -1
  40. while (++index < state.unsafe.length) {
  41. const pattern = state.unsafe[index]
  42. if (!patternInScope(state.stack, pattern)) {
  43. continue
  44. }
  45. const expression = state.compilePattern(pattern)
  46. /** @type {RegExpExecArray | null} */
  47. let match
  48. while ((match = expression.exec(value))) {
  49. const before = 'before' in pattern || Boolean(pattern.atBreak)
  50. const after = 'after' in pattern
  51. const position = match.index + (before ? match[1].length : 0)
  52. if (positions.includes(position)) {
  53. if (infos[position].before && !before) {
  54. infos[position].before = false
  55. }
  56. if (infos[position].after && !after) {
  57. infos[position].after = false
  58. }
  59. } else {
  60. positions.push(position)
  61. infos[position] = {before, after}
  62. }
  63. }
  64. }
  65. positions.sort(numerical)
  66. let start = config.before ? config.before.length : 0
  67. const end = value.length - (config.after ? config.after.length : 0)
  68. index = -1
  69. while (++index < positions.length) {
  70. const position = positions[index]
  71. // Character before or after matched:
  72. if (position < start || position >= end) {
  73. continue
  74. }
  75. // If this character is supposed to be escaped because it has a condition on
  76. // the next character, and the next character is definitly being escaped,
  77. // then skip this escape.
  78. if (
  79. (position + 1 < end &&
  80. positions[index + 1] === position + 1 &&
  81. infos[position].after &&
  82. !infos[position + 1].before &&
  83. !infos[position + 1].after) ||
  84. (positions[index - 1] === position - 1 &&
  85. infos[position].before &&
  86. !infos[position - 1].before &&
  87. !infos[position - 1].after)
  88. ) {
  89. continue
  90. }
  91. if (start !== position) {
  92. // If we have to use a character reference, an ampersand would be more
  93. // correct, but as backslashes only care about punctuation, either will
  94. // do the trick
  95. result.push(escapeBackslashes(value.slice(start, position), '\\'))
  96. }
  97. start = position
  98. if (
  99. /[!-/:-@[-`{-~]/.test(value.charAt(position)) &&
  100. (!config.encode || !config.encode.includes(value.charAt(position)))
  101. ) {
  102. // Character escape.
  103. result.push('\\')
  104. } else {
  105. // Character reference.
  106. result.push(
  107. '&#x' + value.charCodeAt(position).toString(16).toUpperCase() + ';'
  108. )
  109. start++
  110. }
  111. }
  112. result.push(escapeBackslashes(value.slice(start, end), config.after))
  113. return result.join('')
  114. }
  115. /**
  116. * @param {number} a
  117. * @param {number} b
  118. * @returns {number}
  119. */
  120. function numerical(a, b) {
  121. return a - b
  122. }
  123. /**
  124. * @param {string} value
  125. * @param {string} after
  126. * @returns {string}
  127. */
  128. function escapeBackslashes(value, after) {
  129. const expression = /\\(?=[!-/:-@[-`{-~])/g
  130. /** @type {Array<number>} */
  131. const positions = []
  132. /** @type {Array<string>} */
  133. const results = []
  134. const whole = value + after
  135. let index = -1
  136. let start = 0
  137. /** @type {RegExpExecArray | null} */
  138. let match
  139. while ((match = expression.exec(whole))) {
  140. positions.push(match.index)
  141. }
  142. while (++index < positions.length) {
  143. if (start !== positions[index]) {
  144. results.push(value.slice(start, positions[index]))
  145. }
  146. results.push('\\')
  147. start = positions[index]
  148. }
  149. results.push(value.slice(start))
  150. return results.join('')
  151. }