// unsafe.js
/**
 * @typedef {import('./types.js').ConstructName} ConstructName
 * @typedef {import('./types.js').Unsafe} Unsafe
 */
  5. /**
  6. * List of constructs that occur in phrasing (paragraphs, headings), but cannot
  7. * contain things like attention (emphasis, strong), images, or links.
  8. * So they sort of cancel each other out.
  9. * Note: could use a better name.
  10. *
  11. * @type {Array<ConstructName>}
  12. */
  13. const fullPhrasingSpans = [
  14. 'autolink',
  15. 'destinationLiteral',
  16. 'destinationRaw',
  17. 'reference',
  18. 'titleQuote',
  19. 'titleApostrophe'
  20. ]
  21. /** @type {Array<Unsafe>} */
  22. export const unsafe = [
  23. {character: '\t', after: '[\\r\\n]', inConstruct: 'phrasing'},
  24. {character: '\t', before: '[\\r\\n]', inConstruct: 'phrasing'},
  25. {
  26. character: '\t',
  27. inConstruct: ['codeFencedLangGraveAccent', 'codeFencedLangTilde']
  28. },
  29. {
  30. character: '\r',
  31. inConstruct: [
  32. 'codeFencedLangGraveAccent',
  33. 'codeFencedLangTilde',
  34. 'codeFencedMetaGraveAccent',
  35. 'codeFencedMetaTilde',
  36. 'destinationLiteral',
  37. 'headingAtx'
  38. ]
  39. },
  40. {
  41. character: '\n',
  42. inConstruct: [
  43. 'codeFencedLangGraveAccent',
  44. 'codeFencedLangTilde',
  45. 'codeFencedMetaGraveAccent',
  46. 'codeFencedMetaTilde',
  47. 'destinationLiteral',
  48. 'headingAtx'
  49. ]
  50. },
  51. {character: ' ', after: '[\\r\\n]', inConstruct: 'phrasing'},
  52. {character: ' ', before: '[\\r\\n]', inConstruct: 'phrasing'},
  53. {
  54. character: ' ',
  55. inConstruct: ['codeFencedLangGraveAccent', 'codeFencedLangTilde']
  56. },
  57. // An exclamation mark can start an image, if it is followed by a link or
  58. // a link reference.
  59. {
  60. character: '!',
  61. after: '\\[',
  62. inConstruct: 'phrasing',
  63. notInConstruct: fullPhrasingSpans
  64. },
  65. // A quote can break out of a title.
  66. {character: '"', inConstruct: 'titleQuote'},
  67. // A number sign could start an ATX heading if it starts a line.
  68. {atBreak: true, character: '#'},
  69. {character: '#', inConstruct: 'headingAtx', after: '(?:[\r\n]|$)'},
  70. // Dollar sign and percentage are not used in markdown.
  71. // An ampersand could start a character reference.
  72. {character: '&', after: '[#A-Za-z]', inConstruct: 'phrasing'},
  73. // An apostrophe can break out of a title.
  74. {character: "'", inConstruct: 'titleApostrophe'},
  75. // A left paren could break out of a destination raw.
  76. {character: '(', inConstruct: 'destinationRaw'},
  77. // A left paren followed by `]` could make something into a link or image.
  78. {
  79. before: '\\]',
  80. character: '(',
  81. inConstruct: 'phrasing',
  82. notInConstruct: fullPhrasingSpans
  83. },
  84. // A right paren could start a list item or break out of a destination
  85. // raw.
  86. {atBreak: true, before: '\\d+', character: ')'},
  87. {character: ')', inConstruct: 'destinationRaw'},
  88. // An asterisk can start thematic breaks, list items, emphasis, strong.
  89. {atBreak: true, character: '*', after: '(?:[ \t\r\n*])'},
  90. {character: '*', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans},
  91. // A plus sign could start a list item.
  92. {atBreak: true, character: '+', after: '(?:[ \t\r\n])'},
  93. // A dash can start thematic breaks, list items, and setext heading
  94. // underlines.
  95. {atBreak: true, character: '-', after: '(?:[ \t\r\n-])'},
  96. // A dot could start a list item.
  97. {atBreak: true, before: '\\d+', character: '.', after: '(?:[ \t\r\n]|$)'},
  98. // Slash, colon, and semicolon are not used in markdown for constructs.
  99. // A less than can start html (flow or text) or an autolink.
  100. // HTML could start with an exclamation mark (declaration, cdata, comment),
  101. // slash (closing tag), question mark (instruction), or a letter (tag).
  102. // An autolink also starts with a letter.
  103. // Finally, it could break out of a destination literal.
  104. {atBreak: true, character: '<', after: '[!/?A-Za-z]'},
  105. {
  106. character: '<',
  107. after: '[!/?A-Za-z]',
  108. inConstruct: 'phrasing',
  109. notInConstruct: fullPhrasingSpans
  110. },
  111. {character: '<', inConstruct: 'destinationLiteral'},
  112. // An equals to can start setext heading underlines.
  113. {atBreak: true, character: '='},
  114. // A greater than can start block quotes and it can break out of a
  115. // destination literal.
  116. {atBreak: true, character: '>'},
  117. {character: '>', inConstruct: 'destinationLiteral'},
  118. // Question mark and at sign are not used in markdown for constructs.
  119. // A left bracket can start definitions, references, labels,
  120. {atBreak: true, character: '['},
  121. {character: '[', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans},
  122. {character: '[', inConstruct: ['label', 'reference']},
  123. // A backslash can start an escape (when followed by punctuation) or a
  124. // hard break (when followed by an eol).
  125. // Note: typical escapes are handled in `safe`!
  126. {character: '\\', after: '[\\r\\n]', inConstruct: 'phrasing'},
  127. // A right bracket can exit labels.
  128. {character: ']', inConstruct: ['label', 'reference']},
  129. // Caret is not used in markdown for constructs.
  130. // An underscore can start emphasis, strong, or a thematic break.
  131. {atBreak: true, character: '_'},
  132. {character: '_', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans},
  133. // A grave accent can start code (fenced or text), or it can break out of
  134. // a grave accent code fence.
  135. {atBreak: true, character: '`'},
  136. {
  137. character: '`',
  138. inConstruct: ['codeFencedLangGraveAccent', 'codeFencedMetaGraveAccent']
  139. },
  140. {character: '`', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans},
  141. // Left brace, vertical bar, right brace are not used in markdown for
  142. // constructs.
  143. // A tilde can start code (fenced).
  144. {atBreak: true, character: '~'}
  145. ]