index.js 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /**
  2. * @typedef {import('micromark-util-types').Code} Code
  3. */
  4. import {codes} from 'micromark-util-symbol'
  /**
   * Test whether a character code stands for an ASCII letter (`A` through `Z`
   * or `a` through `z`).
   *
   * An **ASCII alpha** is either of:
   *
   * - an **ASCII upper alpha**: a character in the inclusive range U+0041 (`A`)
   *   to U+005A (`Z`);
   * - an **ASCII lower alpha**: a character in the inclusive range U+0061 (`a`)
   *   to U+007A (`z`).
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is an ASCII letter, `false` otherwise.
   */
  export const asciiAlpha = regexCheck(/[A-Za-z]/)
  /**
   * Test whether a character code stands for an ASCII letter or digit (`A`
   * through `Z`, `a` through `z`, or `0` through `9`).
   *
   * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or an ASCII
   * alpha (see `asciiAlpha`).
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is an ASCII letter or digit, `false` otherwise.
   */
  export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)
  /**
   * Test whether a character code stands for an ASCII atext character.
   *
   * As implemented here, atext is an ASCII alphanumeric (see
   * `asciiAlphanumeric`) or one of:
   *
   * - U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`);
   * - U+002A ASTERISK (`*`) and U+002B PLUS SIGN (`+`);
   * - U+002D DASH (`-`), U+002E DOT (`.`), and U+002F SLASH (`/`);
   * - U+003D EQUALS TO (`=`) and U+003F QUESTION MARK (`?`);
   * - U+005E CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``);
   * - U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
   *
   * NOTE(review): unlike `atext` in RFC 5322, this pattern accepts U+002E DOT
   * (`.`) and rejects U+0021 EXCLAMATION MARK (`!`) — confirm that this is
   * intentional before changing the pattern.
   *
   * See:
   * **\[RFC5322]**:
   * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
   * P. Resnick.
   * IETF.
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is an atext character, `false` otherwise.
   */
  export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)
  /**
   * Test whether a character code is an ASCII control character.
   *
   * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
   * to U+001F (US), or U+007F (DEL).
   *
   * Every non-null code below `codes.space` matches, so the special whitespace
   * codes (which have negative values) are reported as controls too.
   *
   * @param {Code} code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is a control, `false` otherwise (including for `null`).
   */
  export function asciiControl(code) {
    if (code === null) {
      return false
    }

    // DEL is the one control above the printable range; everything else that
    // counts (C0 and the negative special codes) sorts below space.
    return code === codes.del || code < codes.space
  }
  /**
   * Test whether a character code stands for an ASCII digit (`0` through `9`).
   *
   * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
   * U+0039 (`9`).
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is an ASCII digit, `false` otherwise.
   */
  export const asciiDigit = regexCheck(/\d/)
  /**
   * Test whether a character code stands for an ASCII hexadecimal digit (`0`
   * through `9`, `A` through `F`, or `a` through `f`).
   *
   * An **ASCII hex digit** is one of:
   *
   * - an ASCII digit (see `asciiDigit`);
   * - an **ASCII upper hex digit**: a character in the inclusive range U+0041
   *   (`A`) to U+0046 (`F`);
   * - an **ASCII lower hex digit**: a character in the inclusive range U+0061
   *   (`a`) to U+0066 (`f`).
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is an ASCII hex digit, `false` otherwise.
   */
  export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)
  /**
   * Test whether a character code stands for ASCII punctuation.
   *
   * An **ASCII punctuation** is a character in one of these inclusive ranges:
   *
   * - U+0021 EXCLAMATION MARK (`!`) to U+002F SLASH (`/`);
   * - U+003A COLON (`:`) to U+0040 AT SIGN (`@`);
   * - U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT (`` ` ``);
   * - U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is ASCII punctuation, `false` otherwise.
   */
  export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)
  /**
   * Test whether a character code is a markdown line ending.
   *
   * A **markdown line ending** is one of the virtual characters M-0003 CARRIAGE
   * RETURN LINE FEED (CRLF), M-0004 LINE FEED (LF), or M-0005 CARRIAGE RETURN
   * (CR).
   *
   * In micromark, the actual characters U+000A LINE FEED (LF) and U+000D
   * CARRIAGE RETURN (CR) are replaced by these virtual characters, depending on
   * whether they occurred together.
   *
   * @param {Code} code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is a line ending, `false` otherwise (including for
   *   `null`).
   */
  export function markdownLineEnding(code) {
    if (code === null) {
      return false
    }

    // Line endings are the codes that sort below the virtual horizontal tab.
    return code < codes.horizontalTab
  }
  /**
   * Test whether a character code is a markdown line ending (see
   * `markdownLineEnding`) or a markdown space (see `markdownSpace`).
   *
   * @param {Code} code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is a line ending or space, `false` otherwise
   *   (including for `null`).
   */
  export function markdownLineEndingOrSpace(code) {
    if (code === null) {
      return false
    }

    // Either the concrete space, or any code that sorts below `codes.nul`.
    return code === codes.space || code < codes.nul
  }
  /**
   * Test whether a character code is a markdown space.
   *
   * A **markdown space** is the concrete character U+0020 SPACE (SP) or one of
   * the virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB
   * (HT).
   *
   * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
   * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001
   * VIRTUAL SPACE (VS) characters, depending on the column at which the tab
   * occurred.
   *
   * @param {Code} code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is a markdown space, `false` otherwise.
   */
  export function markdownSpace(code) {
    switch (code) {
      case codes.horizontalTab:
      case codes.virtualSpace:
      case codes.space: {
        return true
      }

      default: {
        return false
      }
    }
  }
  // Size note: dropping ASCII from the regex and delegating to
  // `asciiPunctuation` here in fact adds to the bundle size.
  /**
   * Test whether a character code stands for Unicode punctuation.
   *
   * A **Unicode punctuation** is a character in any Unicode Punctuation
   * category — `Pc` (Connector), `Pd` (Dash), `Pe` (Close), `Pf` (Final quote),
   * `Pi` (Initial quote), `Po` (Other), or `Ps` (Open) — or in any Unicode
   * Symbol category — `Sc` (Currency), `Sk` (Modifier), `Sm` (Math), or `So`
   * (Other).
   * Every ASCII punctuation character (see `asciiPunctuation`) is in one of
   * those categories.
   *
   * See:
   * **\[UNICODE]**:
   * [The Unicode Standard](https://www.unicode.org/versions/).
   * Unicode Consortium.
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is Unicode punctuation, `false` otherwise.
   */
  export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u)
  /**
   * Test whether a character code stands for Unicode whitespace.
   *
   * Note that this does **not** handle micromark specific markdown whitespace
   * characters: those virtual codes are negative and never match.
   * See `markdownLineEndingOrSpace` to check for them.
   *
   * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
   * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
   * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
   *
   * NOTE(review): the check is the JavaScript `\s` class, which also matches a
   * few characters outside that definition (such as U+000B and U+2028) —
   * confirm that is acceptable.
   *
   * See:
   * **\[UNICODE]**:
   * [The Unicode Standard](https://www.unicode.org/versions/).
   * Unicode Consortium.
   *
   * @param code
   *   Character code to test.
   * @returns {boolean}
   *   `true` when `code` is Unicode whitespace, `false` otherwise.
   */
  export const unicodeWhitespace = regexCheck(/\s/)
  /**
   * Create a character-code predicate from a regular expression.
   *
   * The returned function turns a code into the character it stands for and
   * tests that single character against `regex`.
   *
   * `regex` should not carry the `g` or `y` flag: `RegExp#test` would then keep
   * state in `lastIndex` between calls.
   *
   * @param {RegExp} regex
   *   Pattern that a single character is tested against.
   * @returns {(code: Code) => boolean}
   *   Predicate reporting whether a code matches `regex`.
   */
  function regexCheck(regex) {
    return check

    /**
     * Check whether a code matches the bound regex.
     *
     * @param {Code} code
     *   Character code.
     * @returns {boolean}
     *   Whether the character code matches the bound regex; always `false` for
     *   `null`, for negative codes, and for values that are not a valid code.
     */
    function check(code) {
      // `null` and negative codes never match. The negated comparison also
      // rejects `undefined` and `NaN`.
      if (code === null || !(code > -1)) {
        return false
      }

      // UTF-16 code units: converted exactly as before.
      if (code <= 0xff_ff) {
        return regex.test(String.fromCharCode(code))
      }

      // `String.fromCharCode` keeps only the low 16 bits, so a larger code
      // would be tested as an unrelated BMP character (0x10041 would look like
      // `A`). Treat such a value as a code point instead. `fromCodePoint`
      // throws on non-integers and on values above U+10FFFF, so filter those
      // out first.
      return (
        Number.isInteger(code) &&
        code <= 0x10_ff_ff &&
        regex.test(String.fromCodePoint(code))
      )
    }
  }