index.js 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /**
  2. * @typedef {import('micromark-util-types').Code} Code
  3. */
/**
 * Check whether the character code represents an ASCII alpha (`a` through `z`,
 * case insensitive).
 *
 * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
 *
 * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
 * to U+005A (`Z`).
 *
 * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
 * to U+007A (`z`).
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/);
/**
 * Check whether the character code represents an ASCII alphanumeric (`a`
 * through `z`, case insensitive, or `0` through `9`).
 *
 * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
 * (see `asciiAlpha`).
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/);
/**
 * Check whether the character code represents an ASCII atext.
 *
 * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
 * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
 * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
 * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
 * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
 * (`{`) to U+007E TILDE (`~`).
 *
 * Implementation note: the regex range `\--9` runs from U+002D (`-`) through
 * U+0039 (`9`), so U+002E DOT (`.`) is matched as well, even though it is not
 * listed above.
 * U+0021 EXCLAMATION MARK (`!`) is not matched.
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/);
  57. /**
  58. * Check whether a character code is an ASCII control character.
  59. *
  60. * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
  61. * to U+001F (US), or U+007F (DEL).
  62. *
  63. * @param {Code} code
  64. * Code.
  65. * @returns {boolean}
  66. * Whether it matches.
  67. */
  68. export function asciiControl(code) {
  69. return (
  70. // Special whitespace codes (which have negative values), C0 and Control
  71. // character DEL
  72. code !== null && (code < 32 || code === 127)
  73. );
  74. }
/**
 * Check whether the character code represents an ASCII digit (`0` through `9`).
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiDigit = regexCheck(/\d/);
/**
 * Check whether the character code represents an ASCII hex digit (`a` through
 * `f`, case insensitive, or `0` through `9`).
 *
 * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
 * digit, or an ASCII lower hex digit.
 *
 * An **ASCII upper hex digit** is a character in the inclusive range U+0041
 * (`A`) to U+0046 (`F`).
 *
 * An **ASCII lower hex digit** is a character in the inclusive range U+0061
 * (`a`) to U+0066 (`f`).
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/);
/**
 * Check whether the character code represents ASCII punctuation.
 *
 * An **ASCII punctuation** is a character in the inclusive ranges U+0021
 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
 * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/);
  120. /**
  121. * Check whether a character code is a markdown line ending.
  122. *
  123. * A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
  124. * LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
  125. *
  126. * In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
  127. * RETURN (CR) are replaced by these virtual characters depending on whether
  128. * they occurred together.
  129. *
  130. * @param {Code} code
  131. * Code.
  132. * @returns {boolean}
  133. * Whether it matches.
  134. */
  135. export function markdownLineEnding(code) {
  136. return code !== null && code < -2;
  137. }
  138. /**
  139. * Check whether a character code is a markdown line ending (see
  140. * `markdownLineEnding`) or markdown space (see `markdownSpace`).
  141. *
  142. * @param {Code} code
  143. * Code.
  144. * @returns {boolean}
  145. * Whether it matches.
  146. */
  147. export function markdownLineEndingOrSpace(code) {
  148. return code !== null && (code < 0 || code === 32);
  149. }
  150. /**
  151. * Check whether a character code is a markdown space.
  152. *
  153. * A **markdown space** is the concrete character U+0020 SPACE (SP) and the
  154. * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
  155. *
  156. * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
  157. * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
  158. * SPACE (VS) characters, depending on the column at which the tab occurred.
  159. *
  160. * @param {Code} code
  161. * Code.
  162. * @returns {boolean}
  163. * Whether it matches.
  164. */
  165. export function markdownSpace(code) {
  166. return code === -2 || code === -1 || code === 32;
  167. }
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// in fact adds to the bundle size.
/**
 * Check whether the character code represents Unicode punctuation.
 *
 * A **Unicode punctuation** is a character in the Unicode `P` (Punctuation)
 * general category — `Pc` (Punctuation, Connector), `Pd` (Punctuation, Dash),
 * `Pe` (Punctuation, Close), `Pf` (Punctuation, Final quote), `Pi`
 * (Punctuation, Initial quote), `Po` (Punctuation, Other), or `Ps`
 * (Punctuation, Open) — or in the Unicode `S` (Symbol) general category, which
 * the regex matches through `\p{S}`.
 *
 * Every ASCII punctuation character (see `asciiPunctuation`) falls in one of
 * those two general categories, so it is matched here as well.
 *
 * `null` and negative codes never match: the check built by `regexCheck`
 * rejects them before the regex is consulted.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
/**
 * Check whether the character code represents Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: those have negative codes, which the check built by `regexCheck`
 * always rejects.
 * See `markdownLineEndingOrSpace` to check that.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * Implementation note: the check uses the JavaScript `\s` class, which
 * additionally matches U+000B (VT), U+2028 LINE SEPARATOR, U+2029 PARAGRAPH
 * SEPARATOR, and U+FEFF.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const unicodeWhitespace = regexCheck(/\s/);
  211. /**
  212. * Create a code check from a regex.
  213. *
  214. * @param {RegExp} regex
  215. * @returns {(code: Code) => boolean}
  216. */
  217. function regexCheck(regex) {
  218. return check;
  219. /**
  220. * Check whether a code matches the bound regex.
  221. *
  222. * @param {Code} code
  223. * Character code.
  224. * @returns {boolean}
  225. * Whether the character code matches the bound regex.
  226. */
  227. function check(code) {
  228. return code !== null && code > -1 && regex.test(String.fromCharCode(code));
  229. }
  230. }