123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- /**
- * @typedef {import('micromark-util-types').Code} Code
- */
/**
 * Test whether a character code is an ASCII letter, in either case.
 *
 * The accepted characters are the **ASCII upper alphas**, U+0041 (`A`)
 * through U+005A (`Z`), and the **ASCII lower alphas**, U+0061 (`a`) through
 * U+007A (`z`). Together they form the **ASCII alphas**.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is an ASCII alpha, `false` otherwise.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/);
/**
 * Test whether a character code is an ASCII letter (either case) or an ASCII
 * digit.
 *
 * An **ASCII alphanumeric** is anything accepted by `asciiDigit` (`0` through
 * `9`) or by `asciiAlpha` (`a` through `z`, case insensitive).
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is an ASCII alphanumeric, `false` otherwise.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/);
/**
 * Test whether a character code is an ASCII atext character.
 *
 * The pattern accepts every ASCII alphanumeric (see `asciiAlphanumeric`) plus
 * the following characters:
 *
 * - U+0023 NUMBER SIGN (`#`) through U+0027 APOSTROPHE (`'`)
 * - U+002A ASTERISK (`*`) and U+002B PLUS SIGN (`+`)
 * - U+002D DASH (`-`) and U+002F SLASH (`/`)
 * - U+003D EQUALS TO (`=`) and U+003F QUESTION MARK (`?`)
 * - U+005E CARET (`^`) through U+0060 GRAVE ACCENT (`` ` ``)
 * - U+007B LEFT CURLY BRACE (`{`) through U+007E TILDE (`~`)
 *
 * Note: the dash, slash, and digits are covered by the single range `\--9`
 * (U+002D through U+0039), so U+002E DOT (`.`) is accepted as well.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is matched by the atext pattern, `false` otherwise.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/);
/**
 * Test whether a character code is an ASCII control character.
 *
 * An **ASCII control** is a character in the inclusive range U+0000 NULL
 * (NUL) to U+001F (US), or U+007F (DEL). Because the comparison is a plain
 * "less than 32", the negative codes micromark uses for its special
 * whitespace characters are reported as controls too.
 *
 * @param {Code} code
 *   Character code to test; `null` never matches.
 * @returns {boolean}
 *   `true` when `code` is a control (or a negative virtual code), `false`
 *   otherwise.
 */
export function asciiControl(code) {
  if (code === null) {
    return false;
  }

  // DEL sits on its own above the printable range; everything below SPACE
  // (including the negative virtual codes) counts as a control.
  return code === 127 || code < 32;
}
/**
 * Test whether a character code is an ASCII digit.
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is an ASCII digit, `false` otherwise.
 */
export const asciiDigit = regexCheck(/\d/);
/**
 * Test whether a character code is an ASCII hexadecimal digit.
 *
 * An **ASCII hex digit** is one of:
 *
 * - an ASCII digit (see `asciiDigit`), `0` through `9`
 * - an **ASCII upper hex digit**, U+0041 (`A`) through U+0046 (`F`)
 * - an **ASCII lower hex digit**, U+0061 (`a`) through U+0066 (`f`)
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is an ASCII hex digit, `false` otherwise.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/);
/**
 * Test whether a character code is ASCII punctuation.
 *
 * **ASCII punctuation** is any character in one of these inclusive ranges:
 *
 * - U+0021 EXCLAMATION MARK (`!`) through U+002F SLASH (`/`)
 * - U+003A COLON (`:`) through U+0040 AT SIGN (`@`)
 * - U+005B LEFT SQUARE BRACKET (`[`) through U+0060 GRAVE ACCENT (`` ` ``)
 * - U+007B LEFT CURLY BRACE (`{`) through U+007E TILDE (`~`)
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is ASCII punctuation, `false` otherwise.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/);
/**
 * Test whether a character code is a markdown line ending.
 *
 * A **markdown line ending** is one of the virtual characters M-0003 CARRIAGE
 * RETURN LINE FEED (CRLF), M-0004 LINE FEED (LF), or M-0005 CARRIAGE RETURN
 * (CR).
 *
 * In micromark, the real characters U+000A LINE FEED (LF) and U+000D CARRIAGE
 * RETURN (CR) are replaced by these virtual characters, depending on whether
 * they occurred together, so the real codes 10 and 13 do not match here.
 *
 * @param {Code} code
 *   Character code to test; `null` never matches.
 * @returns {boolean}
 *   `true` when `code` is a virtual line ending, `false` otherwise.
 */
export function markdownLineEnding(code) {
  // M-0002 HORIZONTAL TAB: the lowest virtual code that is *not* a line
  // ending. Every code below it is one.
  const horizontalTab = -2;

  if (code === null) {
    return false;
  }

  return code < horizontalTab;
}
/**
 * Test whether a character code is a markdown line ending (see
 * `markdownLineEnding`) or a markdown space (see `markdownSpace`).
 *
 * Every negative code is accepted, together with U+0020 SPACE (SP).
 *
 * @param {Code} code
 *   Character code to test; `null` never matches.
 * @returns {boolean}
 *   `true` when `code` is a line ending or a space, `false` otherwise.
 */
export function markdownLineEndingOrSpace(code) {
  if (code === null) {
    return false;
  }

  // The only non-negative match is the concrete space character.
  return code === 32 || code < 0;
}
/**
 * Test whether a character code is a markdown space.
 *
 * A **markdown space** is the concrete character U+0020 SPACE (SP) or one of
 * the virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB
 * (HT).
 *
 * In micromark, the real character U+0009 CHARACTER TABULATION (HT) is
 * replaced by one M-0002 HORIZONTAL TAB (HT) followed by between 0 and 3
 * M-0001 VIRTUAL SPACE (VS) characters, depending on the column at which the
 * tab occurred.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is a markdown space, `false` otherwise.
 */
export function markdownSpace(code) {
  switch (code) {
    case -2: // M-0002 HORIZONTAL TAB (HT)
    case -1: // M-0001 VIRTUAL SPACE (VS)
    case 32: // U+0020 SPACE (SP)
      return true;
    default:
      return false;
  }
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// in fact adds to the bundle size.
/**
 * Test whether a character code is Unicode punctuation or a Unicode symbol.
 *
 * The pattern accepts any character whose Unicode general category is `P`
 * (punctuation: `Pc` Connector, `Pd` Dash, `Pe` Close, `Pf` Final quote, `Pi`
 * Initial quote, `Po` Other, `Ps` Open) or `S` (symbol: `Sc` Currency, `Sk`
 * Modifier, `Sm` Math, `So` Other). Every ASCII punctuation character (see
 * `asciiPunctuation`) falls in one of these categories and so matches too.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is in a `P` or `S` category, `false` otherwise.
 */
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
/**
 * Test whether a character code is Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: those are negative codes, which the regex-based check rejects.
 * See `markdownLineEndingOrSpace` to check for them.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * NOTE(review): the check is implemented with the JavaScript `\s` class,
 * which additionally matches U+000B LINE TABULATION, U+2028 LINE SEPARATOR,
 * U+2029 PARAGRAPH SEPARATOR, and U+FEFF ZERO WIDTH NO-BREAK SPACE.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is matched by `\s`, `false` otherwise.
 */
export const unicodeWhitespace = regexCheck(/\s/);
/**
 * Create a code check from a regex.
 *
 * The returned predicate turns a character code into a one-character string
 * and tests it against `regex`. `null` and the negative virtual codes never
 * match, and neither does any value that is not a valid Unicode code point.
 *
 * @param {RegExp} regex
 *   Pattern a single character is tested against. It should not carry the
 *   `g` or `y` flag, because `RegExp#test` is stateful for such patterns.
 * @returns {(code: Code) => boolean}
 *   Predicate bound to `regex`.
 */
function regexCheck(regex) {
  return check;

  /**
   * Check whether a code matches the bound regex.
   *
   * @param {Code} code
   *   Character code.
   * @returns {boolean}
   *   Whether the character code matches the bound regex.
   */
  function check(code) {
    // `!(code > -1)` rather than `code < 0` so that `undefined` and `NaN` are
    // rejected as well.
    if (code === null || !(code > -1)) {
      return false;
    }

    // `String.fromCodePoint` throws on non-integers and on values above
    // U+10FFFF, so reject anything that is not a valid code point up front.
    if (!Number.isInteger(code) || code > 0x10ffff) {
      return false;
    }

    // `String.fromCharCode` keeps only the low 16 bits, which would test an
    // astral code (such as 0x10021) as an unrelated BMP character (`!`).
    // `String.fromCodePoint` yields the same string for codes up to 0xFFFF and
    // the correct character above that.
    return regex.test(String.fromCodePoint(code));
  }
}
|