/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {decodeNamedCharacterReference} from 'decode-named-character-reference'
import {
  asciiAlphanumeric,
  asciiDigit,
  asciiHexDigit
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'

/**
 * Construct for character references: named (`&amp;`), decimal (`&#123;`),
 * and hexadecimal (`&#x9;`).
 *
 * @type {Construct}
 */
export const characterReference = {
  name: 'characterReference',
  tokenize: tokenizeCharacterReference
}

/**
 * Tokenize one character reference.
 *
 * The reference starts at `&`, optionally continues with `#` (numeric) and
 * `x`/`X` (hexadecimal), then holds a value of at least one character, and
 * ends at `;`.
 * Named values must additionally be resolvable by
 * `decodeNamedCharacterReference`; anything else results in `nok`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const context = this
  // Number of value characters seen so far.
  let consumed = 0
  /** @type {number} */
  let limit
  /** @type {(code: Code) => boolean} */
  let accepts

  return start

  /**
   * At the `&` that opens a character reference.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.ampersand, 'expected `&`')
    effects.enter(types.characterReference)
    effects.enter(types.characterReferenceMarker)
    effects.consume(code)
    effects.exit(types.characterReferenceMarker)
    return open
  }

  /**
   * Directly after `&`: either at `#`, which starts a numeric reference, or
   * at the first character of a named reference.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code !== codes.numberSign) {
      // No `#`: this code already belongs to the (named) value, so hand it
      // to `value` without consuming it here.
      beginValue(constants.characterReferenceNamedSizeMax, asciiAlphanumeric)
      return value(code)
    }

    effects.enter(types.characterReferenceMarkerNumeric)
    effects.consume(code)
    effects.exit(types.characterReferenceMarkerNumeric)
    return numeric
  }

  /**
   * Directly after `&#`: either at `x`/`X`, which starts a hexadecimal
   * reference, or at the first character of a decimal reference.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    const hexadecimal =
      code === codes.uppercaseX || code === codes.lowercaseX

    if (!hexadecimal) {
      // No `x`: this code already belongs to the (decimal) value.
      beginValue(constants.characterReferenceDecimalSizeMax, asciiDigit)
      return value(code)
    }

    effects.enter(types.characterReferenceMarkerHexadecimal)
    effects.consume(code)
    effects.exit(types.characterReferenceMarkerHexadecimal)
    beginValue(constants.characterReferenceHexadecimalSizeMax, asciiHexDigit)
    return value
  }

  /**
   * Inside the value, after the markers (`&`, `&#`, or `&#x`) and before
   * the closing `;`.
   *
   * Which characters are accepted, and how many, depends on the kind of
   * reference chosen in `open` / `numeric`.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // A `;` only closes the reference when the value is not empty.
    const closing = code === codes.semicolon && consumed > 0

    if (!closing) {
      if (accepts(code) && consumed++ < limit) {
        effects.consume(code)
        return value
      }

      return nok(code)
    }

    const token = effects.exit(types.characterReferenceValue)
    // Only named references are checked: the name has to be a known one.
    const unknownName =
      accepts === asciiAlphanumeric &&
      !decodeNamedCharacterReference(context.sliceSerialize(token))

    if (unknownName) {
      return nok(code)
    }

    // To do: `markdown-rs` calls this token `CharacterReferenceMarkerSemi`.
    effects.enter(types.characterReferenceMarker)
    effects.consume(code)
    effects.exit(types.characterReferenceMarker)
    effects.exit(types.characterReference)
    return ok
  }

  /**
   * Open the value token and remember the rules for the chosen kind of
   * reference.
   *
   * @param {number} sizeMax
   *   Size limit that `value` compares the character count against.
   * @param {(code: Code) => boolean} check
   *   Test deciding whether a code may be part of the value.
   * @returns {undefined}
   *   Nothing.
   */
  function beginValue(sizeMax, check) {
    effects.enter(types.characterReferenceValue)
    limit = sizeMax
    accepts = check
  }
}