123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- /**
- * @typedef {import('micromark-util-types').Code} Code
- * @typedef {import('micromark-util-types').Construct} Construct
- * @typedef {import('micromark-util-types').State} State
- * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
- * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
- */
- import {decodeNamedCharacterReference} from 'decode-named-character-reference'
- import {
- asciiAlphanumeric,
- asciiDigit,
- asciiHexDigit
- } from 'micromark-util-character'
- import {codes, constants, types} from 'micromark-util-symbol'
- import {ok as assert} from 'devlop'
/**
 * Construct for character references.
 *
 * Pairs the construct name with the tokenizer that recognizes named
 * (`&amp;`), decimal (`&#123;`), and hexadecimal (`&#x9;`) references.
 *
 * @type {Construct}
 */
export const characterReference = {
  name: 'characterReference',
  tokenize: tokenizeCharacterReference
}
/**
 * Tokenize a character reference.
 *
 * Three forms are recognized, each terminated by `;`:
 *
 * - named: `&` followed by ASCII alphanumerics (for example `&amp;`); the
 *   name must be known to `decodeNamedCharacterReference`
 * - decimal: `&#` followed by ASCII digits (for example `&#123;`)
 * - hexadecimal: `&#x` or `&#X` followed by ASCII hex digits (for example
 *   `&#x9;`)
 *
 * The value must contain at least one character and at most the number of
 * characters given by the matching `constants.characterReference*SizeMax`.
 * Anything else is handed to `nok`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const self = this
  // Number of value characters accepted so far.
  let size = 0
  /**
   * Maximum number of value characters for the detected reference kind.
   *
   * @type {number}
   */
  let max
  /**
   * Predicate deciding whether a code may appear in the value; also used to
   * tell which reference kind is being parsed.
   *
   * @type {(code: Code) => boolean}
   */
  let test

  return start

  /**
   * Start of character reference.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.ampersand, 'expected `&`')
    effects.enter(types.characterReference)
    effects.enter(types.characterReferenceMarker)
    effects.consume(code)
    effects.exit(types.characterReferenceMarker)
    return open
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code === codes.numberSign) {
      effects.enter(types.characterReferenceMarkerNumeric)
      effects.consume(code)
      effects.exit(types.characterReferenceMarkerNumeric)
      return numeric
    }

    // Not numeric: treat as a named reference and let `value` judge `code`.
    effects.enter(types.characterReferenceValue)
    max = constants.characterReferenceNamedSizeMax
    test = asciiAlphanumeric
    return value(code)
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    if (code === codes.uppercaseX || code === codes.lowercaseX) {
      effects.enter(types.characterReferenceMarkerHexadecimal)
      effects.consume(code)
      effects.exit(types.characterReferenceMarkerHexadecimal)
      effects.enter(types.characterReferenceValue)
      max = constants.characterReferenceHexadecimalSizeMax
      test = asciiHexDigit
      // The `x` was consumed, so wait for the next code.
      return value
    }

    // No `x`: decimal reference; `code` itself is the first value candidate.
    effects.enter(types.characterReferenceValue)
    max = constants.characterReferenceDecimalSizeMax
    test = asciiDigit
    return value(code)
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // A `;` only closes the reference when at least one value character was
    // seen; otherwise it falls through and is rejected by `test` below.
    if (code === codes.semicolon && size) {
      const token = effects.exit(types.characterReferenceValue)

      // Named references must resolve to a known name.
      if (
        test === asciiAlphanumeric &&
        !decodeNamedCharacterReference(self.sliceSerialize(token))
      ) {
        return nok(code)
      }

      // To do: `markdown-rs` uses a different name:
      // `CharacterReferenceMarkerSemi`.
      effects.enter(types.characterReferenceMarker)
      effects.consume(code)
      effects.exit(types.characterReferenceMarker)
      effects.exit(types.characterReference)
      return ok
    }

    // Accept the code while it fits the kind and the size limit is not hit.
    if (test(code) && size++ < max) {
      effects.consume(code)
      return value
    }

    return nok(code)
  }
}
|