12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780 |
- /**
- * @typedef {import('micromark-util-types').Code} Code
- * @typedef {import('micromark-util-types').Construct} Construct
- * @typedef {import('micromark-util-types').State} State
- * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
- * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
- */
- import {factorySpace} from 'micromark-factory-space'
- import {
- asciiAlpha,
- asciiAlphanumeric,
- markdownLineEnding,
- markdownLineEndingOrSpace,
- markdownSpace
- } from 'micromark-util-character'
- import {codes, constants, types} from 'micromark-util-symbol'
- import {ok as assert} from 'devlop'
/**
 * Construct for HTML (text); its tokenizer is `tokenizeHtmlText`.
 *
 * @type {Construct}
 */
export const htmlText = {
  name: 'htmlText',
  tokenize: tokenizeHtmlText
}
/**
 * Tokenizer for HTML (text): opening and closing tags, comments,
 * instructions, declarations, and CDATA sections.
 *
 * Every successful path finishes in `end`, which requires a `>`.
 * A line ending inside the construct closes the current `htmlTextData`
 * token, is emitted as its own `lineEnding` token, and a new `htmlTextData`
 * token is opened before tokenizing continues in the interrupted state.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const context = this
  /**
   * Quote character that opened the attribute value currently being read;
   * reset to `undefined` once the matching quote is consumed.
   *
   * @type {NonNullable<Code> | undefined}
   */
  let quote
  /**
   * Position in `constants.cdataOpeningString` that is compared next.
   *
   * @type {number}
   */
  let cdataIndex
  /**
   * State to continue in after a line ending has been handled.
   *
   * @type {State}
   */
  let resumeState

  return start

  /**
   * Consume `code` and hand back the state that should receive the next code.
   *
   * @param {Code} code
   *   Code to consume.
   * @param {State} next
   *   State to continue in.
   * @returns {State}
   *   The given `next` state.
   */
  function eat(code, next) {
    effects.consume(code)
    return next
  }

  /**
   * Record which state to return to and process the line ending at `code`.
   *
   * @param {Code} code
   *   Code that is a markdown line ending.
   * @param {State} resume
   *   State to continue in after the line ending (and optional prefix).
   * @returns {State | undefined}
   *   Result of `lineEndingBefore`.
   */
  function breakLine(code, resume) {
    resumeState = resume
    return lineEndingBefore(code)
  }

  /**
   * Entry state, at the `<` that starts HTML (text), as in `a <b> c`.
   *
   * Opens the `htmlText` and `htmlTextData` tokens and consumes the `<`.
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.lessThan, 'expected `<`')
    effects.enter(types.htmlText)
    effects.enter(types.htmlTextData)
    return eat(code, open)
  }

  /**
   * Directly after `<`: branch on `!` (declaration, comment, or CDATA),
   * `/` (closing tag), `?` (instruction), or an ASCII alphabetical
   * character (opening tag name). Anything else is not HTML (text).
   *
   * @type {State}
   */
  function open(code) {
    switch (code) {
      case codes.exclamationMark: {
        return eat(code, declarationOpen)
      }

      case codes.slash: {
        return eat(code, tagCloseStart)
      }

      case codes.questionMark: {
        return eat(code, instruction)
      }

      default: {
        // ASCII alphabetical.
        return asciiAlpha(code) ? eat(code, tagOpen) : nok(code)
      }
    }
  }

  /**
   * After `<!`: a `-` leads to a comment (`<!--b-->`), a `[` to CDATA
   * (`<![CDATA[>&<]]>`), and an ASCII alphabetical character to a
   * declaration (`<!doctype>`).
   *
   * @type {State}
   */
  function declarationOpen(code) {
    if (code === codes.dash) {
      return eat(code, commentOpenInside)
    }

    if (code === codes.leftSquareBracket) {
      effects.consume(code)
      // Start matching the CDATA opening string from its first character.
      cdataIndex = 0
      return cdataOpenInside
    }

    return asciiAlpha(code) ? eat(code, declaration) : nok(code)
  }

  /**
   * After `<!-`: only a second `-` is acceptable.
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    return code === codes.dash ? eat(code, commentEnd) : nok(code)
  }

  /**
   * Inside a comment, as at `b` in `<!--b-->`.
   *
   * Fails at EOF, tracks a `-` as a potential start of the closing
   * sequence, and passes line endings to the line ending states.
   *
   * @type {State}
   */
  function comment(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.dash) {
      return eat(code, commentClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, comment)
    }

    return eat(code, comment)
  }

  /**
   * Inside a comment, after a single `-`: a second `-` moves on to
   * `commentEnd`, anything else is handled as regular comment content.
   *
   * @type {State}
   */
  function commentClose(code) {
    return code === codes.dash ? eat(code, commentEnd) : comment(code)
  }

  /**
   * Inside a comment, after `--`: a `>` finishes the construct, another `-`
   * is handled by `commentClose`, anything else by `comment`.
   *
   * @type {State}
   */
  function commentEnd(code) {
    if (code === codes.greaterThan) {
      return end(code)
    }

    if (code === codes.dash) {
      return commentClose(code)
    }

    return comment(code)
  }

  /**
   * After `<![`: match the characters of `constants.cdataOpeningString`
   * one at a time (the `CDATA[` part of `<![CDATA[>&<]]>`).
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const opening = constants.cdataOpeningString
    const expected = opening.charCodeAt(cdataIndex)

    // The position advances whether or not the character matches.
    cdataIndex++

    if (code !== expected) {
      return nok(code)
    }

    effects.consume(code)
    return cdataIndex === opening.length ? cdata : cdataOpenInside
  }

  /**
   * Inside CDATA content, as at `>&<` in `<![CDATA[>&<]]>`.
   *
   * Fails at EOF, tracks a `]` as a potential start of the closing
   * sequence, and passes line endings to the line ending states.
   *
   * @type {State}
   */
  function cdata(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.rightSquareBracket) {
      return eat(code, cdataClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, cdata)
    }

    return eat(code, cdata)
  }

  /**
   * Inside CDATA, after a single `]`: a second `]` moves on to `cdataEnd`,
   * anything else is handled as regular CDATA content.
   *
   * @type {State}
   */
  function cdataClose(code) {
    return code === codes.rightSquareBracket
      ? eat(code, cdataEnd)
      : cdata(code)
  }

  /**
   * Inside CDATA, after `]]`: a `>` finishes the construct, further `]`
   * characters are consumed while staying in this state, and anything else
   * is handled as regular CDATA content.
   *
   * @type {State}
   */
  function cdataEnd(code) {
    switch (code) {
      case codes.greaterThan: {
        return end(code)
      }

      case codes.rightSquareBracket: {
        return eat(code, cdataEnd)
      }

      default: {
        return cdata(code)
      }
    }
  }

  /**
   * Inside a declaration, as at `b` in `<!b>`.
   *
   * Both EOF and `>` are passed to `end`, which accepts only `>`.
   *
   * @type {State}
   */
  function declaration(code) {
    const atPossibleEnd = code === codes.eof || code === codes.greaterThan

    if (atPossibleEnd) {
      return end(code)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, declaration)
    }

    return eat(code, declaration)
  }

  /**
   * Inside an instruction, as at `b` in `<?b?>`.
   *
   * Fails at EOF, tracks a `?` as a potential start of the closing
   * sequence, and passes line endings to the line ending states.
   *
   * @type {State}
   */
  function instruction(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.questionMark) {
      return eat(code, instructionClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, instruction)
    }

    return eat(code, instruction)
  }

  /**
   * Inside an instruction, after `?`: a `>` finishes the construct, anything
   * else is handled as regular instruction content.
   *
   * @type {State}
   */
  function instructionClose(code) {
    if (code === codes.greaterThan) {
      return end(code)
    }

    return instruction(code)
  }

  /**
   * After `</`: the name of a closing tag must start with an ASCII
   * alphabetical character.
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    // ASCII alphabetical.
    return asciiAlpha(code) ? eat(code, tagClose) : nok(code)
  }

  /**
   * Inside the name of a closing tag, as after `b` in `</b>`.
   *
   * @type {State}
   */
  function tagClose(code) {
    // ASCII alphanumerical and `-`.
    const isNameCharacter = code === codes.dash || asciiAlphanumeric(code)

    return isNameCharacter ? eat(code, tagClose) : tagCloseBetween(code)
  }

  /**
   * In a closing tag, after the tag name: line endings and markdown spaces
   * may precede the final `>`.
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      return breakLine(code, tagCloseBetween)
    }

    return markdownSpace(code) ? eat(code, tagCloseBetween) : end(code)
  }

  /**
   * Inside the name of an opening tag, as after `b` in `<b>`.
   *
   * The name may only be followed by `/`, `>`, a line ending, or a space.
   *
   * @type {State}
   */
  function tagOpen(code) {
    // ASCII alphanumerical and `-`.
    if (code === codes.dash || asciiAlphanumeric(code)) {
      return eat(code, tagOpen)
    }

    const nameIsDone =
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownLineEndingOrSpace(code)

    return nameIsDone ? tagOpenBetween(code) : nok(code)
  }

  /**
   * In an opening tag, between the name, attributes, and the end of the tag.
   *
   * A `/` must be followed directly by `>`; `:`, `_`, or an ASCII
   * alphabetical character starts an attribute name.
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    if (code === codes.slash) {
      return eat(code, end)
    }

    // ASCII alphabetical and `:` and `_`.
    const startsAttributeName =
      code === codes.colon || code === codes.underscore || asciiAlpha(code)

    if (startsAttributeName) {
      return eat(code, tagOpenAttributeName)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenBetween)
    }

    return markdownSpace(code) ? eat(code, tagOpenBetween) : end(code)
  }

  /**
   * Inside an attribute name, as after `c` in `<b c>`.
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    const isNameCharacter =
      code === codes.dash ||
      code === codes.dot ||
      code === codes.colon ||
      code === codes.underscore ||
      asciiAlphanumeric(code)

    return isNameCharacter
      ? eat(code, tagOpenAttributeName)
      : tagOpenAttributeNameAfter(code)
  }

  /**
   * After an attribute name: an `=` introduces a value, line endings and
   * markdown spaces are skipped, and anything else is handled by
   * `tagOpenBetween`.
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    if (code === codes.equalsTo) {
      return eat(code, tagOpenAttributeValueBefore)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenAttributeNameAfter)
    }

    return markdownSpace(code)
      ? eat(code, tagOpenAttributeNameAfter)
      : tagOpenBetween(code)
  }

  /**
   * After `=`, before an attribute value, as before `d` in `<b c=d>`.
   *
   * Line endings and markdown spaces are allowed here. A `"` or `'` opens a
   * quoted value and is remembered so the matching quote can close it.
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    switch (code) {
      case codes.eof:
      case codes.lessThan:
      case codes.equalsTo:
      case codes.greaterThan:
      case codes.graveAccent: {
        return nok(code)
      }

      case codes.quotationMark:
      case codes.apostrophe: {
        effects.consume(code)
        quote = code
        return tagOpenAttributeValueQuoted
      }

      default: {
        if (markdownLineEnding(code)) {
          return breakLine(code, tagOpenAttributeValueBefore)
        }

        return eat(
          code,
          markdownSpace(code)
            ? tagOpenAttributeValueBefore
            : tagOpenAttributeValueUnquoted
        )
      }
    }
  }

  /**
   * Inside a double or single quoted attribute value, as at `d` in
   * `<b c="d">`.
   *
   * The value ends at the same quote character that opened it; EOF fails.
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    if (code === quote) {
      effects.consume(code)
      quote = undefined
      return tagOpenAttributeValueQuotedAfter
    }

    if (code === codes.eof) {
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenAttributeValueQuoted)
    }

    return eat(code, tagOpenAttributeValueQuoted)
  }

  /**
   * Inside an unquoted attribute value, as after `d` in `<b c=d>`.
   *
   * EOF, `"`, `'`, `<`, `=`, and `` ` `` are not allowed; `/`, `>`, a line
   * ending, or a space ends the value.
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    switch (code) {
      case codes.eof:
      case codes.quotationMark:
      case codes.apostrophe:
      case codes.lessThan:
      case codes.equalsTo:
      case codes.graveAccent: {
        return nok(code)
      }

      case codes.slash:
      case codes.greaterThan: {
        return tagOpenBetween(code)
      }

      default: {
        return markdownLineEndingOrSpace(code)
          ? tagOpenBetween(code)
          : eat(code, tagOpenAttributeValueUnquoted)
      }
    }
  }

  /**
   * Directly after the closing quote of an attribute value: only `/`, `>`,
   * a line ending, or a space may follow.
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    const mayFollowValue =
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownLineEndingOrSpace(code)

    return mayFollowValue ? tagOpenBetween(code) : nok(code)
  }

  /**
   * At a place where only `>` is allowed, such as the end of `<b c="d">`.
   *
   * On `>`, consumes it, closes `htmlTextData` and `htmlText`, and succeeds.
   *
   * @type {State}
   */
  function end(code) {
    if (code !== codes.greaterThan) {
      return nok(code)
    }

    effects.consume(code)
    effects.exit(types.htmlTextData)
    effects.exit(types.htmlText)
    return ok
  }

  /**
   * At a line ending inside the construct, for example after `<!--a` when
   * the comment continues on the next line.
   *
   * Closes the current `htmlTextData` token and emits the line ending as a
   * `lineEnding` token.
   *
   * > 👉 **Note**: blank lines cannot occur in text, so empty tokens are not
   * > a concern here.
   *
   * @type {State}
   */
  function lineEndingBefore(code) {
    assert(resumeState, 'expected return state')
    assert(markdownLineEnding(code), 'expected eol')
    effects.exit(types.htmlTextData)
    effects.enter(types.lineEnding)
    effects.consume(code)
    effects.exit(types.lineEnding)
    return lineEndingAfter
  }

  /**
   * After a line ending, at optional whitespace on the continuation line.
   *
   * Leading markdown space is delegated to `factorySpace` with the
   * `linePrefix` type; `constants.tabSize` is passed as the last argument
   * unless `codeIndented` is listed in `disable.null`.
   *
   * > 👉 **Note**: blank lines cannot occur in text, so empty tokens are not
   * > a concern here.
   *
   * @type {State}
   */
  function lineEndingAfter(code) {
    const disabled = context.parser.constructs.disable.null

    // Always populated by defaults.
    assert(disabled, 'expected `disable.null` to be populated')

    if (!markdownSpace(code)) {
      return lineEndingAfterPrefix(code)
    }

    const limit = disabled.includes('codeIndented')
      ? undefined
      : constants.tabSize

    return factorySpace(
      effects,
      lineEndingAfterPrefix,
      types.linePrefix,
      limit
    )(code)
  }

  /**
   * After a line ending and its optional whitespace: open a new
   * `htmlTextData` token and continue in the state that was interrupted.
   *
   * > 👉 **Note**: blank lines cannot occur in text, so empty tokens are not
   * > a concern here.
   *
   * @type {State}
   */
  function lineEndingAfterPrefix(code) {
    effects.enter(types.htmlTextData)
    return resumeState(code)
  }
}
|