autolink.js 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /**
  2. * @typedef {import('micromark-util-types').Construct} Construct
  3. * @typedef {import('micromark-util-types').State} State
  4. * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
  5. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  6. */
  7. import {
  8. asciiAlpha,
  9. asciiAlphanumeric,
  10. asciiAtext,
  11. asciiControl
  12. } from 'micromark-util-character'
  13. import {codes, constants, types} from 'micromark-util-symbol'
  14. import {ok as assert} from 'devlop'
  15. /** @type {Construct} */
  16. export const autolink = {name: 'autolink', tokenize: tokenizeAutolink}
  17. /**
  18. * @this {TokenizeContext}
  19. * @type {Tokenizer}
  20. */
  21. function tokenizeAutolink(effects, ok, nok) {
  22. let size = 0
  23. return start
  24. /**
  25. * Start of an autolink.
  26. *
  27. * ```markdown
  28. * > | a<https://example.com>b
  29. * ^
  30. * > | a<user@example.com>b
  31. * ^
  32. * ```
  33. *
  34. * @type {State}
  35. */
  36. function start(code) {
  37. assert(code === codes.lessThan, 'expected `<`')
  38. effects.enter(types.autolink)
  39. effects.enter(types.autolinkMarker)
  40. effects.consume(code)
  41. effects.exit(types.autolinkMarker)
  42. effects.enter(types.autolinkProtocol)
  43. return open
  44. }
  45. /**
  46. * After `<`, at protocol or atext.
  47. *
  48. * ```markdown
  49. * > | a<https://example.com>b
  50. * ^
  51. * > | a<user@example.com>b
  52. * ^
  53. * ```
  54. *
  55. * @type {State}
  56. */
  57. function open(code) {
  58. if (asciiAlpha(code)) {
  59. effects.consume(code)
  60. return schemeOrEmailAtext
  61. }
  62. return emailAtext(code)
  63. }
  64. /**
  65. * At second byte of protocol or atext.
  66. *
  67. * ```markdown
  68. * > | a<https://example.com>b
  69. * ^
  70. * > | a<user@example.com>b
  71. * ^
  72. * ```
  73. *
  74. * @type {State}
  75. */
  76. function schemeOrEmailAtext(code) {
  77. // ASCII alphanumeric and `+`, `-`, and `.`.
  78. if (
  79. code === codes.plusSign ||
  80. code === codes.dash ||
  81. code === codes.dot ||
  82. asciiAlphanumeric(code)
  83. ) {
  84. // Count the previous alphabetical from `open` too.
  85. size = 1
  86. return schemeInsideOrEmailAtext(code)
  87. }
  88. return emailAtext(code)
  89. }
  90. /**
  91. * In ambiguous protocol or atext.
  92. *
  93. * ```markdown
  94. * > | a<https://example.com>b
  95. * ^
  96. * > | a<user@example.com>b
  97. * ^
  98. * ```
  99. *
  100. * @type {State}
  101. */
  102. function schemeInsideOrEmailAtext(code) {
  103. if (code === codes.colon) {
  104. effects.consume(code)
  105. size = 0
  106. return urlInside
  107. }
  108. // ASCII alphanumeric and `+`, `-`, and `.`.
  109. if (
  110. (code === codes.plusSign ||
  111. code === codes.dash ||
  112. code === codes.dot ||
  113. asciiAlphanumeric(code)) &&
  114. size++ < constants.autolinkSchemeSizeMax
  115. ) {
  116. effects.consume(code)
  117. return schemeInsideOrEmailAtext
  118. }
  119. size = 0
  120. return emailAtext(code)
  121. }
  122. /**
  123. * After protocol, in URL.
  124. *
  125. * ```markdown
  126. * > | a<https://example.com>b
  127. * ^
  128. * ```
  129. *
  130. * @type {State}
  131. */
  132. function urlInside(code) {
  133. if (code === codes.greaterThan) {
  134. effects.exit(types.autolinkProtocol)
  135. effects.enter(types.autolinkMarker)
  136. effects.consume(code)
  137. effects.exit(types.autolinkMarker)
  138. effects.exit(types.autolink)
  139. return ok
  140. }
  141. // ASCII control, space, or `<`.
  142. if (
  143. code === codes.eof ||
  144. code === codes.space ||
  145. code === codes.lessThan ||
  146. asciiControl(code)
  147. ) {
  148. return nok(code)
  149. }
  150. effects.consume(code)
  151. return urlInside
  152. }
  153. /**
  154. * In email atext.
  155. *
  156. * ```markdown
  157. * > | a<user.name@example.com>b
  158. * ^
  159. * ```
  160. *
  161. * @type {State}
  162. */
  163. function emailAtext(code) {
  164. if (code === codes.atSign) {
  165. effects.consume(code)
  166. return emailAtSignOrDot
  167. }
  168. if (asciiAtext(code)) {
  169. effects.consume(code)
  170. return emailAtext
  171. }
  172. return nok(code)
  173. }
  174. /**
  175. * In label, after at-sign or dot.
  176. *
  177. * ```markdown
  178. * > | a<user.name@example.com>b
  179. * ^ ^
  180. * ```
  181. *
  182. * @type {State}
  183. */
  184. function emailAtSignOrDot(code) {
  185. return asciiAlphanumeric(code) ? emailLabel(code) : nok(code)
  186. }
  187. /**
  188. * In label, where `.` and `>` are allowed.
  189. *
  190. * ```markdown
  191. * > | a<user.name@example.com>b
  192. * ^
  193. * ```
  194. *
  195. * @type {State}
  196. */
  197. function emailLabel(code) {
  198. if (code === codes.dot) {
  199. effects.consume(code)
  200. size = 0
  201. return emailAtSignOrDot
  202. }
  203. if (code === codes.greaterThan) {
  204. // Exit, then change the token type.
  205. effects.exit(types.autolinkProtocol).type = types.autolinkEmail
  206. effects.enter(types.autolinkMarker)
  207. effects.consume(code)
  208. effects.exit(types.autolinkMarker)
  209. effects.exit(types.autolink)
  210. return ok
  211. }
  212. return emailValue(code)
  213. }
  214. /**
  215. * In label, where `.` and `>` are *not* allowed.
  216. *
  217. * Though, this is also used in `emailLabel` to parse other values.
  218. *
  219. * ```markdown
  220. * > | a<user.name@ex-ample.com>b
  221. * ^
  222. * ```
  223. *
  224. * @type {State}
  225. */
  226. function emailValue(code) {
  227. // ASCII alphanumeric or `-`.
  228. if (
  229. (code === codes.dash || asciiAlphanumeric(code)) &&
  230. size++ < constants.autolinkDomainSizeMax
  231. ) {
  232. const next = code === codes.dash ? emailValue : emailLabel
  233. effects.consume(code)
  234. return next
  235. }
  236. return nok(code)
  237. }
  238. }