// autolink.js (4.8 KB)
  1. /**
  2. * @typedef {import('micromark-util-types').Construct} Construct
  3. * @typedef {import('micromark-util-types').State} State
  4. * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
  5. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  6. */
  7. import {
  8. asciiAlpha,
  9. asciiAlphanumeric,
  10. asciiAtext,
  11. asciiControl
  12. } from 'micromark-util-character'
  13. /** @type {Construct} */
  14. export const autolink = {
  15. name: 'autolink',
  16. tokenize: tokenizeAutolink
  17. }
  18. /**
  19. * @this {TokenizeContext}
  20. * @type {Tokenizer}
  21. */
  22. function tokenizeAutolink(effects, ok, nok) {
  23. let size = 0
  24. return start
  25. /**
  26. * Start of an autolink.
  27. *
  28. * ```markdown
  29. * > | a<https://example.com>b
  30. * ^
  31. * > | a<user@example.com>b
  32. * ^
  33. * ```
  34. *
  35. * @type {State}
  36. */
  37. function start(code) {
  38. effects.enter('autolink')
  39. effects.enter('autolinkMarker')
  40. effects.consume(code)
  41. effects.exit('autolinkMarker')
  42. effects.enter('autolinkProtocol')
  43. return open
  44. }
  45. /**
  46. * After `<`, at protocol or atext.
  47. *
  48. * ```markdown
  49. * > | a<https://example.com>b
  50. * ^
  51. * > | a<user@example.com>b
  52. * ^
  53. * ```
  54. *
  55. * @type {State}
  56. */
  57. function open(code) {
  58. if (asciiAlpha(code)) {
  59. effects.consume(code)
  60. return schemeOrEmailAtext
  61. }
  62. return emailAtext(code)
  63. }
  64. /**
  65. * At second byte of protocol or atext.
  66. *
  67. * ```markdown
  68. * > | a<https://example.com>b
  69. * ^
  70. * > | a<user@example.com>b
  71. * ^
  72. * ```
  73. *
  74. * @type {State}
  75. */
  76. function schemeOrEmailAtext(code) {
  77. // ASCII alphanumeric and `+`, `-`, and `.`.
  78. if (code === 43 || code === 45 || code === 46 || asciiAlphanumeric(code)) {
  79. // Count the previous alphabetical from `open` too.
  80. size = 1
  81. return schemeInsideOrEmailAtext(code)
  82. }
  83. return emailAtext(code)
  84. }
  85. /**
  86. * In ambiguous protocol or atext.
  87. *
  88. * ```markdown
  89. * > | a<https://example.com>b
  90. * ^
  91. * > | a<user@example.com>b
  92. * ^
  93. * ```
  94. *
  95. * @type {State}
  96. */
  97. function schemeInsideOrEmailAtext(code) {
  98. if (code === 58) {
  99. effects.consume(code)
  100. size = 0
  101. return urlInside
  102. }
  103. // ASCII alphanumeric and `+`, `-`, and `.`.
  104. if (
  105. (code === 43 || code === 45 || code === 46 || asciiAlphanumeric(code)) &&
  106. size++ < 32
  107. ) {
  108. effects.consume(code)
  109. return schemeInsideOrEmailAtext
  110. }
  111. size = 0
  112. return emailAtext(code)
  113. }
  114. /**
  115. * After protocol, in URL.
  116. *
  117. * ```markdown
  118. * > | a<https://example.com>b
  119. * ^
  120. * ```
  121. *
  122. * @type {State}
  123. */
  124. function urlInside(code) {
  125. if (code === 62) {
  126. effects.exit('autolinkProtocol')
  127. effects.enter('autolinkMarker')
  128. effects.consume(code)
  129. effects.exit('autolinkMarker')
  130. effects.exit('autolink')
  131. return ok
  132. }
  133. // ASCII control, space, or `<`.
  134. if (code === null || code === 32 || code === 60 || asciiControl(code)) {
  135. return nok(code)
  136. }
  137. effects.consume(code)
  138. return urlInside
  139. }
  140. /**
  141. * In email atext.
  142. *
  143. * ```markdown
  144. * > | a<user.name@example.com>b
  145. * ^
  146. * ```
  147. *
  148. * @type {State}
  149. */
  150. function emailAtext(code) {
  151. if (code === 64) {
  152. effects.consume(code)
  153. return emailAtSignOrDot
  154. }
  155. if (asciiAtext(code)) {
  156. effects.consume(code)
  157. return emailAtext
  158. }
  159. return nok(code)
  160. }
  161. /**
  162. * In label, after at-sign or dot.
  163. *
  164. * ```markdown
  165. * > | a<user.name@example.com>b
  166. * ^ ^
  167. * ```
  168. *
  169. * @type {State}
  170. */
  171. function emailAtSignOrDot(code) {
  172. return asciiAlphanumeric(code) ? emailLabel(code) : nok(code)
  173. }
  174. /**
  175. * In label, where `.` and `>` are allowed.
  176. *
  177. * ```markdown
  178. * > | a<user.name@example.com>b
  179. * ^
  180. * ```
  181. *
  182. * @type {State}
  183. */
  184. function emailLabel(code) {
  185. if (code === 46) {
  186. effects.consume(code)
  187. size = 0
  188. return emailAtSignOrDot
  189. }
  190. if (code === 62) {
  191. // Exit, then change the token type.
  192. effects.exit('autolinkProtocol').type = 'autolinkEmail'
  193. effects.enter('autolinkMarker')
  194. effects.consume(code)
  195. effects.exit('autolinkMarker')
  196. effects.exit('autolink')
  197. return ok
  198. }
  199. return emailValue(code)
  200. }
  201. /**
  202. * In label, where `.` and `>` are *not* allowed.
  203. *
  204. * Though, this is also used in `emailLabel` to parse other values.
  205. *
  206. * ```markdown
  207. * > | a<user.name@ex-ample.com>b
  208. * ^
  209. * ```
  210. *
  211. * @type {State}
  212. */
  213. function emailValue(code) {
  214. // ASCII alphanumeric or `-`.
  215. if ((code === 45 || asciiAlphanumeric(code)) && size++ < 63) {
  216. const next = code === 45 ? emailValue : emailLabel
  217. effects.consume(code)
  218. return next
  219. }
  220. return nok(code)
  221. }
  222. }