123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- import {asciiAlphanumeric} from 'micromark-util-character'
- import {encode} from 'micromark-util-encode'
/**
 * Make a value safe for injection as a URL.
 *
 * The value is first percent-encoded (see `normalizeUri`, which skips
 * sequences that are already encoded) and then passed through `encode`
 * from `micromark-util-encode`, which encodes further unsafe characters as
 * character references.
 *
 * A regex of allowed protocols can be given, in which case the URL is
 * sanitized.
 * For example, `/^(https?|ircs?|mailto|xmpp)$/i` can be used for `a[href]`, or
 * `/^https?$/i` for `img[src]` (this is what `github.com` allows).
 * If the URL includes an unknown protocol (one not matched by `protocol`, such
 * as a dangerous example, `javascript:`), the value is ignored and an empty
 * string is returned.
 *
 * The regex is tested against everything before the first colon of the
 * encoded value.
 * Regexes with the `g` or `y` flag are supported: their `lastIndex` is reset
 * before testing, so repeated calls with the same regex give the same result.
 *
 * @param {string | null | undefined} url
 *   URI to sanitize; `null`, `undefined`, and `''` are treated as empty.
 * @param {RegExp | null | undefined} [protocol]
 *   Allowed protocols; when not given, the encoded value is returned as is.
 * @returns {string}
 *   Sanitized URI, or `''` when the protocol is not allowed.
 */
export function sanitizeUri(url, protocol) {
  const value = encode(normalizeUri(url || ''))

  if (!protocol) {
    return value
  }

  const colon = value.indexOf(':')
  const questionMark = value.indexOf('?')
  const numberSign = value.indexOf('#')
  const slash = value.indexOf('/')

  if (
    // If there is no protocol, it’s relative.
    colon < 0 ||
    // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
    (slash > -1 && colon > slash) ||
    (questionMark > -1 && colon > questionMark) ||
    (numberSign > -1 && colon > numberSign)
  ) {
    return value
  }

  // `RegExp#test` resumes from `lastIndex` on global and sticky regexes, so a
  // reused regex would otherwise reject every other allowed URL.
  // Only touch `lastIndex` when it matters, so that plain (possibly frozen)
  // regexes are never written to.
  if (protocol.global || protocol.sticky) {
    protocol.lastIndex = 0
  }

  // It is a protocol, it should be allowed.
  return protocol.test(value.slice(0, colon)) ? value : ''
}
/**
 * Normalize a URL.
 *
 * Walks the value one UTF-16 code unit at a time and percent-encodes
 * everything that is not considered safe, while leaving sequences that
 * already look percent-encoded (`%` followed by two ASCII alphanumerics)
 * untouched.
 * Surrogate pairs are encoded as one character; lone surrogates are replaced
 * by the encoded replacement character (U+FFFD).
 *
 * @param {string} value
 *   URI to normalize.
 * @returns {string}
 *   Normalized URI.
 */
export function normalizeUri(value) {
  /** @type {Array<string>} */
  const chunks = []
  // Position of the code unit being looked at.
  let position = 0
  // Everything before this position is already in `chunks`.
  let flushedTo = 0

  while (position < value.length) {
    const unit = value.charCodeAt(position)
    /** @type {string} */
    let unsafe = ''
    // Number of additional code units consumed together with this one.
    let extra = 0

    if (
      unit === 37 &&
      asciiAlphanumeric(value.charCodeAt(position + 1)) &&
      asciiAlphanumeric(value.charCodeAt(position + 2))
    ) {
      // A correct percent encoded value: keep `%` and the two characters
      // after it as they are.
      extra = 2
    } else if (unit < 128) {
      // ASCII: encode only what is outside the safe set.
      const character = String.fromCharCode(unit)

      if (!/[!#$&-;=?-Z_a-z~]/.test(character)) {
        unsafe = character
      }
    } else if (unit > 55_295 && unit < 57_344) {
      // Astral: some surrogate.
      const following = value.charCodeAt(position + 1)

      if (unit < 56_320 && following > 56_319 && following < 57_344) {
        // A correct surrogate pair: encode both halves as one character.
        unsafe = String.fromCharCode(unit, following)
        extra = 1
      } else {
        // Lone surrogate.
        unsafe = '\uFFFD'
      }
    } else {
      // Unicode: any other non-ASCII code unit.
      unsafe = String.fromCharCode(unit)
    }

    if (unsafe) {
      // Flush the untouched text seen so far, then the encoded character.
      chunks.push(value.slice(flushedTo, position), encodeURIComponent(unsafe))
      flushedTo = position + extra + 1
    }

    position += extra + 1
  }

  return chunks.join('') + value.slice(flushedTo)
}
|