123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
- const safeRegex = require("../");
- const REPETITION_LIMIT = 25;
- const REPETITION_TOO_MUCH = REPETITION_LIMIT + 1;
- // TODO Named character classes
- test("linear-time regexes are safe", () => {
- const linTime = [
- // regular RE's
- /a/,
- /a*/,
- /a+/,
- /a?/,
- /a{3,5}/,
- /a|b/,
- /(ab)/,
- /(ab)\1/,
- /\bOakland\b/,
- /(a+)|(b+)/,
- // RE's in a string
- "^foo/bar",
- // non-RE, non-string
- 1,
- ];
- linTime.forEach(re => {
- expect(safeRegex(re)).toBe(true);
- });
- });
- test("linear-time regexes are safe, under varying repetition limits", () => {
- const re1 = RegExp("a?".repeat(REPETITION_LIMIT) + "a".repeat(REPETITION_LIMIT));
- expect(safeRegex(re1)).toBe(true);
- const LOW_REPETITION_LIMIT = 3;
- const re2 = RegExp(Array(LOW_REPETITION_LIMIT).join("a?"));
- expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(true);
- });
- test("poly-time regexes are safe (at least according to our heuristics)", () => {
- const polyTime = [
- /^a+a+$/, // QOA
- /^a+aa+$/, // QOA with obvious intermediate run
- /^a+aaaa+$/, // QOA with obvious intermediate run
- /^a+[a-z]a+$/, // QOA with obvious intermediate run
- /^a+\wa+$/, // QOA with intermediate character class
- /^a+(\w|\d)a+$/, // QOA with valid path through
- /^a+b?a+$/, // QOA with valid path through
- /^a+(cde)*a+$/, // QOA with valid path through
- /^.*a*$/, // QOA by subset
- /^\w*\d*$/, // QOA by intersection
- /^\S+@\S+\.\S+$/, // Example from Django
- /a+$/, // QOA under partial-match
- /abc.*$/, // QOA under partial-match
- // TODO It would be nice to have one of the regexes that are poly-time even when they match, due to non-greedy quantifiers (p-NFA)
- ];
- polyTime.forEach(re => {
- expect(safeRegex(re)).toBe(true);
- });
- });
- test("exp-time regexes due to star height are unsafe", () => {
- const expTime = [
- // Straightforward star height
- /(a*)*$/,
- /(a?)*$/,
- /(a*)?$/,
- /(a*)+$/,
- /(a+)*$/,
- /(\wa+)*$/, // Prefix
- /(\..*)*$/, // Suffix
- // Branching and nesting.
- /(a*|b)+$/,
- /(a|b*)+$/,
- /(((b*)))+$/,
- /(((b*))+)$/,
- /(((b*)+))$/,
- /(((b)*)+)$/,
- /(((b)*))+$/,
- // Misc. more complex cases
- /^(a?){25}(a){25}$/,
- /(x+x+)+y/,
- /foo|(x+x+)+y/,
- /(a+){10}y/,
- /(a+){2}y/,
- /(.*){1,32000}[bc]/,
- // RE's in a string
- "(a+)+",
- ];
- expTime.forEach(re => {
- expect(safeRegex(re)).toBe(false);
- });
- });
- test("linear-time regexes with star height > 1", () => {
- // TODO These are false positives, Fix once we improve analysis
- const linTime = [
- /(ab*)+$/,
- /(b*a)+$/,
- ];
- linTime.forEach(re => {
- expect(safeRegex(re)).toBe(false);
- });
- });
- test("exp-time regexes due to disjunction are safe (according to current heuristics)", () => {
- // TODO These are false negatives. Fix once we improve analysis
- const expTime = [
- /(a|a)*$/, // QOD: obvious
- /(a|\w)*$/, // QOD due to overlap
- /([abc]|b)*$/, // QOD due to overlap
- /(\w\w\w|bab)*$/, // QOD due to overlap, with multi-step internal paths
- ];
- expTime.forEach(re => {
- expect(safeRegex(re)).toBe(true);
- });
- });
- test("regex that exceeds repetition limit is unsafe", () => {
- const re1 = RegExp("a?".repeat(REPETITION_TOO_MUCH) + "a".repeat(REPETITION_TOO_MUCH));
- expect(safeRegex(re1)).toBe(false);
- const LOW_REPETITION_LIMIT = 3;
- const re2 = RegExp("a?".repeat(LOW_REPETITION_LIMIT + 1));
- expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(false);
- });
- test("invalid regexes default to unsafe", () => {
- const invalid = [
- "(a+",
- "[a-z",
- "*Oakland*",
- "hey(yoo) )",
- "abcde(?>hellow)",
- "[abc",
- ];
- invalid.forEach(re => {
- expect(safeRegex(re)).toBe(false);
- });
- });
|