regex.spec.js 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. const safeRegex = require("../");
  2. const REPETITION_LIMIT = 25;
  3. const REPETITION_TOO_MUCH = REPETITION_LIMIT + 1;
  4. // TODO Named character classes
  5. test("linear-time regexes are safe", () => {
  6. const linTime = [
  7. // regular RE's
  8. /a/,
  9. /a*/,
  10. /a+/,
  11. /a?/,
  12. /a{3,5}/,
  13. /a|b/,
  14. /(ab)/,
  15. /(ab)\1/,
  16. /\bOakland\b/,
  17. /(a+)|(b+)/,
  18. // RE's in a string
  19. "^foo/bar",
  20. // non-RE, non-string
  21. 1,
  22. ];
  23. linTime.forEach(re => {
  24. expect(safeRegex(re)).toBe(true);
  25. });
  26. });
  27. test("linear-time regexes are safe, under varying repetition limits", () => {
  28. const re1 = RegExp("a?".repeat(REPETITION_LIMIT) + "a".repeat(REPETITION_LIMIT));
  29. expect(safeRegex(re1)).toBe(true);
  30. const LOW_REPETITION_LIMIT = 3;
  31. const re2 = RegExp(Array(LOW_REPETITION_LIMIT).join("a?"));
  32. expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(true);
  33. });
  34. test("poly-time regexes are safe (at least according to our heuristics)", () => {
  35. const polyTime = [
  36. /^a+a+$/, // QOA
  37. /^a+aa+$/, // QOA with obvious intermediate run
  38. /^a+aaaa+$/, // QOA with obvious intermediate run
  39. /^a+[a-z]a+$/, // QOA with obvious intermediate run
  40. /^a+\wa+$/, // QOA with intermediate character class
  41. /^a+(\w|\d)a+$/, // QOA with valid path through
  42. /^a+b?a+$/, // QOA with valid path through
  43. /^a+(cde)*a+$/, // QOA with valid path through
  44. /^.*a*$/, // QOA by subset
  45. /^\w*\d*$/, // QOA by intersection
  46. /^\S+@\S+\.\S+$/, // Example from Django
  47. /a+$/, // QOA under partial-match
  48. /abc.*$/, // QOA under partial-match
  49. // TODO It would be nice to have one of the regexes that are poly-time even when they match, due to non-greedy quantifiers (p-NFA)
  50. ];
  51. polyTime.forEach(re => {
  52. expect(safeRegex(re)).toBe(true);
  53. });
  54. });
  55. test("exp-time regexes due to star height are unsafe", () => {
  56. const expTime = [
  57. // Straightforward star height
  58. /(a*)*$/,
  59. /(a?)*$/,
  60. /(a*)?$/,
  61. /(a*)+$/,
  62. /(a+)*$/,
  63. /(\wa+)*$/, // Prefix
  64. /(\..*)*$/, // Suffix
  65. // Branching and nesting.
  66. /(a*|b)+$/,
  67. /(a|b*)+$/,
  68. /(((b*)))+$/,
  69. /(((b*))+)$/,
  70. /(((b*)+))$/,
  71. /(((b)*)+)$/,
  72. /(((b)*))+$/,
  73. // Misc. more complex cases
  74. /^(a?){25}(a){25}$/,
  75. /(x+x+)+y/,
  76. /foo|(x+x+)+y/,
  77. /(a+){10}y/,
  78. /(a+){2}y/,
  79. /(.*){1,32000}[bc]/,
  80. // RE's in a string
  81. "(a+)+",
  82. ];
  83. expTime.forEach(re => {
  84. expect(safeRegex(re)).toBe(false);
  85. });
  86. });
  87. test("linear-time regexes with star height > 1", () => {
  88. // TODO These are false positives, Fix once we improve analysis
  89. const linTime = [
  90. /(ab*)+$/,
  91. /(b*a)+$/,
  92. ];
  93. linTime.forEach(re => {
  94. expect(safeRegex(re)).toBe(false);
  95. });
  96. });
  97. test("exp-time regexes due to disjunction are safe (according to current heuristics)", () => {
  98. // TODO These are false negatives. Fix once we improve analysis
  99. const expTime = [
  100. /(a|a)*$/, // QOD: obvious
  101. /(a|\w)*$/, // QOD due to overlap
  102. /([abc]|b)*$/, // QOD due to overlap
  103. /(\w\w\w|bab)*$/, // QOD due to overlap, with multi-step internal paths
  104. ];
  105. expTime.forEach(re => {
  106. expect(safeRegex(re)).toBe(true);
  107. });
  108. });
  109. test("regex that exceeds repetition limit is unsafe", () => {
  110. const re1 = RegExp("a?".repeat(REPETITION_TOO_MUCH) + "a".repeat(REPETITION_TOO_MUCH));
  111. expect(safeRegex(re1)).toBe(false);
  112. const LOW_REPETITION_LIMIT = 3;
  113. const re2 = RegExp("a?".repeat(LOW_REPETITION_LIMIT + 1));
  114. expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(false);
  115. });
  116. test("invalid regexes default to unsafe", () => {
  117. const invalid = [
  118. "(a+",
  119. "[a-z",
  120. "*Oakland*",
  121. "hey(yoo) )",
  122. "abcde(?>hellow)",
  123. "[abc",
  124. ];
  125. invalid.forEach(re => {
  126. expect(safeRegex(re)).toBe(false);
  127. });
  128. });