mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
381 lines
8.7 KiB
Ruby
381 lines
8.7 KiB
Ruby
# NOT GOOD; attack: "_" + "__".repeat(100)
|
|
# Adapted from marked (https://github.com/markedjs/marked), which is licensed
|
|
# under the MIT license; see file marked-LICENSE.
|
|
bad1 = /^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)/
|
|
|
|
# GOOD
|
|
# Adapted from marked (https://github.com/markedjs/marked), which is licensed
|
|
# under the MIT license; see file marked-LICENSE.
|
|
good1 = /^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)/
|
|
|
|
# GOOD - there is no witness in the end that could cause the regexp to not match
|
|
# Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
|
|
# which is licensed under the MIT license; see file brace-expansion-LICENSE.
|
|
good2 = /(.*,)+.+/
|
|
|
|
# NOT GOOD; attack: " '" + "\\\\".repeat(100)
|
|
# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
|
|
# which is licensed under the MIT license; see file CodeMirror-LICENSE.
|
|
bad2 = /^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?/
|
|
|
|
# GOOD
|
|
# Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
|
|
good2 = /\(\*(?:[\s\S]*?\(\*[\s\S]*?\*\))*[\s\S]*?\*\)/
|
|
|
|
# GOOD
|
|
# Adapted from jest (https://github.com/facebook/jest), which is licensed
|
|
# under the MIT license; see file jest-LICENSE.
|
|
good3 = /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/
|
|
|
|
# NOT GOOD, variant of good3; attack: "a|\n:|\n" + "||\n".repeat(100)
|
|
bad4 = /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)a/
|
|
|
|
# NOT GOOD; attack: "/" + "\\/a".repeat(100)
|
|
# Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
|
|
# which is licensed under the Apache License 2.0; see file ANodeBlog-LICENSE.
|
|
bad5 = /\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)/
|
|
|
|
# NOT GOOD; attack: "##".repeat(100) + "\na"
|
|
# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
|
|
# which is licensed under the MIT license; see file CodeMirror-LICENSE.
|
|
bad6 = /^([\s\[\{\(]|#.*)*$/
|
|
|
|
# GOOD
|
|
good4 = /(\r\n|\r|\n)+/
|
|
|
|
# BAD - PoC: `node -e "/((?:[^\"\']|\".*?\"|\'.*?\')*?)([(,)]|$)/.test(\"'''''''''''''''''''''''''''''''''''''''''''''\\\"\");"`. It's complicated though, because the regexp still matches something, it just matches the empty-string after the attack string.
|
|
actuallyBad = /((?:[^"']|".*?"|'.*?')*?)([(,)]|$)/
|
|
|
|
# NOT GOOD; attack: "a" + "[]".repeat(100) + ".b\n"
|
|
# Adapted from Knockout (https://github.com/knockout/knockout), which is
|
|
# licensed under the MIT license; see file knockout-LICENSE
|
|
bad6 = /^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$/i
|
|
|
|
# GOOD
|
|
good6 = /(a|.)*/
|
|
|
|
# Testing the NFA - only some of the below are detected.
|
|
bad7 = /^([a-z]+)+$/
|
|
bad8 = /^([a-z]*)*$/
|
|
bad9 = /^([a-zA-Z0-9])(([\\.-]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$/
|
|
bad10 = /^(([a-z])+.)+[A-Z]([a-z])+$/
|
|
|
|
# NOT GOOD; attack: "[" + "][".repeat(100) + "]!"
|
|
# Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
|
|
# is licensed under the MIT license; see file Prototype.js-LICENSE.
|
|
bad11 = /(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)/
|
|
|
|
# NOT GOOD; attack: "'" + "\\a".repeat(100) + '"'
|
|
# Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
|
|
# under the MIT license; see file Prism-LICENSE.
|
|
bad12 = /("|')(\\?.)*?\1/
|
|
|
|
# NOT GOOD
|
|
bad13 = /(b|a?b)*c/
|
|
|
|
# NOT GOOD
|
|
bad15 = /(a|aa?)*b/
|
|
|
|
# GOOD
|
|
good7 = /(.|\n)*!/
|
|
|
|
# NOT GOOD; attack: "\n".repeat(100) + "."
|
|
bad16 = /(.|\n)*!/m
|
|
|
|
# GOOD
|
|
good8 = /([\w.]+)*/
|
|
|
|
# BAD - we don't yet parse regexps constructed from strings
|
|
bad17 = Regexp.new '(a|aa?)*b'
|
|
|
|
# GOOD - not used as regexp
|
|
good9 = '(a|aa?)*b'
|
|
|
|
# NOT GOOD
|
|
bad18 = /(([\S\s]|[^a])*)"/
|
|
|
|
# GOOD - there is no witness in the end that could cause the regexp to not match
|
|
good10 = /([^"']+)*/
|
|
|
|
# NOT GOOD
|
|
bad20 = /((.|[^a])*)"/
|
|
|
|
# GOOD
|
|
good10 = /((a|[^a])*)"/
|
|
|
|
# NOT GOOD
|
|
bad21 = /((b|[^a])*)"/
|
|
|
|
# NOT GOOD
|
|
bad22 = /((G|[^a])*)"/
|
|
|
|
# NOT GOOD
|
|
bad23 = /(([0-9]|[^a])*)"/
|
|
|
|
# BAD - missing result
|
|
bad24 = /(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?/
|
|
|
|
# BAD - missing result
|
|
bad25 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"/
|
|
|
|
# GOOD - despite the `bad26` name, this is the safe variant of `bad25`:
# the negated class in the second alternative also excludes `\`, so it can
# never match the backslash that starts the first alternative. The two
# alternatives are therefore disjoint.
|
|
bad26 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"/
|
|
|
|
# NOT GOOD
|
|
bad27 = /(([a-z]|[d-h])*)"/
|
|
|
|
# NOT GOOD
|
|
bad27 = /(([^a-z]|[^0-9])*)"/
|
|
|
|
# NOT GOOD
|
|
bad28 = /((\d|[0-9])*)"/
|
|
|
|
# NOT GOOD
|
|
bad29 = /((\s|\s)*)"/
|
|
|
|
# NOT GOOD
|
|
bad30 = /((\w|G)*)"/
|
|
|
|
# GOOD
|
|
good11 = /((\s|\d)*)"/
|
|
|
|
# NOT GOOD
|
|
bad31 = /((\d|\w)*)"/
|
|
|
|
# NOT GOOD
|
|
bad32 = /((\d|5)*)"/
|
|
|
|
# BAD - \f is not handled correctly
|
|
bad33 = /((\s|[\f])*)"/
|
|
|
|
# BAD - \v is not handled correctly
|
|
bad34 = /((\s|[\v]|\\v)*)"/
|
|
|
|
# NOT GOOD
|
|
bad35 = /((\f|[\f])*)"/
|
|
|
|
# NOT GOOD
|
|
bad36 = /((\W|\D)*)"/
|
|
|
|
# NOT GOOD
|
|
bad37 = /((\S|\w)*)"/
|
|
|
|
# NOT GOOD
|
|
bad38 = /((\S|[\w])*)"/
|
|
|
|
# NOT GOOD
|
|
bad39 = /((1s|[\da-z])*)"/
|
|
|
|
# NOT GOOD
|
|
bad40 = /((0|[\d])*)"/
|
|
|
|
# NOT GOOD
|
|
bad41 = /(([\d]+)*)"/
|
|
|
|
# GOOD - there is no witness in the end that could cause the regexp to not match
|
|
good12 = /(\d+(X\d+)?)+/
|
|
|
|
# GOOD - there is no witness in the end that could cause the regexp to not match
|
|
good13 = /([0-9]+(X[0-9]*)?)*/
|
|
|
|
# GOOD
|
|
good15 = /^([^>]+)*(>|$)/
|
|
|
|
# NOT GOOD
|
|
bad43 = /^([^>a]+)*(>|$)/
|
|
|
|
# NOT GOOD
|
|
bad44 = /(\n\s*)+$/
|
|
|
|
# NOT GOOD
|
|
bad45 = /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/
|
|
|
|
# NOT GOOD
|
|
bad46 = /\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}/
|
|
|
|
# NOT GOOD
|
|
bad47 = /(a+|b+|c+)*c/
|
|
|
|
# NOT GOOD
|
|
bad48 = /(((a+a?)*)+b+)/
|
|
|
|
# NOT GOOD
|
|
bad49 = /(a+)+bbbb/
|
|
|
|
# GOOD
|
|
good16 = /(a+)+aaaaa*a+/
|
|
|
|
# NOT GOOD
|
|
bad50 = /(a+)+aaaaa$/
|
|
|
|
# GOOD
|
|
good17 = /(\n+)+\n\n/
|
|
|
|
# NOT GOOD
|
|
bad51 = /(\n+)+\n\n$/
|
|
|
|
# NOT GOOD
|
|
bad52 = /([^X]+)*$/
|
|
|
|
# NOT GOOD
|
|
bad53 = /(([^X]b)+)*$/
|
|
|
|
# GOOD
|
|
good18 = /(([^X]b)+)*($|[^X]b)/
|
|
|
|
# NOT GOOD
|
|
bad54 = /(([^X]b)+)*($|[^X]c)/
|
|
|
|
# GOOD
|
|
good20 = /((ab)+)*ababab/
|
|
|
|
# GOOD
|
|
good21 = /((ab)+)*abab(ab)*(ab)+/
|
|
|
|
# GOOD
|
|
good22 = /((ab)+)*/
|
|
|
|
# NOT GOOD
|
|
bad55 = /((ab)+)*$/
|
|
|
|
# GOOD
|
|
good23 = /((ab)+)*[a1][b1][a2][b2][a3][b3]/
|
|
|
|
# NOT GOOD
|
|
bad56 = /([\n\s]+)*(.)/
|
|
|
|
# GOOD - any witness passes through the accept state.
|
|
good24 = /(A*A*X)*/
|
|
|
|
# GOOD
|
|
good26 = /([^\\\]]+)*/
|
|
|
|
# NOT GOOD
|
|
bad59 = /(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-/
|
|
|
|
# NOT GOOD
|
|
bad60 = /(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamnlongstringforstresstestingthequery)*-/
|
|
|
|
# NOT GOOD
|
|
bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
|
|
|
|
# GOOD
|
|
good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
|
|
|
|
# GOOD
|
|
#good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
|
|
|
|
# GOOD
|
|
#good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
|
|
|
|
# NOT GOOD (but cannot currently construct a prefix)
|
|
bad62 = /a{2,3}(b+)+X/
|
|
|
|
# NOT GOOD (and a good prefix test)
|
|
bad63 = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/
|
|
|
|
# GOOD
|
|
good30 = /(a+)*[\S\s][\S\s][\S\s]?/
|
|
|
|
# GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\S\s]{2,3}`).
|
|
good31 = /(a+)*[\S\s]{2,3}/
|
|
|
|
# GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\S\s]{2,}` when constructing the NFA).
|
|
good32 = /(a+)*([\S\s]{2,}|X)$/
|
|
|
|
# GOOD
|
|
good33 = /(a+)*([\S\s]*|X)$/
|
|
|
|
# NOT GOOD
|
|
bad64 = /((a+)*$|[\S\s]+)/
|
|
|
|
# GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
|
|
good34 = /([\S\s]+|(a+)*$)/
|
|
|
|
# GOOD
|
|
good35 = /((;|^)a+)+$/
|
|
|
|
# NOT GOOD (a good prefix test)
|
|
bad65 = /(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f/
|
|
|
|
# NOT GOOD
|
|
bad66 = /^ab(c+)+$/
|
|
|
|
# NOT GOOD
|
|
bad67 = /(\d(\s+)*){20}/
|
|
|
|
# GOOD - but we spuriously conclude that a rejecting suffix exists.
|
|
good36 = /(([^\/]|X)+)(\/[\S\s]*)*$/
|
|
|
|
# GOOD - but we spuriously conclude that a rejecting suffix exists.
|
|
good37 = /^((x([^Y]+)?)*(Y|$))/
|
|
|
|
# NOT GOOD
|
|
bad68 = /(a*)+b/
|
|
|
|
# NOT GOOD
|
|
bad69 = /foo([\w-]*)+bar/
|
|
|
|
# NOT GOOD
|
|
bad70 = /((ab)*)+c/
|
|
|
|
# NOT GOOD
|
|
bad71 = /(a?a?)*b/
|
|
|
|
# GOOD
|
|
good38 = /(a?)*b/
|
|
|
|
# NOT GOOD - but not detected
|
|
bad72 = /(c?a?)*b/
|
|
|
|
# NOT GOOD
|
|
bad73 = /(?:a|a?)+b/
|
|
|
|
# NOT GOOD - but not detected.
|
|
bad74 = /(a?b?)*$/
|
|
|
|
# NOT GOOD
|
|
bad76 = /PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)/
|
|
|
|
# NOT GOOD - but not detected
|
|
bad77 = /^((a)+\w)+$/
|
|
|
|
# NOT GOOD
|
|
bad78 = /^(b+.)+$/
|
|
|
|
# GOOD
|
|
good39 = /a*b/
|
|
|
|
# All 4 bad combinations of nested * and +
# Each pattern repeats a group whose body is itself a repetition of `a`,
# and is followed by a mandatory `b`.
|
|
# `*` inside `*`
bad79 = /(a*)*b/
|
|
# `+` inside `*`
bad80 = /(a+)*b/
|
|
# `*` inside `+`
bad81 = /(a*)+b/
|
|
# `+` inside `+`
bad82 = /(a+)+b/
|
|
|
|
# GOOD
|
|
good40 = /(a|b)+/
|
|
good41 = /(?:[\s;,"'<>(){}|\[\]@=+*]|:(?![\/\\]))+/
|
|
|
|
# NOT GOOD
# In each pattern below the `{` after `a` is never closed, so it is literal
# text rather than a repetition count. The first alternative (`a{`, `a{0`,
# `a{0,`, `a{0,2`) therefore matches the same input as the `\w\{...`
# alternative, which makes the repeated group ambiguous.
|
|
# literal `a{` overlaps with `\w\{`
bad83 = /^((?:a{|-)|\w\{)+X$/
|
|
# literal `a{0` overlaps with `\w\{\d`
bad84 = /^((?:a{0|-)|\w\{\d)+X$/
|
|
# literal `a{0,` overlaps with `\w\{\d,`
bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/
|
|
# literal `a{0,2` overlaps with `\w\{\d,\d`
bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/
|
|
|
|
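# GOOD - `a{0,2}` is a complete repetition count here, so the first alternative never consumes a literal `{`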
|
|
good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
|
|
|
|
# NOT GOOD
|
|
bad87 = /^X(\u0061|a)*Y$/
|
|
|
|
# GOOD
|
|
good43 = /^X(\u0061|b)+Y$/
|
|
|
|
# NOT GOOD
|
|
bad88 = /X([[:digit:]]|\d)+Y/
|
|
|
|
# NOT GOOD
|
|
bad89 = /\G(a|\w)*$/
|
|
bad90 = /\b(a|\w)*$/
|
|
|