Files
codeql/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql

43 lines
1.1 KiB
Plaintext
Raw Blame History

/**
* @name Duplication in regular expression character class
* @description Duplicate characters in a class have no effect and may indicate an error in the regular expression.
* @kind problem
* @tags reliability
* readability
* @problem.severity warning
* @sub-severity low
* @precision very-high
* @id py/regex/duplicate-in-character-class
*/
import python
import semmle.python.regex
predicate duplicate_char_in_class(RegExp r, string char) {
exists(int i, int j, int x, int y, int start, int end |
i != x and
j != y and
start < i and
j < end and
start < x and
y < end and
r.character(i, j) and
char = r.getText().substring(i, j) and
r.character(x, y) and
char = r.getText().substring(x, y) and
r.charSet(start, end)
) and
/* Exclude <20> as we use it for any unencodable character */
char != "<22>" and
//Ignore whitespace in verbose mode
not (
r.getAMode() = "VERBOSE" and
char in [" ", "\t", "\r", "\n"]
)
}
// Report every regular expression for which `duplicate_char_in_class` holds,
// naming the duplicated character in the alert message.
from RegExp regex, string duplicated
where duplicate_char_in_class(regex, duplicated)
select regex,
  "This regular expression includes duplicate character '" + duplicated +
    "' in a set of characters."