Files
codeql/javascript/ql/lib/semmle/javascript/CharacterEscapes.qll
2022-03-16 22:56:52 +01:00

105 lines
3.8 KiB
Plaintext

/**
* Provides classes for reasoning about character escapes in literals.
*/
import javascript
module CharacterEscapes {
/**
* Provides sets of characters (as strings) with specific string/regexp characteristics.
*/
private module Sets {
string sharedEscapeChars() { result = "fnrtvux0\\" }
string regexpAssertionChars() { result = "bB" }
string regexpCharClassChars() { result = "cdDpPsSwW" }
string regexpBackreferenceChars() { result = "123456789k" }
string regexpMetaChars() { result = "^$*+?.()|{}[]-" }
}
/**
* Gets the `i`th character of `raw`, which is preceded by an uneven number of backslashes.
*/
bindingset[raw]
string getAnEscapedCharacter(string raw, int i) {
result = raw.regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10}).", _, i)
}
/**
* Holds if `n` is delimited by `delim` and contains `rawStringNode` with the raw string value `raw`.
*/
private predicate hasRawStringAndQuote(
DataFlow::ValueNode n, string delim, AstNode rawStringNode, string raw
) {
rawStringNode = n.asExpr() and
raw = rawStringNode.(StringLiteral).getRawValue() and
delim = raw.charAt(0)
or
rawStringNode = n.asExpr().(TemplateLiteral).getAnElement() and
raw = rawStringNode.(TemplateElement).getRawValue() and
delim = "`"
or
rawStringNode = n.asExpr() and
raw = rawStringNode.(RegExpLiteral).getRawValue() and
delim = "/"
}
/**
* Gets a character in `n` that is preceded by a single useless backslash.
*
* The character is the `i`th character of `rawStringNode`'s raw string value.
*/
string getAnIdentityEscapedCharacter(DataFlow::Node n, AstNode rawStringNode, int i) {
exists(string delim, string raw, string additionalEscapeChars |
hasRawStringAndQuote(n, delim, rawStringNode, raw) and
if rawStringNode instanceof RegExpLiteral
then
additionalEscapeChars =
Sets::regexpMetaChars() + Sets::regexpAssertionChars() + Sets::regexpCharClassChars() +
Sets::regexpBackreferenceChars()
else additionalEscapeChars = "b"
|
result = getAnEscapedCharacter(raw, i) and
not result = (Sets::sharedEscapeChars() + delim + additionalEscapeChars).charAt(_)
)
}
/**
* Holds if `src` likely contains a regular expression mistake, to be reported by `js/useless-regexp-character-escape`.
*/
predicate hasALikelyRegExpPatternMistake(RegExpPatternSource src) {
exists(getALikelyRegExpPatternMistake(src, _, _, _))
}
/**
* Gets a character in `src` that is preceded by a single useless backslash, resulting in a likely regular expression mistake explained by `mistake`.
*
* The character is the `i`th character of the raw string value of `rawStringNode`.
*/
string getALikelyRegExpPatternMistake(
RegExpPatternSource src, string mistake, AstNode rawStringNode, int i
) {
result = getAnIdentityEscapedCharacter(src, rawStringNode, i) and
(
result = Sets::regexpAssertionChars().charAt(_) and mistake = "is not an assertion"
or
result = Sets::regexpCharClassChars().charAt(_) and mistake = "is not a character class"
or
result = Sets::regexpBackreferenceChars().charAt(_) and mistake = "is not a backreference"
or
// conservative formulation: we do not know in general if the sequence is enclosed in a character class `[...]`
result = Sets::regexpMetaChars().charAt(_) and
mistake = "may still represent a meta-character"
) and
// avoid the benign case where preceding escaped backslashes turns into backslashes when the regexp is constructed
not exists(string raw |
not rawStringNode instanceof RegExpLiteral and
hasRawStringAndQuote(_, _, rawStringNode, raw) and
result = raw.regexpFind("(?<=(^|[^\\\\])((\\\\{3})|(\\\\{7}))).", _, i)
)
}
}