mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
JS: Add RegExpAlwaysMatches query
This commit is contained in:
@@ -17,6 +17,7 @@
|
|||||||
| **Query** | **Tags** | **Purpose** |
|
| **Query** | **Tags** | **Purpose** |
|
||||||
|---------------------------------------------------------------------------------|-------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|---------------------------------------------------------------------------------|-------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
| Cross-site scripting through exception (`js/xss-through-exception`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights potential XSS vulnerabilities where an exception is written to the DOM. Results are not shown on LGTM by default. |
|
| Cross-site scripting through exception (`js/xss-through-exception`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights potential XSS vulnerabilities where an exception is written to the DOM. Results are not shown on LGTM by default. |
|
||||||
|
| Regular expression always matches (`js/regex/always-matches`) | correctness, regular-expressions | Highlights regular expression checks that trivially succeed by matching an empty substring. |
|
||||||
|
|
||||||
## Changes to existing queries
|
## Changes to existing queries
|
||||||
|
|
||||||
|
|||||||
@@ -34,6 +34,7 @@
|
|||||||
+ semmlecode-javascript-queries/RegExp/DuplicateCharacterInCharacterClass.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/DuplicateCharacterInCharacterClass.ql: /Correctness/Regular Expressions
|
||||||
+ semmlecode-javascript-queries/RegExp/EmptyCharacterClass.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/EmptyCharacterClass.ql: /Correctness/Regular Expressions
|
||||||
+ semmlecode-javascript-queries/RegExp/IdentityReplacement.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/IdentityReplacement.ql: /Correctness/Regular Expressions
|
||||||
|
+ semmlecode-javascript-queries/RegExp/RegExpAlwaysMatches.ql: /Correctness/Regular Expressions
|
||||||
+ semmlecode-javascript-queries/RegExp/UnboundBackref.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/UnboundBackref.ql: /Correctness/Regular Expressions
|
||||||
+ semmlecode-javascript-queries/RegExp/UnmatchableCaret.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/UnmatchableCaret.ql: /Correctness/Regular Expressions
|
||||||
+ semmlecode-javascript-queries/RegExp/UnmatchableDollar.ql: /Correctness/Regular Expressions
|
+ semmlecode-javascript-queries/RegExp/UnmatchableDollar.ql: /Correctness/Regular Expressions
|
||||||
|
|||||||
@@ -144,7 +144,7 @@ newtype TInputSymbol =
|
|||||||
CharClass(RegExpCharacterClass recc) {
|
CharClass(RegExpCharacterClass recc) {
|
||||||
getRoot(recc).isRelevant() and
|
getRoot(recc).isRelevant() and
|
||||||
not recc.isInverted() and
|
not recc.isInverted() and
|
||||||
not isUniversalClass(recc)
|
not recc.isUniversalClass()
|
||||||
} or
|
} or
|
||||||
/** An input symbol representing all characters matched by `.`. */
|
/** An input symbol representing all characters matched by `.`. */
|
||||||
Dot() or
|
Dot() or
|
||||||
@@ -153,23 +153,6 @@ newtype TInputSymbol =
|
|||||||
/** An epsilon transition in the automaton. */
|
/** An epsilon transition in the automaton. */
|
||||||
Epsilon()
|
Epsilon()
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if character class `cc` matches all characters.
|
|
||||||
*/
|
|
||||||
predicate isUniversalClass(RegExpCharacterClass cc) {
|
|
||||||
// [^]
|
|
||||||
cc.isInverted() and not exists(cc.getAChild())
|
|
||||||
or
|
|
||||||
// [\w\W] and similar
|
|
||||||
not cc.isInverted() and
|
|
||||||
exists(string cce1, string cce2 |
|
|
||||||
cce1 = cc.getAChild().(RegExpCharacterClassEscape).getValue() and
|
|
||||||
cce2 = cc.getAChild().(RegExpCharacterClassEscape).getValue()
|
|
||||||
|
|
|
||||||
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An abstract input symbol, representing a set of concrete characters.
|
* An abstract input symbol, representing a set of concrete characters.
|
||||||
*/
|
*/
|
||||||
@@ -361,7 +344,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
|
|||||||
)
|
)
|
||||||
or
|
or
|
||||||
exists(RegExpCharacterClass cc |
|
exists(RegExpCharacterClass cc |
|
||||||
isUniversalClass(cc) and q1 = before(cc) and lbl = Any() and q2 = after(cc)
|
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
|
||||||
or
|
or
|
||||||
q1 = before(cc) and lbl = CharClass(cc) and q2 = after(cc)
|
q1 = before(cc) and lbl = CharClass(cc) and q2 = after(cc)
|
||||||
)
|
)
|
||||||
|
|||||||
54
javascript/ql/src/RegExp/RegExpAlwaysMatches.qhelp
Normal file
54
javascript/ql/src/RegExp/RegExpAlwaysMatches.qhelp
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
<!DOCTYPE qhelp PUBLIC
|
||||||
|
"-//Semmle//qhelp//EN"
|
||||||
|
"qhelp.dtd">
|
||||||
|
<qhelp>
|
||||||
|
|
||||||
|
<overview>
|
||||||
|
<p>
|
||||||
|
There are several built-in JavaScript functions that search for a regular expression match within a string,
|
||||||
|
such as <code>RegExp.prototype.test</code> and <code>String.prototype.search</code>.
|
||||||
|
If the regular expression is not anchored, the regular expression does not need to match the whole string;
|
||||||
|
it only needs to match a substring.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
If the regular expression being searched for accepts the empty string, this means it can match an empty
|
||||||
|
substring anywhere in the input string, and will thus always find a match.
|
||||||
|
In this case, testing if a match exists is redundant and indicates dead code.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
</overview>
|
||||||
|
<recommendation>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Examine the regular expression and determine how it was intended to match:
|
||||||
|
<ul>
|
||||||
|
<li>To match the whole input string, add anchors at the beginning and end of the regular expression.</li>
|
||||||
|
<li>To search for an occurrence within the input string, consider what the shortest meaningful match is and restrict the
|
||||||
|
regular expression accordingly, such as by changing a <code>*</code> to a <code>+</code>.</li>
|
||||||
|
</ul>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
</recommendation>
|
||||||
|
<example>
|
||||||
|
<p>
|
||||||
|
In the following example, a regular expression is used to check the format of a string <code>id</code>.
|
||||||
|
However, the check always passes because the regular expression can match the empty substring.
|
||||||
|
For example, it will allow the ID string "<code>%%</code>" by matching an empty string at index 0.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<sample src="examples/RegExpAlwaysMatches.js" />
|
||||||
|
|
||||||
|
<p>
|
||||||
|
To ensure the regular expression matches the whole string, add anchors at the beginning and end:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<sample src="examples/RegExpAlwaysMatchesGood.js" />
|
||||||
|
|
||||||
|
</example>
|
||||||
|
<references>
|
||||||
|
|
||||||
|
<li>Mozilla Developer Network: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">JavaScript Regular Expressions</a>.</li>
|
||||||
|
|
||||||
|
</references>
|
||||||
|
</qhelp>
|
||||||
117
javascript/ql/src/RegExp/RegExpAlwaysMatches.ql
Normal file
117
javascript/ql/src/RegExp/RegExpAlwaysMatches.ql
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
/**
|
||||||
|
* @name Regular expression always matches
|
||||||
|
* @description Regular expression tests that always find a match indicate dead code or a logic error.
|
||||||
|
* @kind problem
|
||||||
|
* @problem.severity warning
|
||||||
|
* @id js/regex/always-matches
|
||||||
|
* @tags correctness
|
||||||
|
* regular-expressions
|
||||||
|
* @precision high
|
||||||
|
*/
|
||||||
|
|
||||||
|
import javascript
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a node reachable from the given root term through alts and groups only.
|
||||||
|
*
|
||||||
|
* For example, for `/(foo|bar)/` this gets `(foo|bar)`, `foo|bar`, `foo` and `bar`.
|
||||||
|
*/
|
||||||
|
RegExpTerm getEffectiveRootAux(RegExpTerm actualRoot) {
|
||||||
|
actualRoot.isRootTerm() and
|
||||||
|
result = actualRoot
|
||||||
|
or
|
||||||
|
result = getEffectiveRootAux(actualRoot).(RegExpAlt).getAChild()
|
||||||
|
or
|
||||||
|
result = getEffectiveRootAux(actualRoot).(RegExpGroup).getAChild()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the effective root of the given term.
|
||||||
|
*
|
||||||
|
* For example, for `/(foo|bar)/` this gets `foo` and `bar`.
|
||||||
|
*/
|
||||||
|
RegExpTerm getEffectiveRoot(RegExpTerm actualRoot) {
|
||||||
|
result = getEffectiveRootAux(actualRoot) and
|
||||||
|
not result instanceof RegExpAlt and
|
||||||
|
not result instanceof RegExpGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `node` contains both a caret and a dollar anchor, and so is possibly anchored on both ends.
|
||||||
|
*/
|
||||||
|
predicate isPossiblyAnchoredOnBothEnds(RegExpSequence node) {
|
||||||
|
node.getAChild*() instanceof RegExpCaret and
|
||||||
|
node.getAChild*() instanceof RegExpDollar and
|
||||||
|
node.getNumChild() >= 2
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `term` is obviously intended to match any string.
|
||||||
|
*/
|
||||||
|
predicate isUniversalRegExp(RegExpTerm term) {
|
||||||
|
exists(RegExpTerm child | child = term.(RegExpStar).getAChild() |
|
||||||
|
child instanceof RegExpDot
|
||||||
|
or
|
||||||
|
child.(RegExpCharacterClass).isUniversalClass()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A call that searches for a regexp match within a string, but does not
|
||||||
|
* extract the capture groups or the matched string itself.
|
||||||
|
*
|
||||||
|
* Because of the longest-match rule, queries that are more than pure tests
|
||||||
|
* aren't necessarily broken just because the regexp can accept the empty string.
|
||||||
|
*/
|
||||||
|
abstract class RegExpQuery extends DataFlow::CallNode {
|
||||||
|
abstract RegExpTerm getRegExp();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A call to `RegExp.prototype.test`.
|
||||||
|
*/
|
||||||
|
class RegExpTestCall extends DataFlow::MethodCallNode, RegExpQuery {
|
||||||
|
DataFlow::RegExpCreationNode regexp;
|
||||||
|
|
||||||
|
RegExpTestCall() {
|
||||||
|
this = regexp.getAReference().getAMethodCall("test")
|
||||||
|
}
|
||||||
|
|
||||||
|
override RegExpTerm getRegExp() {
|
||||||
|
result = regexp.getRoot()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A call to `String.prototype.search`.
|
||||||
|
*/
|
||||||
|
class RegExpSearchCall extends DataFlow::MethodCallNode, RegExpQuery {
|
||||||
|
DataFlow::RegExpCreationNode regexp;
|
||||||
|
|
||||||
|
RegExpSearchCall() {
|
||||||
|
getMethodName() = "search" and
|
||||||
|
regexp.getAReference().flowsTo(getArgument(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
override RegExpTerm getRegExp() {
|
||||||
|
result = regexp.getRoot()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports regular expressions that can match the empty string when they are
// used in a pure match test (`test`) or index lookup (`search`).
from RegExpTerm term, RegExpQuery call, string message
where
  // The term accepts the empty string ...
  term.isNullable() and
  // ... has no sub-pattern as a direct child (compare the `(?!x)` test case) ...
  not term.getAChild() instanceof RegExpSubPattern and
  // ... is not an intentional match-anything pattern such as `.*` or `[\d\D]*` ...
  not isUniversalRegExp(term) and
  // ... and is one of the effective roots (top-level alternatives) of the regexp used by the call.
  term = getEffectiveRoot(call.getRegExp()) and
  (
    // `regexp.test(str)`: skip terms containing both `^` and `$`, which may
    // legitimately be testing for an empty or optional whole-string match.
    call instanceof RegExpTestCall and
    not isPossiblyAnchoredOnBothEnds(term) and
    message = "This regular expression always matches when used in a test $@, as it can match an empty substring."
    or
    // `str.search(regexp)`: skip terms containing `$` or any sub-pattern,
    // NOTE(review): presumably because those can move the match away from index 0 - confirm.
    call instanceof RegExpSearchCall and
    not term.getAChild*() instanceof RegExpDollar and
    not term.getAChild*() instanceof RegExpSubPattern and
    message = "This regular expression always matches at index 0 when used $@, as it matches the empty substring."
  )
select term, message, call, "here"
|
||||||
3
javascript/ql/src/RegExp/examples/RegExpAlwaysMatches.js
Normal file
3
javascript/ql/src/RegExp/examples/RegExpAlwaysMatches.js
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
if (!/[a-z0-9]*/.test(id)) {
|
||||||
|
throw new Error("Invalid id: " + id);
|
||||||
|
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
if (!/^[a-z0-9]*$/.test(id)) {
|
||||||
|
throw new Error("Invalid id: " + id);
|
||||||
|
}
|
||||||
@@ -764,6 +764,23 @@ class RegExpCharacterClass extends RegExpTerm, @regexp_char_class {
|
|||||||
override string getAMatchedString() {
|
override string getAMatchedString() {
|
||||||
not isInverted() and result = getAChild().getAMatchedString()
|
not isInverted() and result = getAChild().getAMatchedString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if this character class matches any character.
|
||||||
|
*/
|
||||||
|
predicate isUniversalClass() {
|
||||||
|
// [^]
|
||||||
|
isInverted() and not exists(getAChild())
|
||||||
|
or
|
||||||
|
// [\w\W] and similar
|
||||||
|
not isInverted() and
|
||||||
|
exists(string cce1, string cce2 |
|
||||||
|
cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
|
||||||
|
cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
|
||||||
|
|
|
||||||
|
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
| tst.js:2:11:2:20 | ^(https:)? | This regular expression always matches when used in a test $@, as it can match an empty substring. | tst.js:2:10:2:29 | /^(https:)?/.test(x) | here |
|
||||||
|
| tst.js:14:11:14:19 | (\\.com)?$ | This regular expression always matches when used in a test $@, as it can match an empty substring. | tst.js:14:10:14:28 | /(\\.com)?$/.test(x) | here |
|
||||||
|
| tst.js:22:11:22:34 | ^(?:https?:\|ftp:\|file:)? | This regular expression always matches when used in a test $@, as it can match an empty substring. | tst.js:22:10:22:43 | /^(?:ht ... test(x) | here |
|
||||||
|
| tst.js:30:11:30:20 | (foo\|bar)? | This regular expression always matches when used in a test $@, as it can match an empty substring. | tst.js:30:10:30:29 | /(foo\|bar)?/.test(x) | here |
|
||||||
|
| tst.js:34:21:34:26 | (baz)? | This regular expression always matches when used in a test $@, as it can match an empty substring. | tst.js:34:10:34:35 | /^foo\|b ... test(x) | here |
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
RegExp/RegExpAlwaysMatches.ql
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
function optionalPrefix(x) {
|
||||||
|
return /^(https:)?/.test(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function mandatoryPrefix(x) {
|
||||||
|
return /^https:/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function httpOrHttps(x) {
|
||||||
|
return /^https?:/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function optionalSuffix(x) {
|
||||||
|
return /(\.com)?$/.test(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function mandatorySuffix(x) {
|
||||||
|
return /\.com$/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function protocol(x) {
|
||||||
|
return /^(?:https?:|ftp:|file:)?/.test(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function doubleAnchored(x) {
|
||||||
|
return /^(foo|bar)?$/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function noAnchor(x) {
|
||||||
|
return /(foo|bar)?/.test(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function altAnchor(x) {
|
||||||
|
return /^foo|bar$|(baz)?/.test(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function wildcard(x) {
|
||||||
|
return /.*/.test(x); // OK - obviously intended to match anything
|
||||||
|
}
|
||||||
|
|
||||||
|
function wildcard2(x) {
|
||||||
|
return /[\d\D]*/.test(x); // OK - obviously intended to match anything
|
||||||
|
}
|
||||||
|
|
||||||
|
function emptyAlt(x) {
|
||||||
|
return /^$|foo|bar/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function emptyAlt2(x) {
|
||||||
|
return /(^$|foo|bar)/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function emptyAlt3(x) {
|
||||||
|
return /((^$|foo|bar))/.test(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function search(x) {
|
||||||
|
return /[a-z]*/.search(x); // NOT OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function search2(x) {
|
||||||
|
return /[a-z]/.search(x); // OK
|
||||||
|
}
|
||||||
|
|
||||||
|
function lookahead(x) {
|
||||||
|
return /(?!x)/.search(x); // OK
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user