mirror of
https://github.com/github/codeql.git
synced 2026-04-30 03:05:15 +02:00
Merge pull request #11833 from erik-krogh/trackPyReg
PY: track string-constants to regular expression uses
This commit is contained in:
@@ -38,15 +38,30 @@ private API::Node relevant_re_member(string name) {
|
||||
name != "escape"
|
||||
}
|
||||
|
||||
private import semmle.python.dataflow.new.internal.DataFlowImplForRegExp as RegData
|
||||
|
||||
/**
* A data-flow configuration for tracking string-constants that are used as regular expressions.
*
* Sources are `Bytes` and `Unicode` expressions; sinks are expressions for which
* `used_as_regex_internal` holds (with any mode).
*/
|
||||
private class RegexTracking extends RegData::Configuration {
|
||||
// The configuration is identified by the string "RegexTracking".
RegexTracking() { this = "RegexTracking" }
|
||||
|
||||
/** Holds if `node` is a string-constant expression, that is, a `Bytes` or a `Unicode` expression. */
override predicate isSource(RegData::Node node) {
|
||||
node.asExpr() instanceof Bytes or
|
||||
node.asExpr() instanceof Unicode
|
||||
}
|
||||
|
||||
/** Holds if `node` is an expression that is used as a regex, regardless of the regex-mode. */
override predicate isSink(RegData::Node node) { used_as_regex_internal(node.asExpr(), _) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `s` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
|
||||
* Holds if the expression `e` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
|
||||
* If regex mode is not known, `mode` will be `"None"`.
|
||||
*
|
||||
* This predicate does not perform any data-flow tracking.
|
||||
*/
|
||||
predicate used_as_regex(Expr s, string mode) {
|
||||
(s instanceof Bytes or s instanceof Unicode) and
|
||||
private predicate used_as_regex_internal(Expr e, string mode) {
|
||||
/* Call to re.xxx(regex, ... [mode]) */
|
||||
exists(DataFlow::CallCfgNode call, string name |
|
||||
call.getArg(0).asExpr() = s and
|
||||
call.getArg(0).asExpr() = e and
|
||||
call = relevant_re_member(name).getACall()
|
||||
|
|
||||
mode = "None"
|
||||
@@ -55,6 +70,21 @@ predicate used_as_regex(Expr s, string mode) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the string-constant `s` ends up being used as a regex with the `re` module, with the regex-mode `mode` (if known).
|
||||
* If the regex mode is not known, `mode` will be `"None"`.
|
||||
*
|
||||
* This predicate uses data-flow tracking (the `RegexTracking` configuration) to find the
* string-constant that flows to an expression used as a regex.
|
||||
*/
|
||||
predicate used_as_regex(Expr s, string mode) {
|
||||
// Only string-constant expressions (`Bytes` or `Unicode`) are reported.
(s instanceof Bytes or s instanceof Unicode) and
|
||||
exists(RegexTracking t, RegData::Node source, RegData::Node sink |
|
||||
// `source` flows to `sink` under the `RegexTracking` configuration.
t.hasFlow(source, sink) and
|
||||
// The mode is determined at the place where the value is used as a regex.
used_as_regex_internal(sink.asExpr(), mode) and
|
||||
// The reported expression is the origin of the flow, not the use site.
s = source.asExpr()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
|
||||
* that have multiple names, we pick the long-form name as a canonical representative.
|
||||
|
||||
Reference in New Issue
Block a user