Merge pull request #17712 from yoff/python/re-finditer-match

Python: model that `re.finditer` returns an iterable of `re.Match` objects
This commit is contained in:
yoff
2024-10-11 16:08:49 +02:00
committed by GitHub
3 changed files with 50 additions and 20 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Modelled that `re.finditer` returns an iterable of `re.Match` objects. This is now understood by the API graph in many cases.

View File

@@ -3284,6 +3284,18 @@ module StdlibPrivate {
}
}
/**
 * Gets a base API node on which the regular expression functions are available.
 *
 * - `compiled = false`: the node for the imported `re` module.
 * - `compiled = true`: the return value of a call described by `RePatternSummary`,
 *   i.e. a compiled regular expression.
 */
private API::Node re(boolean compiled) {
  compiled = false and
  result = API::moduleImport("re")
  or
  compiled = true and
  exists(RePatternSummary pattern | result = pattern.getACall().(API::CallNode).getReturn())
}
/**
* A flow summary for methods returning a `re.Match` object
*
@@ -3293,17 +3305,18 @@ module StdlibPrivate {
// Two summary names are used: "re.Match" for calls made on the `re` module itself and
// "compiled re.Match" for calls made on a compiled pattern (see `getACall`).
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
// Gets a call to `match`, `search` or `fullmatch`, either on the `re` module (summary
// "re.Match") or on a compiled pattern (summary "compiled re.Match").
//
// NOTE(review): this hunk is rendered without +/- markers. The first disjunction (up to the
// standalone `.getACall()` line) looks like the pre-change body and the `exists(...)` after it
// looks like its replacement; the two are not joined by any connective, so the text as shown
// is not valid QL. Confirm against the real diff before copying this code.
override DataFlow::CallCfgNode getACall() {
// presumably the removed lines: each case spelled out separately
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
// presumably the added lines: the same two cases, with the base node taken from `re(compiled)`
exists(API::Node re, boolean compiled |
re = re(compiled) and
(
// the summary name is tied to whether the base node is the module or a compiled pattern
compiled = false and
this = "re.Match"
or
compiled = true and
this = "compiled re.Match"
)
|
result = re.getMember(["match", "search", "fullmatch"]).getACall()
)
}
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3340,6 +3353,13 @@ module StdlibPrivate {
}
}
/**
 * Gets an API node for a `re.Match` object.
 *
 * This is either the return value of a call described by `ReMatchSummary`, or a subscript
 * of the value returned by `finditer` (on the `re` module or on a compiled pattern).
 */
private API::Node match() {
  result = re(_).getMember("finditer").getReturn().getASubscript()
  or
  exists(ReMatchSummary summary | result = summary.getACall().(API::CallNode).getReturn())
}
/**
* A flow summary for methods on a `re.Match` object
*
@@ -3353,15 +3373,7 @@ module StdlibPrivate {
methodName in ["expand", "group", "groups", "groupdict"]
}
// Gets a call to the method `methodName` on a `re.Match` object.
//
// NOTE(review): this hunk is rendered without +/- markers, so two definitions of `getACall`
// appear back to back. The multi-line one looks like the pre-change version (receiver limited
// to the return value of a `ReMatchSummary` call); the one-line version after it looks like the
// replacement, which takes the receiver from `match()`. Confirm against the real diff.
override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
// presumably the added line: the receiver is any node returned by `match()`
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3463,6 +3475,14 @@ module StdlibPrivate {
) and
preservesValue = false
)
or
// flow from input string to the `string` attribute of each match object returned by `finditer`
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
input in ["Argument[" + arg + "]", "Argument[string:]"] and
methodName = "finditer" and
output = "ReturnValue.ListElement.Attribute[string]" and
preservesValue = true
)
)
}
}

View File

@@ -38,6 +38,12 @@ ensure_tainted(
compiled_pat.match(ts).string, # $ tainted
re.compile(ts).match("safe").re.pattern, # $ tainted
list(re.finditer(pat, ts))[0].string, # $ tainted
[m.string for m in re.finditer(pat, ts)], # $ tainted
list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted // this requires list content in type tracking
[m.groups()[0] for m in re.finditer(pat, ts)], # $ tainted
)
ensure_not_tainted(
safe_match.expand("Hello \1"),