Python: Model taint from re.<func> calls

This commit is contained in:
Rasmus Wriedt Larsen
2023-11-08 17:18:40 +01:00
parent 851c30e797
commit 4943fc5a57
2 changed files with 86 additions and 11 deletions

View File

@@ -3157,6 +3157,80 @@ private module StdlibPrivate {
}
}
/**
 * A flow summary for the `re` functions, and the same-named methods reached
 * through a compiled pattern, whose result is not a `re.Match` object:
 * `split`, `findall`, `finditer`, `sub` and `subn`.
 *
 * Each instance is identified by a string of the form `re.<name>` (module-level
 * function) or `compiled re.<name>` (method on a compiled pattern).
 *
 * See https://docs.python.org/3/library/re.html#functions
 */
class ReFunctionsSummary extends SummarizedCallable {
  /** The name of the `re` function/method this summary describes. */
  string methodName;

  ReFunctionsSummary() {
    // one summary per combination of call style and function name
    this = ["re.", "compiled re."] + methodName and
    methodName in ["split", "findall", "finditer", "sub", "subn"]
  }

  /** Gets a call that this summary applies to. */
  override DataFlow::CallCfgNode getACall() {
    // module-level form: `re.<methodName>(...)`
    this = "re." + methodName and
    result = API::moduleImport("re").getMember(methodName).getACall()
    or
    // method form: `<methodName>` invoked on the value returned by a call
    // covered by `RePatternSummary`
    this = "compiled re." + methodName and
    exists(API::CallNode patternCall |
      patternCall = any(RePatternSummary c).getACall() and
      result = patternCall.getReturn().getMember(methodName).getACall()
    )
  }

  /** This summary never contributes callbacks. */
  override DataFlow::ArgumentNode getACallback() { none() }

  /**
   * Holds if taint flows from `input` to `output` through this callable.
   * All flow described here is taint flow only, so `preservesValue` is
   * always `false`.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = false and
    exists(int shift |
      // The module-level functions take the pattern as an extra leading
      // argument; the methods on a compiled pattern do not. `shift` is the
      // amount by which positional indices move down for the latter.
      this = "re." + methodName and shift = 0
      or
      this = "compiled re." + methodName and shift = 1
    |
      // taint from the string being searched to the result
      exists(int stringIdx |
        stringIdx = methodName.(RegexExecutionMethod).getStringArgIndex() - shift and
        input = ["Argument[" + stringIdx + "]", "Argument[string:]"]
      ) and
      (
        methodName = "sub" and
        output = "ReturnValue"
        or
        // only the first element of the returned tuple receives taint
        methodName = "subn" and
        output = "ReturnValue.TupleElement[0]"
        or
        methodName in ["split", "findall", "finditer"] and
        output = "ReturnValue.ListElement"
        or
        // TODO: Since we currently model lists as tainted, the result of findall and split needs to be tainted
        methodName in ["split", "findall"] and
        output = "ReturnValue"
      )
      or
      // taint from the replacement value used for substitution
      exists(string replSpec |
        replSpec = ["Argument[" + (1 - shift) + "]", "Argument[repl:]"] and
        // `repl` can also be a function, in which case its return value is what ends up in the result
        input = [replSpec, replSpec + ".ReturnValue"]
      ) and
      (
        methodName = "sub" and
        output = "ReturnValue"
        or
        methodName = "subn" and
        output = "ReturnValue.TupleElement[0]"
      )
    )
  }
}
/**
* A call to 're.escape'.
* See https://docs.python.org/3/library/re.html#re.escape

View File

@@ -13,29 +13,30 @@ ensure_tainted(
re.fullmatch(pat, ts), # $ MISSING: tainted
# other functions not returning Match objects
re.split(pat, ts), # $ MISSING: tainted
re.split(pat, ts)[0], # $ MISSING: tainted
re.split(pat, ts), # $ tainted
re.split(pat, ts)[0], # $ tainted
re.findall(pat, ts), # $ MISSING: tainted
re.findall(pat, ts), # $ tainted
re.findall(pat, ts)[0], # $ tainted
re.finditer(pat, ts), # $ MISSING: tainted
[x for x in re.finditer(pat, ts)], # $ MISSING: tainted
[x for x in re.finditer(pat, ts)], # $ tainted
re.sub(pat, repl="safe", string=ts), # $ MISSING: tainted
re.sub(pat, repl=lambda m: ..., string=ts), # $ MISSING: tainted
re.sub(pat, repl=ts, string="safe"), # $ MISSING: tainted
re.sub(pat, repl=lambda m: ts, string="safe"), # $ MISSING: tainted
re.sub(pat, repl="safe", string=ts), # $ tainted
re.sub(pat, repl=lambda m: ..., string=ts), # $ tainted
re.sub(pat, repl=ts, string="safe"), # $ tainted
re.sub(pat, repl=lambda m: ts, string="safe"), # $ tainted
re.subn(pat, repl="safe", string=ts), # $ MISSING: tainted
re.subn(pat, repl="safe", string=ts)[0], # $ MISSING: tainted // the string
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
# same for compiled patterns
compiled_pat.search(ts), # $ MISSING: tainted
compiled_pat.match(ts), # $ MISSING: tainted
compiled_pat.fullmatch(ts), # $ MISSING: tainted
compiled_pat.split(ts), # $ MISSING: tainted
compiled_pat.split(ts)[0], # $ MISSING: tainted
compiled_pat.split(ts), # $ tainted
compiled_pat.split(ts)[0], # $ tainted
# ...