add tracking of strings to compile-sites for poly-redos, in the style of Ruby

This commit is contained in:
erik-krogh
2023-02-02 19:26:51 +01:00
parent 52959d7c0a
commit 6e712b293a
4 changed files with 23 additions and 4 deletions

View File

@@ -85,6 +85,17 @@ predicate used_as_regex(Expr s, string mode) {
)
}
private import semmle.python.Concepts
private import semmle.python.RegexTreeView
/** Gets a parsed regular expression term that is executed at `exec`. */
RegExpTerm getTermForExecution(RegexExecution exec) {
exists(RegexTracking t, DataFlow::Node source | t.hasFlow(source, exec.getRegex()) |
result.getRegex() = source.asExpr() and
result.isRootTerm()
)
}
/**
* Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
* that have multiple names, we pick the long-form name as a canonical representative.

View File

@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
private import semmle.python.regex
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -66,7 +67,7 @@ module PolynomialReDoS {
RegexExecutionAsSink() {
exists(RegexExecution re |
re.getRegex().asExpr() = t.getRegex() and
t = getTermForExecution(re) and
this = re.getString()
) and
t.isRootTerm()

View File

@@ -6,6 +6,9 @@ edges
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:8:30:8:33 | ControlFlowNode for text |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:9:32:9:35 | ControlFlowNode for text |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:12:17:12:20 | ControlFlowNode for text |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:18:28:18:31 | ControlFlowNode for text |
| test.py:14:33:14:39 | ControlFlowNode for my_text | test.py:16:24:16:30 | ControlFlowNode for my_text |
| test.py:18:28:18:31 | ControlFlowNode for text | test.py:14:33:14:39 | ControlFlowNode for my_text |
nodes
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:2:26:2:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
@@ -15,8 +18,12 @@ nodes
| test.py:8:30:8:33 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
| test.py:9:32:9:35 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
| test.py:12:17:12:20 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
| test.py:14:33:14:39 | ControlFlowNode for my_text | semmle.label | ControlFlowNode for my_text |
| test.py:16:24:16:30 | ControlFlowNode for my_text | semmle.label | ControlFlowNode for my_text |
| test.py:18:28:18:31 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
subpaths
#select
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings starting with '0.9' and with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:12:17:12:20 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:12:17:12:20 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:11:31:11:33 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:16:24:16:30 | ControlFlowNode for my_text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:16:24:16:30 | ControlFlowNode for my_text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:18:23:18:25 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -11,9 +11,9 @@ def code_execution():
reg = re.compile(r"^\s+|\s+$")
reg.sub("", text) # NOT OK
def indirect(input_reg_str):
def indirect(input_reg_str, my_text):
my_reg = re.compile(input_reg_str)
my_reg.sub("", text) # NOT OK - but not found
my_reg.sub("", my_text) # NOT OK
indirect(r"^\s+|\s+$")
indirect(r"^\s+|\s+$", text)