Structure development

This commit is contained in:
jorgectf
2021-03-24 17:59:48 +01:00
parent 444a15a461
commit 28fdeba4fa
9 changed files with 264 additions and 95 deletions

View File

@@ -13,10 +13,10 @@
// determine precision above
import python
import semmle.python.security.dataflow.RegexInjection
import experimental.semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
// Reports every flow path that `RegexInjectionFlowConfig` finds between a source and a sink.
from RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ regular expression is constructed from a $@.",
sink.getNode(), "This", source.getNode(), "user-provided value"
// NOTE(review): the next line repeats the previous one — looks like an old/new diff pair; confirm only one belongs in the query.
sink.getNode(), "This", source.getNode(), "user-provided value"

View File

@@ -13,3 +13,32 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
/**
* A data-flow node that works with regular expressions.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data-flow node for the regular expression that this node works with. */
abstract DataFlow::Node getRegexNode();
/** Gets the `Attribute` expression of the regex method associated with this node. */
abstract Attribute getRegexMethod();
}
}
/**
* A data-flow node that works with regular expressions.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
// The concrete model this node delegates to.
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the regular-expression node reported by the underlying `RegexExecution::Range`. */
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
/** Gets the regex method `Attribute` reported by the underlying `RegexExecution::Range`. */
Attribute getRegexMethod() { result = range.getRegexMethod() }
}

View File

@@ -8,4 +8,234 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the Python standard library. */
private module Stdlib {
// ---------------------------------------------------------------------------
// re
// ---------------------------------------------------------------------------
private module Re {
/** Gets a node that refers to the `re` module, as tracked by type tracker `t`. */
private DataFlow::Node re(DataFlow::TypeTracker t) {
  // Base case: the import of `re` itself.
  result = DataFlow::importNode("re") and
  t.start()
  or
  // Recursive case: one further tracking step from an already tracked reference.
  exists(DataFlow::TypeTracker prev | result = re(prev).track(prev, t))
}
/** Gets a reference to the `re` module, i.e. a node reached by `re(t)` with the end type tracker. */
DataFlow::Node re() { result = re(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `re` module, as tracked by type tracker `t`.
* WARNING: Only holds for a few predefined attributes
* (the names listed in the first line of the predicate body).
*/
private DataFlow::Node re_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn", "compile"] and
(
// Base case 1: the import node for `re.<attr_name>`.
t.start() and
result = DataFlow::importNode("re" + "." + attr_name)
or
// Base case 2: a reference to the `re` module, with the tracker starting inside attribute `attr_name`.
t.startInAttr(attr_name) and
result = re()
)
or
// Due to bad performance when using normal setup with `re_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
// Recursive case: one step (described by `summary`) from a previously tracked reference.
re_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `res` is reached by a single `DataFlow::StepSummary::step`, described by `summary`,
* from a node given by `re_attr(t2, attr_name)`.
*
* This is the helper that `re_attr` calls for its recursive case; it is kept as a separate
* `pragma[nomagic]` predicate.
*/
pragma[nomagic]
private predicate re_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(re_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `re` module,
* i.e. a node reached by `re_attr(t, attr_name)` with the end type tracker.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node re_attr(string attr_name) {
result = re_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
 * Gets a reference to one of the `re` module attributes that run a regular expression
 * right away, i.e. every attribute modeled by `re_attr` except `compile`.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node re_exec_attr() {
  exists(string name |
    name in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] and
    result = re_attr(name)
  )
}
/**
 * A call whose callee is a reference to `re.match`.
 * See https://docs.python.org/3/library/re.html#re.match
 */
private class ReMatchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReMatchCall() { re_attr("match").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.fullmatch`.
 * See https://docs.python.org/3/library/re.html#re.fullmatch
 */
private class ReFullMatchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFullMatchCall() { re_attr("fullmatch").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.search`.
 * See https://docs.python.org/3/library/re.html#re.search
 */
private class ReSearchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSearchCall() { re_attr("search").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.split`.
 * See https://docs.python.org/3/library/re.html#re.split
 */
private class ReSplitCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSplitCall() { re_attr("split").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.findall`.
 * See https://docs.python.org/3/library/re.html#re.findall
 */
private class ReFindAllCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFindAllCall() { re_attr("findall").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.finditer`.
 * See https://docs.python.org/3/library/re.html#re.finditer
 */
private class ReFindIterCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFindIterCall() { re_attr("finditer").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.sub`.
 * See https://docs.python.org/3/library/re.html#re.sub
 */
private class ReSubCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSubCall() { re_attr("sub").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call whose callee is a reference to `re.subn`.
 * See https://docs.python.org/3/library/re.html#re.subn
 */
private class ReSubNCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSubNCall() { re_attr("subn").asCfgNode() = node.getFunction() }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }

  /** Gets the callee expression of the call, provided it is an `Attribute`. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }
}
/**
 * A call to `re.compile`.
 * See https://docs.python.org/3/library/re.html#re.compile
 */
private class ReCompileCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReCompileCall() { node.getFunction() = re_attr("compile").asCfgNode() }

  /** Gets the node for positional argument 0 of the call (the pattern being compiled). */
  override DataFlow::Node getRegexNode() { result.asCfgNode() = node.getArg(0) }

  /**
   * Gets an executing method (`match`, `search`, ...) that is read from the value
   * produced by this `re.compile` call, e.g. `p.match` in `p = re.compile(x); p.match(s)`.
   */
  override Attribute getRegexMethod() {
    exists(DataFlow::AttrRead reMethod |
      // The attribute is read from a compiled pattern, so it is identified by name;
      // it is not a reference to the module-level `re.<method>` function.
      reMethod.getAttributeName() in [
          "match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"
        ] and
      // The object of the read must locally originate from this call (its result),
      // not from the `re.compile` callee expression.
      this = reMethod.getObject().getALocalSource() and
      result = reMethod.asExpr().(Attribute)
    )
  }
}
/**
 * A call to one of the `re` module functions that immediately execute a regular
 * expression, e.g. `re.match(pattern, string)`.
 * See `re_exec_attr()`
 */
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;
  Attribute regexMethod;

  DirectRegex() {
    // `this` is the call; `re_exec_attr()` yields references to the function being called.
    this.getFunction() = re_exec_attr() and
    // Bind the fields from the call itself rather than from this class's own getters,
    // which would make the definition circular.
    regexNode = this.getArg(0) and
    regexMethod = this.getFunction().asExpr().(Attribute)
  }

  /** Gets the node for positional argument 0 of the call (the pattern). */
  override DataFlow::Node getRegexNode() { result = regexNode }

  /** Gets the callee expression of the call, which is an `Attribute` such as `re.match`. */
  override Attribute getRegexMethod() { result = regexMethod }
}
/**
 * A call to an executing method (`match`, `search`, ...) on a value that locally
 * originates from a `re.compile` call, e.g. `re.compile(pattern).match(string)`.
 * See `ReCompileCall`, `re_exec_attr()`
 */
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;
  Attribute regexMethod;

  CompiledRegex() {
    exists(ReCompileCall compileCall, DataFlow::AttrRead reMethod |
      // `this` is the call of the method read from the compiled pattern.
      this.getFunction() = reMethod and
      reMethod.getAttributeName() in [
          "match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"
        ] and
      // The receiver of the method must come from the `re.compile` call.
      compileCall = reMethod.getObject().getALocalSource() and
      // The regular expression is the pattern handed to `re.compile`, not an argument of `this`.
      regexNode = compileCall.getRegexNode() and
      regexMethod = reMethod.asExpr().(Attribute)
    )
  }

  /** Gets the pattern argument of the originating `re.compile` call. */
  override DataFlow::Node getRegexNode() { result = regexNode }

  /** Gets the method attribute read from the compiled pattern, e.g. `p.match`. */
  override Attribute getRegexMethod() { result = regexMethod }
}
}
}

View File

@@ -4,7 +4,7 @@
*/
import python
import semmle.python.Concepts
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources

View File

@@ -625,49 +625,5 @@ module Cryptography {
final override int minimumSecureKeySize() { result = 224 }
}
}
/*
*/
/** A string that is one of the `re` function names listed in the characteristic predicate. */
class ReMethods extends string {
ReMethods() { this in ["match", "fullmatch", "search", "split", "findall", "finditer"] }
}
/** The first argument of a call to a member of the `re` module named by `ReMethods`. */
class DirectRegex extends DataFlow::Node {
  DirectRegex() {
    exists(DataFlow::CallCfgNode call, ReMethods name |
      this = call.getArg(0) and
      call = API::moduleImport("re").getMember(name).getACall()
    )
  }
}
/**
 * The first argument of a `re.compile` call whose result is a local source of the
 * object of an attribute read named by `ReMethods`.
 */
class CompiledRegex extends DataFlow::Node {
  CompiledRegex() {
    exists(DataFlow::AttrRead methodRead, DataFlow::CallCfgNode compileCall |
      compileCall = API::moduleImport("re").getMember("compile").getACall() and
      methodRead.getAttributeName() instanceof ReMethods and
      methodRead.getObject().getALocalSource() = compileCall and
      this = compileCall.getArg(0)
    )
  }
}
/** A data-flow node that is either a `DirectRegex` or a `CompiledRegex`. */
class RegexExecution extends DataFlow::Node {
RegexExecution() { this instanceof DirectRegex or this instanceof CompiledRegex } // How should this be cross-imported with Stdlib?
}
/*
*/
/** Provides the extension point for `RegexExecution`. */
module RegexExecution {
/** A data-flow node that exposes a regular-expression node; concrete models extend this class. */
abstract class Range extends DataFlow::Node {
/** Gets the data-flow node for the regular expression associated with this node. */
abstract DataFlow::Node getRegexNode();
}
}
/** A data-flow node that is a `RegexExecution::Range`; its member predicate delegates to that range. */
class RegexExecution extends DataFlow::Node {
// The concrete model this node delegates to.
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the regular-expression node reported by the underlying `RegexExecution::Range`. */
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
}
}

View File

@@ -864,52 +864,6 @@ private module Stdlib {
/** The API node for the `sqlite3` module import, as a `PEP249ModuleApiNode`. */
class Sqlite3 extends PEP249ModuleApiNode {
Sqlite3() { this = API::moduleImport("sqlite3") }
}
// ---------------------------------------------------------------------------
// re
// ---------------------------------------------------------------------------
private module Re {
/** A string that is one of the `re` method names listed in the characteristic predicate. */
private class ReMethods extends string {
ReMethods() { this in ["match", "fullmatch", "search", "split", "findall", "finditer"] }
}
/** A call to a member of the `re` module named by `ReMethods`, i.e. `re.ReMethod(pattern, string)`. */
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;

  DirectRegex() {
    exists(ReMethods name | this = API::moduleImport("re").getMember(name).getACall()) and
    this.getArg(0) = regexNode
  }

  /** Gets the first argument of the call. */
  override DataFlow::Node getRegexNode() { regexNode = result }
}
/**
 * A call to an attribute named by `ReMethods`, read from a value that locally
 * originates from a `re.compile` call, i.e. `re.compile(pattern).ReMethod`.
 */
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;

  CompiledRegex() {
    exists(DataFlow::AttrRead methodRead, DataFlow::CallCfgNode compileCall |
      compileCall = API::moduleImport("re").getMember("compile").getACall() and
      methodRead.getAttributeName() instanceof ReMethods and
      methodRead.getObject().getALocalSource() = compileCall and
      methodRead = this.getFunction() and
      compileCall.getArg(0) = regexNode
    )
  }

  /** Gets the first argument of the originating `re.compile` call. */
  override DataFlow::Node getRegexNode() { regexNode = result }
}
/** The first argument of a call to `re.escape`. */
private class RegexEscape extends DataFlow::Node {
  RegexEscape() {
    exists(DataFlow::CallCfgNode escapeCall |
      escapeCall = API::moduleImport("re").getMember("escape").getACall() and
      this = escapeCall.getArg(0)
    )
  }
}
}
}
// ---------------------------------------------------------------------------