mirror of
https://github.com/github/codeql.git
synced 2026-04-29 18:55:14 +02:00
Structure development
This commit is contained in:
@@ -13,10 +13,10 @@
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import semmle.python.security.dataflow.RegexInjection
|
||||
import experimental.semmle.python.security.injection.RegexInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
// Report every flow path that `RegexInjectionFlowConfig` finds between a path source and a path sink.
// The two `$@` placeholders in the message are filled by the (element, label) pairs that follow it.
from RegexInjectionFlowConfig flowConfig, DataFlow::PathNode source, DataFlow::PathNode sink
where flowConfig.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ regular expression is constructed from a $@.",
  sink.getNode(), "This", source.getNode(), "user-provided value"
|
||||
@@ -13,3 +13,32 @@ private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
  /**
   * A data-flow node that works with regular expressions.
   *
   * This is the extension point for modeling new APIs. To refine API models that
   * already exist, extend `RegexExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /**
     * Gets the `Attribute` expression associated with this execution.
     * Concrete subclasses decide which attribute access this is.
     */
    abstract Attribute getRegexMethod();

    /**
     * Gets the data-flow node associated with the regular expression of this execution.
     * Concrete subclasses decide which node this is.
     */
    abstract DataFlow::Node getRegexNode();
  }
}
|
||||
|
||||
/**
 * A data-flow node that works with regular expressions.
 *
 * This class is the one to extend when refining existing API models. To model
 * new APIs, extend `RegexExecution::Range` instead.
 */
class RegexExecution extends DataFlow::Node {
  // The `Range` instance that this node coincides with; all members delegate to it.
  RegexExecution::Range range;

  RegexExecution() { range = this }

  /** Gets the attribute expression reported by the underlying `Range`. */
  Attribute getRegexMethod() { range.getRegexMethod() = result }

  /** Gets the regular-expression node reported by the underlying `Range`. */
  DataFlow::Node getRegexNode() { range.getRegexNode() = result }
}
|
||||
|
||||
@@ -8,4 +8,234 @@ private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/** Provides models for the Python standard library. */
|
||||
private module Stdlib {
|
||||
// ---------------------------------------------------------------------------
|
||||
// re
|
||||
// ---------------------------------------------------------------------------
|
||||
private module Re {
|
||||
|
||||
/** Gets a reference to the `re` module, tracked with type tracker `t`. */
private DataFlow::Node re(DataFlow::TypeTracker t) {
  // Base case: the import of `re` itself, with a freshly started tracker.
  result = DataFlow::importNode("re") and
  t.start()
  or
  // Recursive case: follow one tracking step from an already known reference.
  exists(DataFlow::TypeTracker prev | re(prev).track(prev, t) = result)
}
|
||||
|
||||
/** Gets a reference to the `re` module (the tracked variant evaluated at the end tracker). */
DataFlow::Node re() { re(DataFlow::TypeTracker::end()) = result }
|
||||
|
||||
/**
 * Gets a reference to the attribute `attr_name` of the `re` module, tracked with
 * type tracker `t`.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node re_attr(DataFlow::TypeTracker t, string attr_name) {
  attr_name in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn", "compile"] and
  (
    // Either the attribute is imported directly ...
    result = DataFlow::importNode("re" + "." + attr_name) and
    t.start()
    or
    // ... or tracking starts at a reference to the module, inside attribute `attr_name`.
    result = re() and
    t.startInAttr(attr_name)
  )
  or
  // Due to bad performance when using normal setup with `re_attr(t2, attr_name).track(t2, t)`
  // we have inlined that code and forced a join
  exists(DataFlow::TypeTracker t2, DataFlow::StepSummary summary |
    t = t2.append(summary) and
    re_attr_first_join(t2, attr_name, result, summary)
  )
}
|
||||
|
||||
/**
 * Holds if `res` is reached in one step, summarized by `summary`, from a reference to
 * attribute `attr_name` of the `re` module that is tracked with `t2`.
 *
 * Helper for the two-argument `re_attr`; kept as a separate `nomagic` predicate so that
 * this join is evaluated on its own.
 */
pragma[nomagic]
private predicate re_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
  exists(DataFlow::Node tracked |
    tracked = re_attr(t2, attr_name) and
    DataFlow::StepSummary::step(tracked, res, summary)
  )
}
|
||||
|
||||
/**
 * Gets a reference to the attribute `attr_name` of the `re` module
 * (the tracked variant evaluated at the end tracker).
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node re_attr(string attr_name) {
  re_attr(DataFlow::TypeTracker::end(), attr_name) = result
}
|
||||
|
||||
/**
 * Gets a reference to one of the attributes of the `re` module that immediately
 * executes an expression, i.e. every predefined attribute except `compile`.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node re_exec_attr() {
  exists(string method |
    // The one-argument `re_attr` is the tracked variant evaluated at the end tracker.
    result = re_attr(method) and
    method in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
  )
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.match`.
 * See https://docs.python.org/3/library/re.html#re.match
 */
private class ReMatchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReMatchCall() { re_attr("match").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.fullmatch`.
 * See https://docs.python.org/3/library/re.html#re.fullmatch
 */
private class ReFullMatchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFullMatchCall() { re_attr("fullmatch").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.search`.
 * See https://docs.python.org/3/library/re.html#re.search
 */
private class ReSearchCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSearchCall() { re_attr("search").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.split`.
 * See https://docs.python.org/3/library/re.html#re.split
 */
private class ReSplitCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSplitCall() { re_attr("split").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.findall`.
 * See https://docs.python.org/3/library/re.html#re.findall
 */
private class ReFindAllCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFindAllCall() { re_attr("findall").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.finditer`.
 * See https://docs.python.org/3/library/re.html#re.finditer
 */
private class ReFindIterCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReFindIterCall() { re_attr("finditer").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.sub`.
 * See https://docs.python.org/3/library/re.html#re.sub
 */
private class ReSubCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSubCall() { re_attr("sub").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call whose callee is a reference to `re.subn`.
 * See https://docs.python.org/3/library/re.html#re.subn
 */
private class ReSubNCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReSubNCall() { re_attr("subn").asCfgNode() = node.getFunction() }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { node.getNode().getFunc().(Attribute) = result }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { node.getArg(0) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A call to `re.compile`
 * See https://docs.python.org/3/library/re.html#re.compile
 */
private class ReCompileCall extends RegexExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  ReCompileCall() { node.getFunction() = re_attr("compile").asCfgNode() }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { result.asCfgNode() = node.getArg(0) }

  /**
   * Gets an attribute access such as the `.match` in `re.compile(p).match`, i.e. a read of
   * one of the regex-executing attribute names on a value whose local source is this call.
   *
   * The previous formulation required the attribute read to be a reference to a function
   * of the `re` module and compared the object of that read with the `re.compile` callee,
   * so it did not hold for reads on the value returned by `re.compile`.
   */
  override Attribute getRegexMethod() {
    exists(DataFlow::AttrRead reMethod |
      reMethod.getAttributeName() in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] and
      // The object the attribute is read from must originate, locally, from this call.
      this = reMethod.getObject().getALocalSource() and
      result = reMethod.asExpr().(Attribute)
    )
  }
}
|
||||
|
||||
/**
 * A class for modeling expressions immediately executing a regular expression,
 * i.e. calls whose callee is a reference returned by `re_exec_attr()`.
 * See `re_exec_attr()`
 */
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;

  DirectRegex() {
    // The call's callee (not the call itself) is the reference to `re.<method>`.
    // The field is bound from the call's own argument rather than from
    // `this.getRegexNode()`, which would define the field in terms of itself.
    this.getFunction() = re_exec_attr() and
    regexNode = this.getArg(0)
  }

  /** Gets the first positional argument of this call. */
  override DataFlow::Node getRegexNode() { result = regexNode }

  /** Gets the callee of this call; has no result unless the callee is an attribute access. */
  override Attribute getRegexMethod() { result = this.getFunction().asExpr().(Attribute) }
}
|
||||
|
||||
/**
 * A call to a regex-executing method on a value whose local source is a `ReCompileCall`,
 * such as `re.compile(pattern).match(string)`.
 *
 * The regular-expression node is the one reported by the `re.compile` call; the method is
 * the attribute access that is being called.
 * See `ReCompileCall`
 *
 * NOTE(review): the previous formulation required `this` to be a `DirectRegex` (a call to a
 * function of the `re` module itself), which excludes method calls on a compiled pattern.
 * Confirm that matching the method call is the intended meaning.
 */
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
  DataFlow::Node regexNode;
  Attribute regexMethod;

  CompiledRegex() {
    exists(ReCompileCall compileCall, DataFlow::AttrRead reMethod |
      // `this` is the call of the attribute read ...
      this.getFunction() = reMethod and
      // ... whose object originates, locally, from the `re.compile` call ...
      compileCall = reMethod.getObject().getALocalSource() and
      // ... and whose name is one of the regex-executing methods.
      reMethod.getAttributeName() in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] and
      regexNode = compileCall.getRegexNode() and
      regexMethod = reMethod.asExpr().(Attribute)
    )
  }

  /** Gets the regular-expression node of the originating `re.compile` call. */
  override DataFlow::Node getRegexNode() { result = regexNode }

  /** Gets the attribute access that this call invokes. */
  override Attribute getRegexMethod() { result = regexMethod }
}
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.Concepts
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
@@ -625,49 +625,5 @@ module Cryptography {
|
||||
final override int minimumSecureKeySize() { result = 224 }
|
||||
}
|
||||
}
|
||||
/*
|
||||
*/
|
||||
|
||||
/** One of the attribute names of the `re` module that this file treats as executing a regex. */
class ReMethods extends string {
  ReMethods() {
    exists(string name |
      name in ["match", "fullmatch", "search", "split", "findall", "finditer"]
    |
      this = name
    )
  }
}
|
||||
|
||||
/** The first argument of a call to a member of the `re` module whose name is a `ReMethods`. */
class DirectRegex extends DataFlow::Node {
  DirectRegex() {
    exists(DataFlow::CallCfgNode call, ReMethods method |
      this = call.getArg(0) and
      call = API::moduleImport("re").getMember(method).getACall()
    )
  }
}
|
||||
|
||||
/**
 * The first argument of a call to `re.compile`, provided that a `ReMethods` attribute is
 * read from a value whose local source is that call.
 */
class CompiledRegex extends DataFlow::Node {
  CompiledRegex() {
    exists(DataFlow::AttrRead methodRead, DataFlow::CallCfgNode compileCall |
      compileCall = API::moduleImport("re").getMember("compile").getACall() and
      methodRead.getAttributeName() instanceof ReMethods and
      methodRead.getObject().getALocalSource() = compileCall and
      compileCall.getArg(0) = this
    )
  }
}
|
||||
|
||||
/** A data-flow node that is either a `DirectRegex` or a `CompiledRegex`. */
class RegexExecution extends DataFlow::Node {
  // How should this be cross-imported with Stdlib?
  RegexExecution() {
    this instanceof CompiledRegex
    or
    this instanceof DirectRegex
  }
}
|
||||
/*
|
||||
*/
|
||||
|
||||
/** Provides the extension point for modeling regular-expression executions. */
module RegexExecution {
  /** A data-flow node that concrete models extend to describe a regular-expression execution. */
  abstract class Range extends DataFlow::Node {
    /** Gets the data-flow node associated with the regular expression of this execution. */
    abstract DataFlow::Node getRegexNode();
  }
}
|
||||
|
||||
/** A data-flow node that coincides with some `RegexExecution::Range` and delegates to it. */
class RegexExecution extends DataFlow::Node {
  // The `Range` instance that this node is equal to.
  RegexExecution::Range range;

  RegexExecution() { range = this }

  /** Gets the regular-expression node reported by the underlying `Range`. */
  DataFlow::Node getRegexNode() { range.getRegexNode() = result }
}
|
||||
}
|
||||
@@ -864,52 +864,6 @@ private module Stdlib {
|
||||
/** The API-graph node for the import of the `sqlite3` module, as a `PEP249ModuleApiNode`. */
class Sqlite3 extends PEP249ModuleApiNode {
  Sqlite3() { API::moduleImport("sqlite3") = this }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// re
|
||||
// ---------------------------------------------------------------------------
|
||||
private module Re {
  /** The names of the `re` members that are modeled here as executing a regular expression. */
  private class ReMethods extends string {
    ReMethods() {
      exists(string name |
        name in ["match", "fullmatch", "search", "split", "findall", "finditer"]
      |
        this = name
      )
    }
  }

  /** A direct call of the form `re.ReMethod(pattern, string)`. */
  private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
    // The first argument of the call.
    DataFlow::Node regexNode;

    DirectRegex() {
      regexNode = this.getArg(0) and
      this = API::moduleImport("re").getMember(any(ReMethods m)).getACall()
    }

    /** Gets the first argument of this call. */
    override DataFlow::Node getRegexNode() { regexNode = result }
  }

  /** A call of the form `re.compile(pattern).ReMethod`. */
  private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
    // The first argument of the originating `re.compile` call.
    DataFlow::Node regexNode;

    CompiledRegex() {
      exists(DataFlow::AttrRead methodRead, DataFlow::CallCfgNode compileCall |
        compileCall = API::moduleImport("re").getMember("compile").getACall() and
        // The attribute is read from a value whose local source is the `re.compile` call ...
        methodRead.getObject().getALocalSource() = compileCall and
        methodRead.getAttributeName() instanceof ReMethods and
        // ... and this node is the call of that attribute.
        methodRead = this.getFunction() and
        compileCall.getArg(0) = regexNode
      )
    }

    /** Gets the first argument of the originating `re.compile` call. */
    override DataFlow::Node getRegexNode() { regexNode = result }
  }

  /** The first argument of a call to `re.escape`. */
  private class RegexEscape extends DataFlow::Node {
    RegexEscape() {
      exists(DataFlow::CallCfgNode escapeCall |
        escapeCall = API::moduleImport("re").getMember("escape").getACall() and
        this = escapeCall.getArg(0)
      )
    }
  }
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user