Merge branch 'main' of github.com:github/codeql into tausbn-python-add-source-nodes

This commit is contained in:
Rasmus Lerchedahl Petersen
2020-12-15 11:13:35 +01:00
667 changed files with 46179 additions and 30618 deletions

View File

@@ -17,9 +17,8 @@ private string commonTopLevelDomainRegex() { result = "com|org|edu|gov|uk|net|io
predicate looksLikeUrl(StrConst s) {
exists(string text | text = s.getText() |
text
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + commonTopLevelDomainRegex() +
")(:[0-9]+)?/?")
text.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + commonTopLevelDomainRegex() +
")(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
text.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

View File

@@ -43,7 +43,8 @@ In the second example, it appears that the user is restricted to opening a file
special characters. For example, the string <code>"../../../etc/passwd"</code> will result in the code
reading the file located at <code>"/server/static/images/../../../etc/passwd"</code>, which is the system's
password file. This file would then be sent back to the user, giving them access to all the
system's passwords.
system's passwords. Note that a user could also use an absolute path here, since the result of
<code>os.path.join("/server/static/images/", "/etc/passwd")</code> is <code>"/etc/passwd"</code>.
</p>
<p>

View File

@@ -184,8 +184,7 @@ predicate ssa_consistency(string clsname, string problem, string what) {
/* Minimality of phi nodes */
exists(SsaVariable var |
strictcount(var.getAPhiInput()) = 1 and
var
.getAPhiInput()
var.getAPhiInput()
.getDefinition()
.getBasicBlock()
.strictlyDominates(var.getDefinition().getBasicBlock())

View File

@@ -738,6 +738,7 @@ class ListNode extends SequenceNode {
}
}
/** A control flow node corresponding to a set expression, such as `{ 1, 3, 5, 7, 9 }` */
class SetNode extends ControlFlowNode {
SetNode() { toAst(this) instanceof Set }
@@ -771,6 +772,25 @@ class DictNode extends ControlFlowNode {
}
}
/**
 * A control flow node for an iterable literal, that is, any `SequenceNode` or `SetNode`.
 *
 * Dictionaries are currently not included; use `DictNode` directly for those.
 */
class IterableNode extends ControlFlowNode {
  IterableNode() { this instanceof SetNode or this instanceof SequenceNode }

  /** Gets the control flow node for one of the elements of this iterable. */
  ControlFlowNode getAnElement() {
    result in [this.(SequenceNode).getAnElement(), this.(SetNode).getAnElement()]
  }
}
private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())

View File

@@ -7,8 +7,9 @@ private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Yaml

View File

@@ -0,0 +1,37 @@
/** Provides commonly used BarrierGuards. */
private import python
private import semmle.python.dataflow.new.DataFlow
/**
 * A comparison of a node against constant string value(s), usable as a barrier guard.
 *
 * Two shapes are recognized:
 * - `==` / `!=` with a string constant on either side of the operator, and
 * - `in` / `not in` where the right-hand side is an iterable literal whose elements
 *   are all string constants.
 *
 * `checked_node` is the operand being validated, and `safe_branch` is the outcome of
 * the comparison on which that operand is considered safe.
 */
class StringConstCompare extends DataFlow::BarrierGuard, CompareNode {
  ControlFlowNode checked_node;
  boolean safe_branch;

  StringConstCompare() {
    // (in)equality test against a single string constant, in either operand position
    exists(ControlFlowNode const_node, Cmpop op |
      const_node = any(StrConst str_const).getAFlowNode() and
      (
        op instanceof Eq and safe_branch = true
        or
        op instanceof NotEq and safe_branch = false
      )
    |
      this.operands(const_node, op, checked_node)
      or
      this.operands(checked_node, op, const_node)
    )
    or
    // membership test against an iterable literal made up of string constants only
    exists(IterableNode candidates, Cmpop op |
      op instanceof In and safe_branch = true
      or
      op instanceof NotIn and safe_branch = false
    |
      this.operands(checked_node, op, candidates) and
      forall(ControlFlowNode elem | elem = candidates.getAnElement() |
        elem.getNode() instanceof StrConst
      )
    )
  }

  override predicate checks(ControlFlowNode node, boolean branch) {
    branch = safe_branch and node = checked_node
  }
}

View File

@@ -31,8 +31,6 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
or
stringManipulation(nodeFrom, nodeTo)
or
jsonStep(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
@@ -135,16 +133,6 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
*/
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
* (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this

View File

@@ -607,8 +607,7 @@ class TaintTrackingImplementation extends string {
TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path,
TaintKind kind, string edgeLabel
) {
this
.(EssaTaintTracking)
this.(EssaTaintTracking)
.taintedDefinition(src, node.asVariable().getDefinition(), context, path, kind) and
edgeLabel = ""
}

View File

@@ -95,8 +95,7 @@ private predicate dont_modify(File f) {
private predicate auto_generated(File f) {
exists(Comment c |
c.getLocation().getFile() = f and
c
.getText()
c.getText()
.regexpMatch("(?is)# *this +(code|file) +is +(auto(matically)?[ -]?generated|created automatically).*")
)
}

View File

@@ -17,7 +17,7 @@ private import PEP249
* - https://mysqlclient.readthedocs.io/index.html
* - https://pypi.org/project/MySQL-python/
*/
module MySQLdb {
private module MySQLdb {
// ---------------------------------------------------------------------------
// MySQLdb
// ---------------------------------------------------------------------------

View File

@@ -17,7 +17,7 @@ private import PEP249
* - https://dev.mysql.com/doc/connector-python/en/
* - https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
*/
module MysqlConnectorPython {
private module MysqlConnectorPython {
// ---------------------------------------------------------------------------
// mysql
// ---------------------------------------------------------------------------

View File

@@ -62,11 +62,11 @@ module Connection {
}
/**
* Provides models for the `db.Connection.cursor` method.
* Provides models for the `cursor` method on a connection.
* See https://www.python.org/dev/peps/pep-0249/#cursor.
*/
module cursor {
/** Gets a reference to the `db.connection.cursor` method. */
/** Gets a reference to the `cursor` method on a connection. */
private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
t.startInAttr("cursor") and
result = Connection::instance()
@@ -74,10 +74,10 @@ module cursor {
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
}
/** Gets a reference to the `db.connection.cursor` metod. */
/** Gets a reference to the `cursor` method on a connection. */
DataFlow::Node methodRef() { result = methodRef(DataFlow::TypeTracker::end()) }
/** Gets a reference to a result of calling `db.connection.cursor`. */
/** Gets a reference to a result of calling the `cursor` method on a connection. */
private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = methodRef().asCfgNode()
@@ -85,31 +85,40 @@ module cursor {
exists(DataFlow::TypeTracker t2 | result = methodResult(t2).track(t2, t))
}
/** Gets a reference to a result of calling `db.connection.cursor`. */
/** Gets a reference to a result of calling the `cursor` method on a connection. */
DataFlow::Node methodResult() { result = methodResult(DataFlow::TypeTracker::end()) }
}
/**
* Gets a reference to the `db.Connection.Cursor.execute` function.
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://www.python.org/dev/peps/pep-0249/#id15.
*/
private DataFlow::Node execute(DataFlow::TypeTracker t) {
t.startInAttr("execute") and
result = cursor::methodResult()
result in [cursor::methodResult(), Connection::instance()]
or
exists(DataFlow::TypeTracker t2 | result = execute(t2).track(t2, t))
}
/**
* Gets a reference to the `db.Connection.Cursor.execute` function.
* Gets a reference to the `execute` method on a cursor (or on a connection).
*
* Note: while `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*
* See https://www.python.org/dev/peps/pep-0249/#id15.
*/
DataFlow::Node execute() { result = execute(DataFlow::TypeTracker::end()) }
private class DbConnectionExecute extends SqlExecution::Range, DataFlow::CfgNode {
/** A call to the `execute` method on a cursor (or on a connection). */
private class ExecuteCall extends SqlExecution::Range, DataFlow::CfgNode {
override CallNode node;
DbConnectionExecute() { node.getFunction() = execute().asCfgNode() }
ExecuteCall() { node.getFunction() = execute().asCfgNode() }
override DataFlow::Node getSql() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("sql")]

View File

@@ -17,7 +17,7 @@ private import PEP249
* - https://www.psycopg.org/docs/
* - https://pypi.org/project/psycopg2/
*/
module Psycopg2 {
private module Psycopg2 {
// ---------------------------------------------------------------------------
// Psycopg
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,32 @@
/**
* Provides classes modeling security-relevant aspects of the `PyMySQL` PyPI package.
* See https://pypi.org/project/PyMySQL/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import PEP249
/**
 * Models of the `PyMySQL` PyPI package.
 * See https://pypi.org/project/PyMySQL/
 */
private module PyMySQL {
  /** Gets a reference to the `pymysql` module, as tracked by type tracker `t`. */
  private DataFlow::Node pymysql(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = pymysql(prev).track(prev, t))
    or
    result = DataFlow::importNode("pymysql") and
    t.start()
  }

  /** Gets a reference to the `pymysql` module. */
  DataFlow::Node pymysql() { result = pymysql(DataFlow::TypeTracker::end()) }

  /**
   * The `pymysql` module viewed as a PEP 249 module: PyMySQL implements PEP 249,
   * providing ways to execute SQL statements against a database.
   */
  class PyMySQLPEP249 extends PEP249Module {
    PyMySQLPEP249() { pymysql() = this }
  }
}

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import PEP249
/** Provides models for the Python standard library. */
private module Stdlib {
@@ -91,7 +92,7 @@ private module Stdlib {
* For example, using `attr_name = "join"` will get all uses of `os.path.join`.
*/
private DataFlow::Node path_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["join", "normpath"] and
attr_name in ["join", "normpath", "realpath", "abspath"] and
(
t.start() and
result = DataFlow::importNode("os.path." + attr_name)
@@ -157,6 +158,54 @@ private module Stdlib {
}
}
/**
 * A call to `os.path.abspath`, modeled as a path normalization.
 * See https://docs.python.org/3/library/os.path.html#os.path.abspath
 */
private class OsPathAbspathCall extends Path::PathNormalization::Range, DataFlow::CfgNode {
  override CallNode node;

  OsPathAbspathCall() { os::path::path_attr("abspath").asCfgNode() = node.getFunction() }

  /** Gets the path argument of this call: the first positional argument or keyword `path`. */
  DataFlow::Node getPathArg() {
    exists(ControlFlowNode arg | arg = result.asCfgNode() |
      arg = node.getArg(0)
      or
      arg = node.getArgByName("path")
    )
  }
}
/** An additional taint step from the path argument of an `os.path.abspath` call to the call itself. */
private class OsPathAbspathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathAbspathCall).getPathArg()
  }
}
/**
 * A call to `os.path.realpath`, modeled as a path normalization.
 * See https://docs.python.org/3/library/os.path.html#os.path.realpath
 */
private class OsPathRealpathCall extends Path::PathNormalization::Range, DataFlow::CfgNode {
  override CallNode node;

  OsPathRealpathCall() { node.getFunction() = os::path::path_attr("realpath").asCfgNode() }

  /**
   * Gets the argument holding the path to resolve.
   *
   * The documentation and `ntpath` name this parameter `path`, whereas CPython's
   * `posixpath.realpath` declares it as `filename`; both keyword spellings are
   * accepted here in addition to the first positional argument.
   */
  DataFlow::Node getPathArg() {
    result.asCfgNode() in [
        node.getArg(0), node.getArgByName("path"), node.getArgByName("filename")
      ]
  }
}
/** An additional taint step from the path argument of an `os.path.realpath` call to the call itself. */
private class OsPathRealpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = nodeTo.(OsPathRealpathCall).getPathArg()
  }
}
/**
* A call to `os.system`.
* See https://docs.python.org/3/library/os.html#os.system
@@ -945,6 +994,116 @@ private module Stdlib {
private DataFlow::Node io_attr(string attr_name) {
result = io_attr(DataFlow::TypeTracker::end(), attr_name)
}
// ---------------------------------------------------------------------------
// json
// ---------------------------------------------------------------------------
/** Gets a reference to the `json` module, as tracked by type tracker `t`. */
private DataFlow::Node json(DataFlow::TypeTracker t) {
  exists(DataFlow::TypeTracker prev | result = json(prev).track(prev, t))
  or
  result = DataFlow::importNode("json") and
  t.start()
}

/** Gets a reference to the `json` module. */
DataFlow::Node json() {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() | result = json(done))
}
/**
* Gets a reference to the attribute `attr_name` of the `json` module, as tracked by
* type tracker `t`.
* WARNING: Only holds for a few predefined attributes (currently `loads` and `dumps`).
*/
private DataFlow::Node json_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["loads", "dumps"] and
(
// the attribute is imported directly, i.e. an import node for `json.<attr_name>`
t.start() and
result = DataFlow::importNode("json" + "." + attr_name)
or
// the attribute is read off a reference to the `json` module
t.startInAttr(attr_name) and
result = json()
)
or
// Due to bad performance when using normal setup with `json_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
json_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if there is a type-tracking step, described by `summary`, from a node given by
* `json_attr(t2, attr_name)` to `res`.
*
* Helper for `json_attr`: this is the inlined first half of the usual `track` call,
* kept as a separate `pragma[nomagic]` predicate to force the join done there.
*/
pragma[nomagic]
private predicate json_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(json_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `json` module, i.e. the result
* of the type-tracked `json_attr` at the end of tracking.
* WARNING: Only holds for a few predefined attributes (currently `loads` and `dumps`).
*/
private DataFlow::Node json_attr(string attr_name) {
result = json_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
 * A call to `json.loads`, modeled as a decoding of JSON data.
 * See https://docs.python.org/3/library/json.html#json.loads
 */
private class JsonLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  JsonLoadsCall() { node.getFunction() = json_attr("loads").asCfgNode() }

  /** Decoding JSON never executes the input. */
  override predicate mayExecuteInput() { none() }

  /**
   * Gets the data being decoded: the first positional argument, or the keyword
   * argument `s` (the name of the first parameter of `json.loads`).
   */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() in [node.getArg(0), node.getArgByName("s")]
  }

  /** Gets the decoded value, which is the result of the call itself. */
  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "JSON" }
}
/**
 * A call to `json.dumps`, modeled as an encoding to JSON.
 * See https://docs.python.org/3/library/json.html#json.dumps
 */
private class JsonDumpsCall extends Encoding::Range, DataFlow::CfgNode {
  override CallNode node;

  JsonDumpsCall() { node.getFunction() = json_attr("dumps").asCfgNode() }

  /**
   * Gets the object being encoded: the first positional argument, or the keyword
   * argument `obj` (the name of the first parameter of `json.dumps`).
   */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() in [node.getArg(0), node.getArgByName("obj")]
  }

  /** Gets the encoded value, which is the result of the call itself. */
  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "JSON" }
}
// ---------------------------------------------------------------------------
// sqlite3
// ---------------------------------------------------------------------------
/** Gets a reference to the `sqlite3` module, as tracked by type tracker `t`. */
private DataFlow::Node sqlite3(DataFlow::TypeTracker t) {
  exists(DataFlow::TypeTracker prev | result = sqlite3(prev).track(prev, t))
  or
  result = DataFlow::importNode("sqlite3") and
  t.start()
}

/** Gets a reference to the `sqlite3` module. */
DataFlow::Node sqlite3() {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() |
    result = sqlite3(done)
  )
}
/**
 * The `sqlite3` module viewed as a PEP 249 module: sqlite3 implements PEP 249,
 * providing ways to execute SQL statements against a database.
 *
 * See https://devdocs.io/python~3.9/library/sqlite3
 */
class Sqlite3 extends PEP249Module {
  Sqlite3() { sqlite3() = this }
}
}
// ---------------------------------------------------------------------------

View File

@@ -84,8 +84,7 @@ class Value extends TObject {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this
.(ObjectInternal)
this.(ObjectInternal)
.getOrigin()
.getLocation()
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting code injection vulnerabilities.
@@ -18,4 +19,8 @@ class CodeInjectionConfiguration extends TaintTracking::Configuration {
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting command injection vulnerabilities.
@@ -48,4 +49,8 @@ class CommandInjectionConfiguration extends TaintTracking::Configuration {
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
}
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}

View File

@@ -31,6 +31,7 @@ import semmle.python.dataflow.new.TaintTracking2
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import ChainedConfigs12
import semmle.python.dataflow.new.BarrierGuards
// ---------------------------------------------------------------------------
// Case 1. The path is never normalized.
@@ -46,6 +47,10 @@ class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
}
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}
/**
@@ -68,6 +73,10 @@ class FirstNormalizationConfiguration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
@@ -82,6 +91,8 @@ class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuratio
// Sanitizer guards: a `Path::SafeAccessCheck`, or a comparison against constant
// strings (`StringConstCompare`).
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof Path::SafeAccessCheck
or
guard instanceof StringConstCompare
}
}

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting reflected server-side cross-site
@@ -24,4 +25,8 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
sink = response.getBody()
)
}
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting SQL injection vulnerabilities.
@@ -18,4 +19,8 @@ class SQLInjectionConfiguration extends TaintTracking::Configuration {
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
* A taint-tracking configuration for detecting arbitrary code execution
@@ -24,4 +25,8 @@ class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
sink = d.getAnInput()
)
}
// Treat comparisons against constant strings (`StringConstCompare`) as sanitizer guards.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
}

View File

@@ -75,8 +75,7 @@ class Object extends @py_object {
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.hasOrigin() and
this
.getOrigin()
this.getOrigin()
.getLocation()
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or