Merge branch 'main' of github.com:github/codeql into python/promote-regex-injection

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-10-12 11:28:12 +02:00
1066 changed files with 87595 additions and 6139 deletions

View File

@@ -142,7 +142,7 @@ module API {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -67,7 +67,7 @@ class CommentBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -13,7 +13,7 @@ class File extends Container, @file {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -123,7 +123,7 @@ class Folder extends Container, @folder {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -323,7 +323,7 @@ abstract class Container extends @container {
/**
* Gets a URL representing the location of this container.
*
* For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
*/
abstract string getURL();
@@ -429,7 +429,7 @@ class Location extends @location {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -457,7 +457,7 @@ class Line extends @py_line {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -1111,7 +1111,7 @@ class BasicBlock extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -13,6 +13,7 @@ private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
@@ -20,13 +21,14 @@ private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml

View File

@@ -58,6 +58,7 @@ class Function extends Function_, Scope, AstNode {
/** Gets the name of the nth argument (for simple arguments) */
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
/** Gets the parameter of this function with the name `name`. */
Parameter getArgByName(string name) {
(
result = this.getAnArg()

View File

@@ -9,6 +9,7 @@ class ConditionBlock extends BasicBlock {
}
/** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */
pragma[nomagic]
predicate controls(BasicBlock controlled, boolean testIsTrue) {
/*
* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:

View File

@@ -28,7 +28,11 @@ private module AlgorithmNames {
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
name = "SHA3" or
name = "SHA3224" or
name = "SHA3256" or
name = "SHA3384" or
name = "SHA3512"
}
predicate isWeakHashingAlgorithm(string name) {

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -2139,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2973,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3248,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3643,9 +3647,10 @@ private module Subpaths {
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, apout) and
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration()))
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
@@ -4032,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -1236,6 +1236,13 @@ class TypedContent extends MkTypedContent {
/** Gets a textual representation of this content. */
string toString() { result = c.toString() }
/**
* Holds if access paths with this `TypedContent` at their head always should
* be tracked at high precision. This disables adaptive access path precision
* for such access paths.
*/
predicate forceHighPrecision() { forceHighPrecision(c) }
}
/**

View File

@@ -200,6 +200,9 @@ module EssaFlow {
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
or
// boolean inline expressions such as `x or y` or `x and y`
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
or
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
@@ -1620,6 +1623,12 @@ predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }

View File

@@ -102,7 +102,7 @@ class Node extends TNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -384,7 +384,7 @@ abstract class TaintSource extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -498,7 +498,7 @@ abstract class TaintSink extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -1,5 +1,5 @@
/**
* Provides classes modeling security-relevant aspects of the 'dill' package.
* Provides classes modeling security-relevant aspects of the `dill` PyPI package.
* See https://pypi.org/project/dill/.
*/
@@ -10,18 +10,41 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* A call to `dill.loads`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
* Provides models for the `dill` PyPI package.
* See https://pypi.org/project/dill/.
*/
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
private module Dill {
/**
* A call to `dill.load`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.load)
*/
private class DillLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
DillLoadCall() { this = API::moduleImport("dill").getMember("load").getACall() }
override predicate mayExecuteInput() { any() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
override DataFlow::Node getOutput() { result = this }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "dill" }
override string getFormat() { result = "dill" }
}
/**
* A call to `dill.loads`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
*/
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("str")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "dill" }
}
}

View File

@@ -0,0 +1,56 @@
/**
* Provides classes modeling security-relevant aspects of the `Flask-SQLAlchemy` PyPI package
* (imported by `flask_sqlalchemy`).
* See
* - https://pypi.org/project/Flask-SQLAlchemy/
* - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import semmle.python.frameworks.SqlAlchemy
/**
* INTERNAL: Do not use.
*
* Provides models for the `Flask-SQLAlchemy` PyPI package (imported by `flask_sqlalchemy`).
* See
* - https://pypi.org/project/Flask-SQLAlchemy/
* - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
*/
private module FlaskSqlAlchemy {
/** Gets an instance of `flask_sqlalchemy.SQLAlchemy` */
private API::Node dbInstance() {
result = API::moduleImport("flask_sqlalchemy").getMember("SQLAlchemy").getReturn()
}
/** A call to the `text` method on a DB. */
private class DbTextCall extends SqlAlchemy::TextClause::TextClauseConstruction {
DbTextCall() { this = dbInstance().getMember("text").getACall() }
}
/** Access on a DB resulting in an Engine */
private class DbEngine extends SqlAlchemy::Engine::InstanceSource {
DbEngine() {
this = dbInstance().getMember("engine").getAUse()
or
this = dbInstance().getMember("get_engine").getACall()
}
}
/** Access on a DB resulting in a Session */
private class DbSession extends SqlAlchemy::Session::InstanceSource {
DbSession() {
this = dbInstance().getMember("session").getAUse()
or
this = dbInstance().getMember("create_session").getReturn().getACall()
or
this = dbInstance().getMember("create_session").getReturn().getMember("begin").getACall()
or
this = dbInstance().getMember("create_scoped_session").getACall()
}
}
}

View File

@@ -0,0 +1,344 @@
/**
* Provides classes modeling security-relevant aspects of the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
// This import is done like this to avoid importing the deprecated top-level things that
// would pollute the namespace
private import semmle.python.frameworks.PEP249::PEP249 as PEP249
/**
* INTERNAL: Do not use.
*
* Provides models for the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
module SqlAlchemy {
/**
* Provides models for the `sqlalchemy.engine.Engine` and `sqlalchemy.future.Engine` classes.
*
* These are so similar that we model both in the same way.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Engine
*/
module Engine {
/** Gets a reference to a SQLAlchemy Engine class. */
private API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("engine").getMember("Engine")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Engine")
}
/**
* A source of instances of a SQLAlchemy Engine, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Engine::instance()` to get references to instances of a SQLAlchemy Engine.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class EngineConstruction extends InstanceSource, DataFlow::CallCfgNode {
EngineConstruction() {
this = classRef().getACall()
or
this = API::moduleImport("sqlalchemy").getMember("create_engine").getACall()
or
this =
API::moduleImport("sqlalchemy").getMember("future").getMember("create_engine").getACall()
or
this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
}
}
/** Gets a reference to an instance of a SQLAlchemy Engine. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of a SQLAlchemy Engine. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `sqlalchemy.engine.base.Connection` and `sqlalchemy.future.Connection` classes.
*
* These are so similar that we model both in the same way.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection
*/
module Connection {
/** Gets a reference to a SQLAlchemy Connection class. */
private API::Node classRef() {
result =
API::moduleImport("sqlalchemy")
.getMember("engine")
.getMember("base")
.getMember("Connection")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Connection")
}
/**
* A source of instances of a SQLAlchemy Connection, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Connection::instance()` to get references to instances of a SQLAlchemy Connection.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class ConnectionConstruction extends InstanceSource, DataFlow::CallCfgNode {
ConnectionConstruction() {
this = classRef().getACall()
or
this.(DataFlow::MethodCallNode).calls(Engine::instance(), ["begin", "connect"])
or
this.(DataFlow::MethodCallNode).calls(instance(), "connect")
or
this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
}
}
/** Gets a reference to an instance of a SQLAlchemy Connection. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of a SQLAlchemy Connection. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the underlying DB-API Connection of a SQLAlchemy Connection.
*
* See https://docs.sqlalchemy.org/en/14/core/connections.html#dbapi-connections.
*/
module DBAPIConnection {
/**
* A source of instances of DB-API Connections, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `DBAPIConnection::instance()` to get references to instances of DB-API Connections.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class DBAPIConnectionSources extends InstanceSource, PEP249::Connection::InstanceSource {
DBAPIConnectionSources() {
this.(DataFlow::MethodCallNode).calls(Engine::instance(), "raw_connection")
or
this.(DataFlow::AttrRead).accesses(Connection::instance(), "connection")
}
}
/** Gets a reference to an instance of DB-API Connections. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of DB-API Connections. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `sqlalchemy.orm.Session` class
*
* See
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session
* - https://docs.sqlalchemy.org/en/14/orm/session_basics.html
*/
module Session {
/** Gets a reference to the `sqlalchemy.orm.Session` class. */
private API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("orm").getMember("Session")
}
/**
* A source of instances of `sqlalchemy.orm.Session`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Session::instance()` to get references to instances of `sqlalchemy.orm.Session`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class SessionConstruction extends InstanceSource, DataFlow::CallCfgNode {
SessionConstruction() {
this = classRef().getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("sessionmaker")
.getReturn()
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("sessionmaker")
.getReturn()
.getMember("begin")
.getACall()
}
}
/** Gets a reference to an instance of `sqlalchemy.orm.Session`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `sqlalchemy.orm.Session`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* A call to `execute` on a SQLAlchemy Engine, Connection, or Session.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.execute
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execute
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.execute
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.execute
*/
private class SqlAlchemyExecuteCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyExecuteCall() {
this.calls(Engine::instance(), "execute")
or
this.calls(Connection::instance(), "execute")
or
this.calls(Session::instance(), "execute")
}
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("statement")] }
}
/**
* A call to `exec_driver_sql` on a SQLAlchemy Connection.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.exec_driver_sql
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.exec_driver_sql
*/
private class SqlAlchemyExecDriverSqlCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyExecDriverSqlCall() { this.calls(Connection::instance(), "exec_driver_sql") }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("statement")] }
}
/**
* A call to `scalar` on a SQLAlchemy Engine, Connection, or Session.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.scalar
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.scalar
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.scalar
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar
*/
private class SqlAlchemyScalarCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyScalarCall() {
this.calls(Engine::instance(), "scalar")
or
this.calls(Connection::instance(), "scalar")
or
this.calls(Session::instance(), "scalar")
}
override DataFlow::Node getSql() {
result in [this.getArg(0), this.getArgByName("statement"), this.getArgByName("object_")]
}
}
/**
* Provides models for the `sqlalchemy.sql.expression.TextClause` class,
* which represents a textual SQL string directly.
*
* ```py
* session.query(For14).filter_by(description=sqlalchemy.text(f"'{user_input}'")).all()
* ```
*
* Initially I wanted to add lots of additional taint steps for such that the normal
* SQL injection query would be able to find cases as the one above where an ORM query
* includes a TextClause that includes user-input directly... But that presented 2
* problems:
*
* - which part of the query construction above should be marked as SQL to fit our
* `SqlExecution` concept. Nothing really fits this well, since all the SQL
* execution happens under the hood.
* - This would require a LOT of modeling for these additional taint steps, since
* there are many many constructs we would need to have models for. (see the 2
* examples below)
*
* So instead we extended the SQL injection query to include TextClause construction
* as a sink. And so we don't highlight any parts of an ORM constructed query such as
* these as containing SQL, and don't need the additional taint steps either.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause.
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
*/
module TextClause {
/**
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
* textual SQL string directly.
*/
abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
/** Gets the argument that specifies the SQL text. */
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
}
/** `TextClause` constructions from the `sqlalchemy` package. */
private class DefaultTextClauseConstruction extends TextClauseConstruction {
DefaultTextClauseConstruction() {
this = API::moduleImport("sqlalchemy").getMember("text").getACall()
or
this = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("text")
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("TextClause")
.getACall()
}
}
}
}

View File

@@ -428,6 +428,22 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// marshal
// ---------------------------------------------------------------------------
/**
* A call to `marshal.load`
* See https://docs.python.org/3/library/marshal.html#marshal.load
*/
private class MarshalLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
MarshalLoadCall() { this = API::moduleImport("marshal").getMember("load").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "marshal" }
}
/**
* A call to `marshal.loads`
* See https://docs.python.org/3/library/marshal.html#marshal.loads
@@ -447,15 +463,23 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// pickle
// ---------------------------------------------------------------------------
/** Gets a reference to the `pickle` module. */
DataFlow::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getAUse() }
/** Gets a reference to any of the `pickle` modules. */
API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) }
/** Provides models for the `pickle` module. */
module pickle {
/** Gets a reference to the `pickle.loads` function. */
DataFlow::Node loads() {
result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getMember("loads").getAUse()
}
/**
* A call to `pickle.load`
* See https://docs.python.org/3/library/pickle.html#pickle.load
*/
private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleLoadCall() { this = pickle().getMember("load").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "pickle" }
}
/**
@@ -463,11 +487,63 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/pickle.html#pickle.loads
*/
private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleLoadsCall() { this.getFunction() = pickle::loads() }
PickleLoadsCall() { this = pickle().getMember("loads").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "pickle" }
}
/**
* A construction of a `pickle.Unpickler`
* See https://docs.python.org/3/library/pickle.html#pickle.Unpickler
*/
private class PickleUnpicklerCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleUnpicklerCall() { this = pickle().getMember("Unpickler").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
override DataFlow::Node getOutput() { result = this.getAMethodCall("load") }
override string getFormat() { result = "pickle" }
}
// ---------------------------------------------------------------------------
// shelve
// ---------------------------------------------------------------------------
/**
* A call to `shelve.open`
* See https://docs.python.org/3/library/shelve.html#shelve.open
*
* Claiming there is decoding of the input to `shelve.open` is a bit questionable, since
* it's not the filename, but the contents of the file that is decoded.
*
* However, we definitely want to be able to alert if a user is able to control what
* file is used, since that can lead to code execution (even if that file is free of
* path injection).
*
* So right now the best way we have of modeling this seems to be to treat the filename
* argument as being deserialized...
*/
private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
override predicate mayExecuteInput() { any() }
override DataFlow::Node getAnInput() {
result in [this.getArg(0), this.getArgByName("filename")]
}
override DataFlow::Node getAPathArgument() {
result in [this.getArg(0), this.getArgByName("filename")]
}
override DataFlow::Node getOutput() { result = this }

View File

@@ -79,7 +79,7 @@ class Value extends TObject {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -46,7 +46,7 @@ class CustomPathNode extends TCustomPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.SqlAlchemy
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -48,6 +49,13 @@ module SqlInjection {
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
}
/**
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
*/
class TextArgAsSink extends Sink {
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -139,8 +139,6 @@ class RegExpRoot extends RegExpTerm {
predicate isRelevant() {
// there is at least one repetition
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
// there are no lookbehinds
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
// is actually used as a RegExp
isUsedAsRegExp() and
// not excluded for library specific reasons

View File

@@ -69,7 +69,7 @@ class Object extends @py_object {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -24,7 +24,7 @@ class XMLLocatable extends @xmllocatable, TXMLLocatable {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn