Merge branch 'main' of github.com:github/codeql into python-dataflow/init-time

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-10-08 14:55:01 +02:00
1844 changed files with 119822 additions and 12289 deletions

View File

@@ -142,7 +142,7 @@ module API {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -424,13 +424,8 @@ module API {
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
name = getBuiltInName() and
m = n.getEnclosingModule()
)
global_name_defined_in_module(name, m) and
name = getBuiltInName()
}
/**
@@ -445,6 +440,51 @@ module API {
m = n.getEnclosingModule()
}
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
// Not already defined in an enclosing scope.
not exists(LocalVariable v |
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
) and
not name = getBuiltInName() and
s = n.getScope().getEnclosingScope*() and
exists(potential_import_star_base(s)) and
not global_name_defined_in_module(name, n.getEnclosingModule())
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
private predicate global_name_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
@@ -487,6 +527,15 @@ module API {
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
))
)
}
/**

View File

@@ -67,7 +67,7 @@ class CommentBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -1,9 +1,7 @@
import python
/** A file */
class File extends Container {
File() { files(this, _, _, _, _) }
class File extends Container, @file {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
@@ -15,7 +13,7 @@ class File extends Container {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -34,9 +32,7 @@ class File extends Container {
}
/** Gets a short name for this file (just the file name) */
string getShortName() {
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
}
string getShortName() { result = this.getBaseName() }
private int lastLine() {
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
@@ -55,7 +51,7 @@ class File extends Container {
)
}
override string getAbsolutePath() { files(this, result, _, _, _) }
override string getAbsolutePath() { files(this, result) }
/** Gets the URL of this file. */
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
@@ -118,21 +114,16 @@ private predicate occupied_line(File f, int n) {
}
/** A folder (directory) */
class Folder extends Container {
Folder() { folders(this, _, _) }
class Folder extends Container, @folder {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/** DEPRECATED: Use `getBaseName` instead. */
deprecated string getSimple() { folders(this, _, result) }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -144,7 +135,7 @@ class Folder extends Container {
endcolumn = 0
}
override string getAbsolutePath() { folders(this, result, _) }
override string getAbsolutePath() { folders(this, result) }
/** Gets the URL of this folder. */
override string getURL() { result = "folder://" + this.getAbsolutePath() }
@@ -332,7 +323,7 @@ abstract class Container extends @container {
/**
* Gets a URL representing the location of this container.
*
* For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
*/
abstract string getURL();
@@ -438,7 +429,7 @@ class Location extends @location {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -466,7 +457,7 @@ class Line extends @py_line {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
DefinitionNode() {
exists(Assign a | a.getATarget().getAFlowNode() = this)
or
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
or
exists(Alias a | a.getAsname().getAFlowNode() = this)
or
augstore(_, this)
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
or
/* lhs : annotation = result */
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
or
/* import result as lhs */
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
or
@@ -1106,7 +1111,7 @@ class BasicBlock extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -13,6 +13,7 @@ private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
@@ -20,13 +21,14 @@ private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml

View File

@@ -58,6 +58,7 @@ class Function extends Function_, Scope, AstNode {
/** Gets the name of the nth argument (for simple arguments) */
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
/** Gets the parameter of this function with the name `name`. */
Parameter getArgByName(string name) {
(
result = this.getAnArg()

View File

@@ -9,6 +9,7 @@ class ConditionBlock extends BasicBlock {
}
/** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */
pragma[nomagic]
predicate controls(BasicBlock controlled, boolean testIsTrue) {
/*
* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:

View File

@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
override Stmt getASubStatement() { result = this.getAStmt() }
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
override Expr getType() {
result = super.getType() and not result instanceof Tuple
or
result = super.getType().(Tuple).getAnElt()
}
}
/** An assert statement, such as `assert a == b, "A is not equal to b"` */

View File

@@ -28,7 +28,11 @@ private module AlgorithmNames {
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
name = "SHA3" or
name = "SHA3224" or
name = "SHA3256" or
name = "SHA3384" or
name = "SHA3512"
}
predicate isWeakHashingAlgorithm(string name) {

View File

@@ -923,28 +923,29 @@ private module Stage2 {
ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
class Cc = boolean;
class Cc = CallContext;
class CcCall extends Cc {
CcCall() { this = true }
class CcCall = CallContextCall;
/** Holds if this call context may be `call`. */
predicate matchesCall(DataFlowCall call) { any() }
}
class CcNoCall = CallContextNoCall;
class CcNoCall extends Cc {
CcNoCall() { this = false }
}
Cc ccNone() { result = false }
Cc ccNone() { result instanceof CallContextAny }
private class LocalCc = Unit;
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSiteDispatch(call, c)
then result = TSpecificCall(call)
else result = TSomeCall()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1172,7 +1173,8 @@ private module Stage2 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -1860,7 +1862,8 @@ private module Stage3 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2117,7 +2120,7 @@ private module Stage3 {
private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
exists(AccessPathFront apf |
Stage3::revFlow(node, true, _, apf, config) and
Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config)
Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
)
}
@@ -2136,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2618,7 +2622,8 @@ private module Stage4 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2969,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3244,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3258,24 +3266,16 @@ class PathNode extends TPathNode {
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
private predicate isHidden() {
hiddenNode(this.(PathNodeImpl).getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.(PathNodeImpl).getNodeEx() instanceof TNodeImplicitRead
}
private PathNode getASuccessorIfHidden() {
this.isHidden() and
this.(PathNodeImpl).isHidden() and
result = this.(PathNodeImpl).getASuccessorImpl()
}
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
not this.isHidden() and
not result.isHidden()
not this.(PathNodeImpl).isHidden() and
not result.(PathNodeImpl).isHidden()
}
/** Holds if this node is a source. */
@@ -3287,6 +3287,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -3313,10 +3321,15 @@ abstract private class PathNodeImpl extends PathNode {
}
/** Holds if `n` can reach a sink. */
private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) }
private predicate directReach(PathNode n) {
n instanceof PathNodeSink or directReach(n.getASuccessor())
}
/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) }
/** Holds if `n` can reach a sink or is used in a subpath. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) }
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
@@ -3325,12 +3338,14 @@ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1
*/
module PathGraph {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) }
query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
query predicate subpaths = Subpaths::subpaths/4;
}
/**
@@ -3622,6 +3637,87 @@ private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext c
)
}
private module Subpaths {
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths01(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, apout) and
out.asNode() = kind.getAnOutNode(_)
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, apout) and
ret.getNodeEx() = retnode and
kind = retnode.getKind() and
innercc = ret.getCallContext() and
sc = ret.getSummaryCtx() and
ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
apout = ret.getAp() and
not ret.isHidden()
)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
out.getAp() = apout
)
}
/**
* Holds if `n` can reach a return node in a summarized subpath.
*/
predicate retReach(PathNode n) {
subpaths(_, _, n, _)
or
exists(PathNode mid |
retReach(mid) and
n.getASuccessor() = mid and
not subpaths(_, mid, _, _)
)
}
}
/**
* Holds if data can flow (inter-procedurally) from `source` to `sink`.
*
@@ -3941,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -923,28 +923,29 @@ private module Stage2 {
ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
class Cc = boolean;
class Cc = CallContext;
class CcCall extends Cc {
CcCall() { this = true }
class CcCall = CallContextCall;
/** Holds if this call context may be `call`. */
predicate matchesCall(DataFlowCall call) { any() }
}
class CcNoCall = CallContextNoCall;
class CcNoCall extends Cc {
CcNoCall() { this = false }
}
Cc ccNone() { result = false }
Cc ccNone() { result instanceof CallContextAny }
private class LocalCc = Unit;
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSiteDispatch(call, c)
then result = TSpecificCall(call)
else result = TSomeCall()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1172,7 +1173,8 @@ private module Stage2 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -1860,7 +1862,8 @@ private module Stage3 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2117,7 +2120,7 @@ private module Stage3 {
private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
exists(AccessPathFront apf |
Stage3::revFlow(node, true, _, apf, config) and
Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config)
Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
)
}
@@ -2136,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2618,7 +2622,8 @@ private module Stage4 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2969,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3244,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3258,24 +3266,16 @@ class PathNode extends TPathNode {
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
private predicate isHidden() {
hiddenNode(this.(PathNodeImpl).getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.(PathNodeImpl).getNodeEx() instanceof TNodeImplicitRead
}
private PathNode getASuccessorIfHidden() {
this.isHidden() and
this.(PathNodeImpl).isHidden() and
result = this.(PathNodeImpl).getASuccessorImpl()
}
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
not this.isHidden() and
not result.isHidden()
not this.(PathNodeImpl).isHidden() and
not result.(PathNodeImpl).isHidden()
}
/** Holds if this node is a source. */
@@ -3287,6 +3287,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -3313,10 +3321,15 @@ abstract private class PathNodeImpl extends PathNode {
}
/** Holds if `n` can reach a sink. */
private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) }
private predicate directReach(PathNode n) {
n instanceof PathNodeSink or directReach(n.getASuccessor())
}
/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) }
/** Holds if `n` can reach a sink or is used in a subpath. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) }
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
@@ -3325,12 +3338,14 @@ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1
*/
module PathGraph {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) }
query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
query predicate subpaths = Subpaths::subpaths/4;
}
/**
@@ -3622,6 +3637,87 @@ private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext c
)
}
private module Subpaths {
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths01(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, apout) and
out.asNode() = kind.getAnOutNode(_)
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, apout) and
ret.getNodeEx() = retnode and
kind = retnode.getKind() and
innercc = ret.getCallContext() and
sc = ret.getSummaryCtx() and
ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
apout = ret.getAp() and
not ret.isHidden()
)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
out.getAp() = apout
)
}
/**
* Holds if `n` can reach a return node in a summarized subpath.
*/
predicate retReach(PathNode n) {
subpaths(_, _, n, _)
or
exists(PathNode mid |
retReach(mid) and
n.getASuccessor() = mid and
not subpaths(_, mid, _, _)
)
}
}
/**
* Holds if data can flow (inter-procedurally) from `source` to `sink`.
*
@@ -3941,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -923,28 +923,29 @@ private module Stage2 {
ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
class Cc = boolean;
class Cc = CallContext;
class CcCall extends Cc {
CcCall() { this = true }
class CcCall = CallContextCall;
/** Holds if this call context may be `call`. */
predicate matchesCall(DataFlowCall call) { any() }
}
class CcNoCall = CallContextNoCall;
class CcNoCall extends Cc {
CcNoCall() { this = false }
}
Cc ccNone() { result = false }
Cc ccNone() { result instanceof CallContextAny }
private class LocalCc = Unit;
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSiteDispatch(call, c)
then result = TSpecificCall(call)
else result = TSomeCall()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1172,7 +1173,8 @@ private module Stage2 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -1860,7 +1862,8 @@ private module Stage3 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2117,7 +2120,7 @@ private module Stage3 {
private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
exists(AccessPathFront apf |
Stage3::revFlow(node, true, _, apf, config) and
Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config)
Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
)
}
@@ -2136,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2618,7 +2622,8 @@ private module Stage4 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2969,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3244,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3258,24 +3266,16 @@ class PathNode extends TPathNode {
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
private predicate isHidden() {
hiddenNode(this.(PathNodeImpl).getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.(PathNodeImpl).getNodeEx() instanceof TNodeImplicitRead
}
private PathNode getASuccessorIfHidden() {
this.isHidden() and
this.(PathNodeImpl).isHidden() and
result = this.(PathNodeImpl).getASuccessorImpl()
}
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
not this.isHidden() and
not result.isHidden()
not this.(PathNodeImpl).isHidden() and
not result.(PathNodeImpl).isHidden()
}
/** Holds if this node is a source. */
@@ -3287,6 +3287,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -3313,10 +3321,15 @@ abstract private class PathNodeImpl extends PathNode {
}
/** Holds if `n` can reach a sink. */
private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) }
private predicate directReach(PathNode n) {
n instanceof PathNodeSink or directReach(n.getASuccessor())
}
/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) }
/** Holds if `n` can reach a sink or is used in a subpath. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) }
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
@@ -3325,12 +3338,14 @@ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1
*/
module PathGraph {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) }
query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
query predicate subpaths = Subpaths::subpaths/4;
}
/**
@@ -3622,6 +3637,87 @@ private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext c
)
}
private module Subpaths {
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths01(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, apout) and
out.asNode() = kind.getAnOutNode(_)
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, apout) and
ret.getNodeEx() = retnode and
kind = retnode.getKind() and
innercc = ret.getCallContext() and
sc = ret.getSummaryCtx() and
ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
apout = ret.getAp() and
not ret.isHidden()
)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
out.getAp() = apout
)
}
/**
* Holds if `n` can reach a return node in a summarized subpath.
*/
predicate retReach(PathNode n) {
subpaths(_, _, n, _)
or
exists(PathNode mid |
retReach(mid) and
n.getASuccessor() = mid and
not subpaths(_, mid, _, _)
)
}
}
/**
* Holds if data can flow (inter-procedurally) from `source` to `sink`.
*
@@ -3941,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -923,28 +923,29 @@ private module Stage2 {
ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
class Cc = boolean;
class Cc = CallContext;
class CcCall extends Cc {
CcCall() { this = true }
class CcCall = CallContextCall;
/** Holds if this call context may be `call`. */
predicate matchesCall(DataFlowCall call) { any() }
}
class CcNoCall = CallContextNoCall;
class CcNoCall extends Cc {
CcNoCall() { this = false }
}
Cc ccNone() { result = false }
Cc ccNone() { result instanceof CallContextAny }
private class LocalCc = Unit;
bindingset[call, c, outercc]
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
checkCallContextCall(outercc, call, c) and
if recordDataFlowCallSiteDispatch(call, c)
then result = TSpecificCall(call)
else result = TSomeCall()
}
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
checkCallContextReturn(innercc, c, call) and
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1172,7 +1173,8 @@ private module Stage2 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -1860,7 +1862,8 @@ private module Stage3 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2117,7 +2120,7 @@ private module Stage3 {
private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
exists(AccessPathFront apf |
Stage3::revFlow(node, true, _, apf, config) and
Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config)
Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
)
}
@@ -2136,7 +2139,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config)
) and
accessPathApproxCostLimits(apLimit, tupleLimit) and
apLimit < tails and
tupleLimit < (tails - 1) * nodes
tupleLimit < (tails - 1) * nodes and
not tc.forceHighPrecision()
)
}
@@ -2618,7 +2622,8 @@ private module Stage4 {
fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
pragma[only_bind_into](config)) and
fwdFlowOutFromArg(call, out, argAp0, ap, config) and
fwdFlowIsEntered(call, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), argAp0,
fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
pragma[only_bind_into](config))
)
}
@@ -2969,12 +2974,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
* expected to be expensive. Holds with `unfold = true` otherwise.
*/
private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
if apa.getHead().forceHighPrecision()
then unfold = true
else
exists(int aps, int nodes, int apLimit, int tupleLimit |
aps = countPotentialAps(apa, config) and
nodes = countNodesUsingAccessPath(apa, config) and
accessPathCostLimits(apLimit, tupleLimit) and
if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
)
}
/**
@@ -3244,7 +3252,7 @@ class PathNode extends TPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -3258,24 +3266,16 @@ class PathNode extends TPathNode {
/** Gets the associated configuration. */
Configuration getConfiguration() { none() }
private predicate isHidden() {
hiddenNode(this.(PathNodeImpl).getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.(PathNodeImpl).getNodeEx() instanceof TNodeImplicitRead
}
private PathNode getASuccessorIfHidden() {
this.isHidden() and
this.(PathNodeImpl).isHidden() and
result = this.(PathNodeImpl).getASuccessorImpl()
}
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() {
result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and
not this.isHidden() and
not result.isHidden()
not this.(PathNodeImpl).isHidden() and
not result.(PathNodeImpl).isHidden()
}
/** Holds if this node is a source. */
@@ -3287,6 +3287,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
}
private string ppAp() {
this instanceof PathNodeSink and result = ""
or
@@ -3313,10 +3321,15 @@ abstract private class PathNodeImpl extends PathNode {
}
/** Holds if `n` can reach a sink. */
private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) }
private predicate directReach(PathNode n) {
n instanceof PathNodeSink or directReach(n.getASuccessor())
}
/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) }
/** Holds if `n` can reach a sink or is used in a subpath. */
private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) }
/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */
private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) }
private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2)
@@ -3325,12 +3338,14 @@ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1
*/
module PathGraph {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) }
query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
query predicate subpaths = Subpaths::subpaths/4;
}
/**
@@ -3622,6 +3637,87 @@ private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext c
)
}
private module Subpaths {
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths01(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
pathIntoCallable(arg, par, _, innercc, sc, _) and
paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
unbindConf(arg.getConfiguration()))
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
* `kind`, `sc`, `apout`, and `innercc`.
*/
pragma[nomagic]
private predicate subpaths02(
PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
NodeEx out, AccessPath apout
) {
subpaths01(arg, par, sc, innercc, kind, out, apout) and
out.asNode() = kind.getAnOutNode(_)
}
pragma[nomagic]
private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple.
*/
pragma[nomagic]
private predicate subpaths03(
PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
) {
exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
subpaths02(arg, par, sc, innercc, kind, out, apout) and
ret.getNodeEx() = retnode and
kind = retnode.getKind() and
innercc = ret.getCallContext() and
sc = ret.getSummaryCtx() and
ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
apout = ret.getAp() and
not ret.isHidden()
)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
exists(ParamNodeEx p, NodeEx o, AccessPath apout |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out and
subpaths03(arg, p, ret, o, apout) and
par.getNodeEx() = p and
out.getNodeEx() = o and
out.getAp() = apout
)
}
/**
* Holds if `n` can reach a return node in a summarized subpath.
*/
predicate retReach(PathNode n) {
subpaths(_, _, n, _)
or
exists(PathNode mid |
retReach(mid) and
n.getASuccessor() = mid and
not subpaths(_, mid, _, _)
)
}
}
/**
* Holds if data can flow (inter-procedurally) from `source` to `sink`.
*
@@ -3941,7 +4037,7 @@ private module FlowExploration {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -786,13 +786,18 @@ private module Cached {
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
* Holds if the call context `call` improves virtual dispatch in `callable`.
*/
cached
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
reducedViableImplInCallContext(_, callable, call)
or
}
/**
* Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
*/
cached
predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
}
@@ -846,6 +851,15 @@ private module Cached {
TAccessPathFrontSome(AccessPathFront apf)
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
*/
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
recordDataFlowCallSiteDispatch(call, callable) or
recordDataFlowCallSiteUnreachable(call, callable)
}
/**
* A `Node` at which a cast can occur such that the type should be checked.
*/
@@ -1222,6 +1236,13 @@ class TypedContent extends MkTypedContent {
/** Gets a textual representation of this content. */
string toString() { result = c.toString() }
/**
* Holds if access paths with this `TypedContent` at their head always should
* be tracked at high precision. This disables adaptive access path precision
* for such access paths.
*/
predicate forceHighPrecision() { forceHighPrecision(c) }
}
/**

View File

@@ -892,6 +892,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
nodeTo = r
)
or
// Default value for parameter flows to that parameter
defaultValueFlowStep(nodeFrom, nodeTo)
}
/**
@@ -1056,6 +1059,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
p = f.getArgByName(_) and
nodeFrom.asExpr() = p.getDefault() and
// The following expresses
// nodeTo.(ParameterNode).getParameter() = p
// without non-monotonic recursion
def.getParameter() = p and
nodeTo.getNode() = def.getDefiningNode()
)
}
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
@@ -1627,6 +1643,12 @@ predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }

View File

@@ -102,7 +102,7 @@ class Node extends TNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
*
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
*
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
* to refer to the module.
*/
Node importNode(string name) {
deprecated Node importNode(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and

View File

@@ -384,7 +384,7 @@ abstract class TaintSource extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -498,7 +498,7 @@ abstract class TaintSink extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -0,0 +1,56 @@
/**
* Provides classes modeling security-relevant aspects of the `Flask-SQLAlchemy` PyPI package
* (imported by `flask_sqlalchemy`).
* See
* - https://pypi.org/project/Flask-SQLAlchemy/
* - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import semmle.python.frameworks.SqlAlchemy
/**
* INTERNAL: Do not use.
*
* Provides models for the `Flask-SQLAlchemy` PyPI package (imported by `flask_sqlalchemy`).
* See
* - https://pypi.org/project/Flask-SQLAlchemy/
* - https://flask-sqlalchemy.palletsprojects.com/en/2.x/
*/
private module FlaskSqlAlchemy {
/** Gets an instance of `flask_sqlalchemy.SQLAlchemy` */
private API::Node dbInstance() {
result = API::moduleImport("flask_sqlalchemy").getMember("SQLAlchemy").getReturn()
}
/** A call to the `text` method on a DB. */
private class DbTextCall extends SqlAlchemy::TextClause::TextClauseConstruction {
DbTextCall() { this = dbInstance().getMember("text").getACall() }
}
/** Access on a DB resulting in an Engine */
private class DbEngine extends SqlAlchemy::Engine::InstanceSource {
DbEngine() {
this = dbInstance().getMember("engine").getAUse()
or
this = dbInstance().getMember("get_engine").getACall()
}
}
/** Access on a DB resulting in a Session */
private class DbSession extends SqlAlchemy::Session::InstanceSource {
DbSession() {
this = dbInstance().getMember("session").getAUse()
or
this = dbInstance().getMember("create_session").getReturn().getACall()
or
this = dbInstance().getMember("create_session").getReturn().getMember("begin").getACall()
or
this = dbInstance().getMember("create_scoped_session").getACall()
}
}
}

View File

@@ -0,0 +1,344 @@
/**
* Provides classes modeling security-relevant aspects of the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
// This import is done like this to avoid importing the deprecated top-level things that
// would pollute the namespace
private import semmle.python.frameworks.PEP249::PEP249 as PEP249
/**
* INTERNAL: Do not use.
*
* Provides models for the `SQLAlchemy` PyPI package.
* See
* - https://pypi.org/project/SQLAlchemy/
* - https://docs.sqlalchemy.org/en/14/index.html
*/
module SqlAlchemy {
/**
* Provides models for the `sqlalchemy.engine.Engine` and `sqlalchemy.future.Engine` classes.
*
* These are so similar that we model both in the same way.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Engine
*/
module Engine {
/** Gets a reference to a SQLAlchemy Engine class. */
private API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("engine").getMember("Engine")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Engine")
}
/**
* A source of instances of a SQLAlchemy Engine, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Engine::instance()` to get references to instances of a SQLAlchemy Engine.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class EngineConstruction extends InstanceSource, DataFlow::CallCfgNode {
EngineConstruction() {
this = classRef().getACall()
or
this = API::moduleImport("sqlalchemy").getMember("create_engine").getACall()
or
this =
API::moduleImport("sqlalchemy").getMember("future").getMember("create_engine").getACall()
or
this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
}
}
/** Gets a reference to an instance of a SQLAlchemy Engine. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of a SQLAlchemy Engine. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `sqlalchemy.engine.base.Connection` and `sqlalchemy.future.Connection` classes.
*
* These are so similar that we model both in the same way.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection
*/
module Connection {
/** Gets a reference to a SQLAlchemy Connection class. */
private API::Node classRef() {
result =
API::moduleImport("sqlalchemy")
.getMember("engine")
.getMember("base")
.getMember("Connection")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Connection")
}
/**
* A source of instances of a SQLAlchemy Connection, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Connection::instance()` to get references to instances of a SQLAlchemy Connection.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class ConnectionConstruction extends InstanceSource, DataFlow::CallCfgNode {
ConnectionConstruction() {
this = classRef().getACall()
or
this.(DataFlow::MethodCallNode).calls(Engine::instance(), ["begin", "connect"])
or
this.(DataFlow::MethodCallNode).calls(instance(), "connect")
or
this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
}
}
/** Gets a reference to an instance of a SQLAlchemy Connection. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of a SQLAlchemy Connection. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the underlying DB-API Connection of a SQLAlchemy Connection.
*
* See https://docs.sqlalchemy.org/en/14/core/connections.html#dbapi-connections.
*/
module DBAPIConnection {
/**
* A source of instances of DB-API Connections, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `DBAPIConnection::instance()` to get references to instances of DB-API Connections.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class DBAPIConnectionSources extends InstanceSource, PEP249::Connection::InstanceSource {
DBAPIConnectionSources() {
this.(DataFlow::MethodCallNode).calls(Engine::instance(), "raw_connection")
or
this.(DataFlow::AttrRead).accesses(Connection::instance(), "connection")
}
}
/** Gets a reference to an instance of DB-API Connections. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of DB-API Connections. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Provides models for the `sqlalchemy.orm.Session` class
*
* See
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session
* - https://docs.sqlalchemy.org/en/14/orm/session_basics.html
*/
module Session {
/** Gets a reference to the `sqlalchemy.orm.Session` class. */
private API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("orm").getMember("Session")
}
/**
* A source of instances of `sqlalchemy.orm.Session`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Session::instance()` to get references to instances of `sqlalchemy.orm.Session`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class SessionConstruction extends InstanceSource, DataFlow::CallCfgNode {
SessionConstruction() {
this = classRef().getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("sessionmaker")
.getReturn()
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("sessionmaker")
.getReturn()
.getMember("begin")
.getACall()
}
}
/** Gets a reference to an instance of `sqlalchemy.orm.Session`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `sqlalchemy.orm.Session`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* A call to `execute` on a SQLAlchemy Engine, Connection, or Session.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.execute
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execute
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.execute
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.execute
*/
private class SqlAlchemyExecuteCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyExecuteCall() {
this.calls(Engine::instance(), "execute")
or
this.calls(Connection::instance(), "execute")
or
this.calls(Session::instance(), "execute")
}
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("statement")] }
}
/**
* A call to `exec_driver_sql` on a SQLAlchemy Connection.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.exec_driver_sql
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.exec_driver_sql
*/
private class SqlAlchemyExecDriverSqlCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyExecDriverSqlCall() { this.calls(Connection::instance(), "exec_driver_sql") }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("statement")] }
}
/**
* A call to `scalar` on a SQLAlchemy Engine, Connection, or Session.
* See
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.scalar
* - https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.scalar
* - https://docs.sqlalchemy.org/en/14/core/future.html#sqlalchemy.future.Connection.scalar
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar
*/
private class SqlAlchemyScalarCall extends DataFlow::MethodCallNode, SqlExecution::Range {
SqlAlchemyScalarCall() {
this.calls(Engine::instance(), "scalar")
or
this.calls(Connection::instance(), "scalar")
or
this.calls(Session::instance(), "scalar")
}
override DataFlow::Node getSql() {
result in [this.getArg(0), this.getArgByName("statement"), this.getArgByName("object_")]
}
}
/**
* Provides models for the `sqlalchemy.sql.expression.TextClause` class,
* which represents a textual SQL string directly.
*
* ```py
* session.query(For14).filter_by(description=sqlalchemy.text(f"'{user_input}'")).all()
* ```
*
* Initially I wanted to add lots of additional taint steps for such that the normal
* SQL injection query would be able to find cases as the one above where an ORM query
* includes a TextClause that includes user-input directly... But that presented 2
* problems:
*
* - which part of the query construction above should be marked as SQL to fit our
* `SqlExecution` concept. Nothing really fits this well, since all the SQL
* execution happens under the hood.
* - This would require a LOT of modeling for these additional taint steps, since
* there are many many constructs we would need to have models for. (see the 2
* examples below)
*
* So instead we extended the SQL injection query to include TextClause construction
* as a sink. And so we don't highlight any parts of an ORM constructed query such as
* these as containing SQL, and don't need the additional taint steps either.
*
* See
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause.
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
*/
module TextClause {
/**
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
* textual SQL string directly.
*/
abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
/** Gets the argument that specifies the SQL text. */
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
}
/** `TextClause` constructions from the `sqlalchemy` package. */
private class DefaultTextClauseConstruction extends TextClauseConstruction {
DefaultTextClauseConstruction() {
this = API::moduleImport("sqlalchemy").getMember("text").getACall()
or
this = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("text")
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("TextClause")
.getACall()
}
}
}
}

View File

@@ -79,7 +79,7 @@ class Value extends TObject {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -773,15 +773,18 @@ abstract class RegexString extends Expr {
* string is empty.
*/
predicate multiples(int start, int end, string lower, string upper) {
this.getChar(start) = "{" and
this.getChar(end - 1) = "}" and
exists(string inner | inner = this.getText().substring(start + 1, end - 1) |
inner.regexpMatch("[0-9]+") and
exists(string text, string match, string inner |
text = this.getText() and
end = start + match.length() and
inner = match.substring(1, match.length() - 1)
|
match = text.regexpFind("\\{[0-9]+\\}", _, start) and
lower = inner and
upper = lower
or
inner.regexpMatch("[0-9]*,[0-9]*") and
exists(int commaIndex | commaIndex = inner.indexOf(",") |
match = text.regexpFind("\\{[0-9]*,[0-9]*\\}", _, start) and
exists(int commaIndex |
commaIndex = inner.indexOf(",") and
lower = inner.prefix(commaIndex) and
upper = inner.suffix(commaIndex + 1)
)

View File

@@ -46,7 +46,7 @@ class CustomPathNode extends TCustomPathNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.SqlAlchemy
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -48,6 +49,13 @@ module SqlInjection {
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
}
/**
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
*/
class TextArgAsSink extends Sink {
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -69,7 +69,7 @@ class Object extends @py_object {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -24,7 +24,7 @@ class XMLLocatable extends @xmllocatable, TXMLLocatable {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn