Merge branch 'main' into python/captured-variables-for-typetracking

This commit is contained in:
yoff
2023-05-04 13:52:23 +02:00
committed by GitHub
775 changed files with 24194 additions and 11457 deletions

View File

@@ -1,3 +1,9 @@
## 0.9.1
### Minor Analysis Improvements
* Added support for querying the contents of YAML files.
## 0.9.0
### Deprecated APIs

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added more content-flow/field-flow for dictionaries, by adding support for reads through `mydict.get("key")` and `mydict.setdefault("key", value)`, and store steps through `mydict["key"] = value` and `mydict.setdefault("key", value)`.

View File

@@ -1,4 +1,5 @@
---
category: minorAnalysis
---
## 0.9.1
### Minor Analysis Improvements
* Added support for querying the contents of YAML files.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.9.0
lastReleaseVersion: 0.9.1

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.9.1-dev
version: 0.9.2-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
@@ -12,3 +12,4 @@ dependencies:
codeql/yaml: ${workspace}
dataExtensions:
- semmle/python/frameworks/**/model.yml
warnOnImplicitThis: true

View File

@@ -51,7 +51,7 @@ private CryptographicAlgorithm getBestAlgorithmForName(string name) {
*/
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/** Gets a textual representation of this element. */
string toString() { result = getName() }
string toString() { result = this.getName() }
/**
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).

View File

@@ -815,24 +815,20 @@ private module Cached {
)
}
private predicate store(
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
exists(ContentSet cs |
c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType)
)
}
/**
* Holds if data can flow from `node1` to `node2` via a direct assignment to
* `f`.
* `c`.
*
* This includes reverse steps through reads when the result of the read has
* been stored into, in order to handle cases like `x.f1.f2 = y`.
*/
cached
predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
predicate store(
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
exists(ContentSet cs |
c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType)
)
}
/**
@@ -932,36 +928,15 @@ private module Cached {
TReturnCtxNoFlowThrough() or
TReturnCtxMaybeFlowThrough(ReturnPosition pos)
cached
newtype TTypedContentApprox =
MkTypedContentApprox(ContentApprox c, DataFlowType t) {
exists(Content cont |
c = getContentApprox(cont) and
store(_, cont, _, _, t)
)
}
cached
newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
cached
TypedContent getATypedContent(TypedContentApprox c) {
exists(ContentApprox cls, DataFlowType t, Content cont |
c = MkTypedContentApprox(cls, pragma[only_bind_into](t)) and
result = MkTypedContent(cont, pragma[only_bind_into](t)) and
cls = getContentApprox(cont)
)
}
cached
newtype TAccessPathFront =
TFrontNil(DataFlowType t) or
TFrontHead(TypedContent tc)
TFrontNil() or
TFrontHead(Content c)
cached
newtype TApproxAccessPathFront =
TApproxFrontNil(DataFlowType t) or
TApproxFrontHead(TypedContentApprox tc)
TApproxFrontNil() or
TApproxFrontHead(ContentApprox c)
cached
newtype TAccessPathFrontOption =
@@ -986,8 +961,16 @@ predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
/**
* A `Node` at which a cast can occur such that the type should be checked.
*/
class CastingNode extends Node {
class CastingNode instanceof Node {
CastingNode() { castingNode(this) }
string toString() { result = super.toString() }
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
private predicate readStepWithTypes(
@@ -1135,9 +1118,17 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParamNode extends Node {
class ParamNode instanceof Node {
ParamNode() { parameterNode(this, _, _) }
string toString() { result = super.toString() }
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/**
* Holds if this node is the parameter of callable `c` at the specified
* position.
@@ -1146,9 +1137,17 @@ class ParamNode extends Node {
}
/** A data-flow node that represents a call argument. */
class ArgNode extends Node {
class ArgNode instanceof Node {
ArgNode() { argumentNode(this, _, _) }
string toString() { result = super.toString() }
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
argumentNode(this, call, pos)
@@ -1159,9 +1158,17 @@ class ArgNode extends Node {
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
*/
class ReturnNodeExt extends Node {
class ReturnNodeExt instanceof Node {
ReturnNodeExt() { returnNodeExt(this, _) }
string toString() { result = super.toString() }
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the kind of this returned value. */
ReturnKindExt getKind() { returnNodeExt(this, result) }
}
@@ -1170,8 +1177,16 @@ class ReturnNodeExt extends Node {
* A node to which data can flow from a call. Either an ordinary out node
* or a post-update node associated with a call argument.
*/
class OutNodeExt extends Node {
class OutNodeExt instanceof Node {
OutNodeExt() { outNodeExt(this) }
string toString() { result = super.toString() }
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
@@ -1387,67 +1402,37 @@ class ReturnCtx extends TReturnCtx {
}
}
/** An approximated `Content` tagged with the type of a containing object. */
class TypedContentApprox extends MkTypedContentApprox {
private ContentApprox c;
private DataFlowType t;
TypedContentApprox() { this = MkTypedContentApprox(c, t) }
/** Gets a typed content approximated by this value. */
TypedContent getATypedContent() { result = getATypedContent(this) }
/** Gets the content. */
ContentApprox getContent() { result = c }
/** Gets the container type. */
DataFlowType getContainerType() { result = t }
/** Gets a textual representation of this approximated content. */
string toString() { result = c.toString() }
}
/**
* The front of an approximated access path. This is either a head or a nil.
*/
abstract class ApproxAccessPathFront extends TApproxAccessPathFront {
abstract string toString();
abstract DataFlowType getType();
abstract boolean toBoolNonEmpty();
TypedContentApprox getHead() { this = TApproxFrontHead(result) }
ContentApprox getHead() { this = TApproxFrontHead(result) }
pragma[nomagic]
TypedContent getAHead() {
exists(TypedContentApprox cont |
Content getAHead() {
exists(ContentApprox cont |
this = TApproxFrontHead(cont) and
result = cont.getATypedContent()
cont = getContentApprox(result)
)
}
}
class ApproxAccessPathFrontNil extends ApproxAccessPathFront, TApproxFrontNil {
private DataFlowType t;
ApproxAccessPathFrontNil() { this = TApproxFrontNil(t) }
override string toString() { result = ppReprType(t) }
override DataFlowType getType() { result = t }
override string toString() { result = "nil" }
override boolean toBoolNonEmpty() { result = false }
}
class ApproxAccessPathFrontHead extends ApproxAccessPathFront, TApproxFrontHead {
private TypedContentApprox tc;
private ContentApprox c;
ApproxAccessPathFrontHead() { this = TApproxFrontHead(tc) }
ApproxAccessPathFrontHead() { this = TApproxFrontHead(c) }
override string toString() { result = tc.toString() }
override DataFlowType getType() { result = tc.getContainerType() }
override string toString() { result = c.toString() }
override boolean toBoolNonEmpty() { result = true }
}
@@ -1461,65 +1446,31 @@ class ApproxAccessPathFrontOption extends TApproxAccessPathFrontOption {
}
}
/** A `Content` tagged with the type of a containing object. */
class TypedContent extends MkTypedContent {
private Content c;
private DataFlowType t;
TypedContent() { this = MkTypedContent(c, t) }
/** Gets the content. */
Content getContent() { result = c }
/** Gets the container type. */
DataFlowType getContainerType() { result = t }
/** Gets a textual representation of this content. */
string toString() { result = c.toString() }
/**
* Holds if access paths with this `TypedContent` at their head always should
* be tracked at high precision. This disables adaptive access path precision
* for such access paths.
*/
predicate forceHighPrecision() { forceHighPrecision(c) }
}
/**
* The front of an access path. This is either a head or a nil.
*/
abstract class AccessPathFront extends TAccessPathFront {
abstract string toString();
abstract DataFlowType getType();
abstract ApproxAccessPathFront toApprox();
TypedContent getHead() { this = TFrontHead(result) }
Content getHead() { this = TFrontHead(result) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
private DataFlowType t;
override string toString() { result = "nil" }
AccessPathFrontNil() { this = TFrontNil(t) }
override string toString() { result = ppReprType(t) }
override DataFlowType getType() { result = t }
override ApproxAccessPathFront toApprox() { result = TApproxFrontNil(t) }
override ApproxAccessPathFront toApprox() { result = TApproxFrontNil() }
}
class AccessPathFrontHead extends AccessPathFront, TFrontHead {
private TypedContent tc;
private Content c;
AccessPathFrontHead() { this = TFrontHead(tc) }
AccessPathFrontHead() { this = TFrontHead(c) }
override string toString() { result = tc.toString() }
override string toString() { result = c.toString() }
override DataFlowType getType() { result = tc.getContainerType() }
override ApproxAccessPathFront toApprox() { result.getAHead() = tc }
override ApproxAccessPathFront toApprox() { result.getAHead() = c }
}
/** An optional access path front. */

View File

@@ -588,6 +588,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
dictStoreStep(nodeFrom, c, nodeTo)
or
moreDictStoreSteps(nodeFrom, c, nodeTo)
or
comprehensionStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
@@ -688,19 +690,48 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
}
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
// Dictionary
// `{..., "key": 42, ...}`
// nodeFrom is `42`, cfg node
// nodeTo is the dict, `{..., "key": 42, ...}`, cfg node
// c denotes element of dictionary and the key `"key"`
exists(KeyValuePair item |
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
nodeFrom.getNode().getNode() = item.getValue() and
c.getKey() = item.getKey().(StrConst).getS()
)
}
/**
* Holds if `nodeFrom` is stored into the dictionary element `c` of the dictionary
* whose post-update node is `nodeTo`. Two forms are covered, both requiring the
* key to be a string constant:
* - a subscript definition such as `mydict["key"] = value`
* - a call such as `mydict.setdefault("key", value)`
*
* This has been made private since `dictStoreStep` is used by taint-tracking, and
* adding these extra steps made some alerts very noisy.
*
* TODO: Once TaintTracking no longer uses `dictStoreStep`, unify the two predicates.
*/
private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
// Subscript store: `nodeTo` is the post-update node of the subscripted object,
// `nodeFrom` is the value assigned, and the key of `c` is the string-constant index.
exists(SubscriptNode subscript |
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
or
// `setdefault` store: `nodeTo` is the post-update node of the receiver, the first
// argument is the string-constant key of `c`, and `nodeFrom` is the second argument.
// see https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeTo.(PostUpdateNode).getPreUpdateNode(), "setdefault") and
call.getArg(0).asExpr().(StrConst).getText() = c.getKey() and
nodeFrom = call.getArg(1)
)
}
/**
* Holds if the dictionary element content `c` is cleared at `node`.
*
* This is the case when `node` is the object of a subscript that is a
* `DefinitionNode` (such as the target in `mydict["key"] = value`) and the
* subscript index is a string constant equal to the key of `c`.
*/
predicate dictClearStep(Node node, DictionaryElementContent c) {
exists(SubscriptNode subscript |
subscript instanceof DefinitionNode and
node.asCfgNode() = subscript.getObject() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
}
/** Data flows from an element expression in a comprehension to the comprehension. */
predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// Comprehension
@@ -761,6 +792,8 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
dictReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
@@ -799,6 +832,17 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
)
}
/**
* Holds if `nodeTo` reads the dictionary element `c` from `nodeFrom`.
*
* `nodeTo` is a call to the `get` or `setdefault` method on `nodeFrom` whose first
* argument is a string constant equal to the key of `c`, as in `mydict.get("key")`.
*/
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// see
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeFrom, ["get", "setdefault"]) and
// only string-constant keys are matched; `c` must be dictionary element content
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
nodeTo = call
)
}
/** Data flows from a sequence to a call to `pop` on the sequence. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
@@ -873,6 +917,8 @@ predicate clearsContent(Node n, Content c) {
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
or
dictSplatParameterNodeClearStep(n, c)
@@ -928,6 +974,8 @@ predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) {
n instanceof ModuleVariableNode
or
n instanceof SummaryNode
or
n instanceof SummaryParameterNode

View File

@@ -407,7 +407,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }
override string toString() {
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
}
/** Gets the module in which this variable appears. */

View File

@@ -3795,6 +3795,30 @@ private module StdlibPrivate {
preservesValue = true
}
}
/**
* A flow summary for `dict.setdefault`.
*
* The summary propagates the value of the second argument (`Argument[1]`) to the
* return value of the call.
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
*/
class DictSetdefaultSummary extends SummarizedCallable {
DictSetdefaultSummary() { this = "dict.setdefault" }
// Matches any method call named `setdefault`; the receiver is unconstrained (`_`).
override DataFlow::CallCfgNode getACall() {
result.(DataFlow::MethodCallNode).calls(_, "setdefault")
}
// Matches any attribute read of the name `setdefault`.
override DataFlow::ArgumentNode getACallback() {
result.(DataFlow::AttrRead).getAttributeName() = "setdefault"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// the store/read steps for the dictionary content of the receiver are modeled in DataFlowPrivate
input = "Argument[1]" and
output = "ReturnValue" and
preservesValue = true
}
}
}
// ---------------------------------------------------------------------------

View File

@@ -55,16 +55,16 @@ deprecated class CustomPathNode extends TCustomPathNode {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets a textual representation of this element. */
string toString() {
result = asNode1().toString()
result = this.asNode1().toString()
or
result = asNode2().toString()
result = this.asNode2().toString()
}
}