Merge branch 'main' of https://github.com/github/codeql into python/remove-ssa-nodes-from-dataflow-graph

This commit is contained in:
Rasmus Lerchedahl Petersen
2023-12-04 14:05:40 +01:00
2263 changed files with 98539 additions and 5331 deletions

View File

@@ -0,0 +1,126 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/
private import python
import semmle.python.dataflow.new.DataFlow::DataFlow
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
private import codeql.dataflow.internal.DataFlowImplConsistency
private module Input implements InputSig<PythonDataFlow> {
private import Private
private import Public
predicate argHasPostUpdateExclude(ArgumentNode n) {
// TODO: Implement post-updates for *args, see tests added in https://github.com/github/codeql/pull/14936
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
or
// TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
// For normal parameters that can both be passed as positional arguments or keyword
// arguments, we currently have parameter positions for both cases..
//
// TODO: Figure out how bad breaking this consistency check is
exists(Function func, Parameter param |
c.getScope() = func and
p = parameterNode(param) and
c.getParameter(pos) = p and
param = func.getArg(_) and
param = func.getArgByName(_)
)
}
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
predicate identityLocalStepExclude(Node n) {
not exists(n.getLocation().getFile().getRelativePath())
}
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
// since we can have multiple DataFlowCall for a CallNode (for example if can
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
// argument in the CallNode, we end up violating this consistency check in those
// cases. (see `getCallArg` in DataFlowDispatch.qll)
exists(DataFlowCall other, CallNode cfgCall | other != call |
call.getNode() = cfgCall and
other.getNode() = cfgCall and
isArgumentNode(arg, call, _) and
isArgumentNode(arg, other, _)
)
or
// bound methods that refer to the same self argument.
// Example: In `bm = self.foo; bm(); bm()` both bm() calls use the same `self` as
// the (pos self) argument
exists(AttrRead attr, DataFlowCall other | other != call |
// for simple cases we can track the function back to the attr read but when the
// call appears in the body of a list-comprehension, we can't do that, and simply
// allow it instead.
(
call.getScope() = attr.getScope() and
any(CfgNode n | n.asCfgNode() = call.getNode().(CallNode).getFunction()).getALocalSource() =
attr
or
not exists(call.getScope().(Function).getDefinition()) and
call.getScope().getScope+() = attr.getScope()
) and
(
other.getScope() = attr.getScope() and
any(CfgNode n | n.asCfgNode() = other.getNode().(CallNode).getFunction()).getALocalSource() =
attr
or
not exists(other.getScope().(Function).getDefinition()) and
other.getScope().getScope+() = attr.getScope()
) and
arg = attr.getObject() and
arg = call.getArgument(any(ArgumentPosition p | p.isSelf())) and
arg = other.getArgument(any(ArgumentPosition p | p.isSelf()))
)
or
// `f = getattr(obj, "foo"); f()` where `obj` is used as (pos self) argument for
// `f()` call
exists(DataFlowCall getAttrCall, DataFlowCall methodCall, AttrRead attr |
call in [getAttrCall, methodCall]
|
arg = getAttrCall.getArgument(any(ArgumentPosition p | p.isPositional(0))) and
arg = methodCall.getArgument(any(ArgumentPosition p | p.isSelf())) and
attr.getObject() = arg and
attr.(CfgNode).getNode() = getAttrCall.getNode()
)
or
// In the code `super(Base, self).foo()` we use `self` as an argument in both the
// super() call (pos 1) and in the .foo() call (pos self).
exists(DataFlowCall superCall, DataFlowCall methodCall | call in [superCall, methodCall] |
exists(superCallTwoArgumentTracker(_, arg)) and
arg = superCall.getArgument(any(ArgumentPosition p | p.isPositional(1))) and
arg = methodCall.getArgument(any(ArgumentPosition p | p.isSelf()))
)
or
// in the code `def func(self): super().foo(); super.bar()` we use `self` as the
// (pos self) argument in both .foo() and .bar() calls.
exists(Function f, DataFlowCall other | other != call |
exprNode(f.getArg(0)) = arg and
call.getNode().getScope() = f and
arg = call.getArgument(any(ArgumentPosition p | p.isSelf())) and
arg = other.getArgument(any(ArgumentPosition p | p.isSelf())) and
other.getNode().getScope() = f
)
}
}
import MakeConsistency<PythonDataFlow, PythonTaintTracking, Input>

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added taint-flow modeling for regular expressions with `re` module from the standard library.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added modeling of `*args` and `**kwargs` as routed-parameters in request handlers for django/flask/FastAPI/tornado.

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- Added support for tarfile extraction filters as defined in [PEP-706](https://peps.python.org/pep-0706). In particular, calls to `TarFile.extract`, and `TarFile.extractall` are no longer considered to be sinks for the `py/tarslip` query if a sufficiently safe filter is provided.

View File

@@ -928,7 +928,10 @@ module Http {
override Parameter getARoutedParameter() {
result = rs.getARoutedParameter() and
result in [this.getArg(_), this.getArgByName(_)]
result in [
this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter),
this.getKwarg().(Parameter)
]
}
override string getFramework() { result = rs.getFramework() }

View File

@@ -10,6 +10,7 @@ import LocalSources
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.frameworks.data.ModelsAsData
/**
* IPA type for data flow nodes.
@@ -565,6 +566,11 @@ newtype TContent =
or
// Arguments can overflow and end up in the starred parameter tuple.
exists(any(CallNode cn).getArg(index))
or
// since flow summaries might use tuples, we ensure that we at least have valid
// TTupleElementContent for the 0..7 (7 was picked to match `small_tuple` in
// data-flow-private)
index in [0 .. 7]
} or
/** An element of a dictionary under a specific key. */
TDictionaryElementContent(string key) {
@@ -575,7 +581,30 @@ newtype TContent =
/** An element of a dictionary under any key. */
TDictionaryElementAnyContent() or
/** An object attribute. */
TAttributeContent(string attr) { attr = any(Attribute a).getName() }
TAttributeContent(string attr) {
attr = any(Attribute a).getName()
or
// Flow summaries that target attributes rely on a TAttributeContent being
// available. However, since the code above only constructs a TAttributeContent
// based on the attribute names seen in the DB, we can end up in a scenario where
// flow summaries don't work due to missing TAttributeContent. To get around this,
// we need to add the attribute names used by flow summaries. This needs to be done
// both for the summaries written in QL and the ones written in data-extension
// files.
//
// 1) Summaries in QL. Sadly the following code leads to non-monotonic recursion
// name = any(AccessPathToken a).getAnArgument("Attribute")
// instead we use a qltest to alert if we write a new summary in QL that uses an
// attribute -- see
// python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql
attr in ["re", "string", "pattern"]
or
//
// 2) summaries in data-extension files
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) |
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
)
}
/**
* A data-flow value can have associated content.

View File

@@ -2416,7 +2416,10 @@ module PrivateDjango {
// Since we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
result in [this.getArg(_), this.getArgByName(_)] and
result in [
this.getArg(_), this.getArgByName(_), //
this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
] and
not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i))
}
@@ -2452,13 +2455,20 @@ module PrivateDjango {
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
result in [
routeHandler.getArg(_), routeHandler.getArgByName(_), //
routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
] and
not result =
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
)
or
exists(string name |
result = this.getARequestHandler().getArgByName(name) and
(
result = this.getARequestHandler().getKwarg() // TODO: These sources should be modeled as storing content!
or
result = this.getARequestHandler().getArgByName(name)
) and
exists(string match |
match = this.getUrlPattern().regexpFind(pathRoutedParameterRegex(), _, _) and
name = match.regexpCapture(pathRoutedParameterRegex(), 2)
@@ -2475,7 +2485,10 @@ module PrivateDjango {
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
result in [
routeHandler.getArg(_), routeHandler.getArgByName(_), //
routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
] and
not result =
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
)
@@ -2488,13 +2501,22 @@ module PrivateDjango {
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
not exists(regex.getGroupName(_, _)) and
// first group will have group number 1
result =
routeHandler
.getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 +
regex.getGroupNumber(_, _))
(
// first group will have group number 1
result =
routeHandler
.getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 +
regex.getGroupNumber(_, _))
or
result = routeHandler.getVararg()
)
or
result = routeHandler.getArgByName(regex.getGroupName(_, _))
exists(regex.getGroupName(_, _)) and
(
result = routeHandler.getArgByName(regex.getGroupName(_, _))
or
result = routeHandler.getKwarg()
)
)
}
}

View File

@@ -66,6 +66,9 @@ private module FastApi {
result = this.getARequestHandler().getArgByName(_) and
// type-annotated with `Response`
not any(Response::RequestHandlerParam src).asExpr() = result
or
// **kwargs
result = this.getARequestHandler().getKwarg()
}
override DataFlow::Node getUrlPatternArg() {

View File

@@ -279,6 +279,9 @@ module Flask {
name = match.regexpCapture(werkzeug_rule_re(), 4)
)
)
or
// **kwargs
result = this.getARequestHandler().getKwarg()
}
override string getFramework() { result = "Flask" }
@@ -347,6 +350,12 @@ module Flask {
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
result in [this.getArg(_), this.getArgByName(_)] and
not result = this.getArg(0)
or
// *args
result = this.getVararg()
or
// **kwargs
result = this.getKwarg()
}
override string getFramework() { result = "Flask" }

View File

@@ -172,7 +172,10 @@ private module RestFramework {
// Since we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
result in [this.getArg(_), this.getArgByName(_)] and
result in [
this.getArg(_), this.getArgByName(_), //
this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
] and
not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i))
}

View File

@@ -3069,6 +3069,212 @@ private module StdlibPrivate {
override string getName() { result = "re." + method }
}
/**
* A flow summary for compiled regex objects
*
* See https://docs.python.org/3.11/library/re.html#re-objects
*/
class RePatternSummary extends SummarizedCallable {
RePatternSummary() { this = "re.Pattern" }
override DataFlow::CallCfgNode getACall() {
result = API::moduleImport("re").getMember("compile").getACall()
}
override DataFlow::ArgumentNode getACallback() {
result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input in ["Argument[0]", "Argument[pattern:]"] and
output = "ReturnValue.Attribute[pattern]" and
preservesValue = true
}
}
/**
* A flow summary for methods returning a `re.Match` object
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchSummary extends SummarizedCallable {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
override DataFlow::CallCfgNode getACall() {
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
}
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(string arg |
this = "re.Match" and arg = "Argument[1]"
or
this = "compiled re.Match" and arg = "Argument[0]"
|
input in [arg, "Argument[string:]"] and
(
output = "ReturnValue.Attribute[string]" and
preservesValue = true
or
// indexing such as `match[g]` is the same as `match.group(g)`
// since you can index with both integers and strings, we model it as
// both list element and dictionary... a bit of a hack, but no way to model
// subscript operators directly with flow-summaries :|
output in ["ReturnValue.ListElement", "ReturnValue.DictionaryElementAny"] and
preservesValue = false
)
)
or
// regex pattern
(
this = "re.Match" and input in ["Argument[0]", "Argument[pattern:]"]
or
// for compiled regexes, this it is already stored in the `pattern` attribute
this = "compiled re.Match" and input = "Argument[self].Attribute[pattern]"
) and
output = "ReturnValue.Attribute[re].Attribute[pattern]" and
preservesValue = true
}
}
/**
* A flow summary for methods on a `re.Match` object
*
* See https://docs.python.org/3/library/re.html#re.Match
*/
class ReMatchMethodsSummary extends SummarizedCallable {
string methodName;
ReMatchMethodsSummary() {
this = "re.Match." + methodName and
methodName in ["expand", "group", "groups", "groupdict"]
}
override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
methodName = "expand" and
preservesValue = false and
(
input = "Argument[0]" and output = "ReturnValue"
or
input = "Argument[self].Attribute[string]" and
output = "ReturnValue"
)
or
methodName = "group" and
input = "Argument[self].Attribute[string]" and
output in ["ReturnValue", "ReturnValue.ListElement"] and
preservesValue = false
or
methodName = "groups" and
input = "Argument[self].Attribute[string]" and
output = "ReturnValue.ListElement" and
preservesValue = false
or
methodName = "groupdict" and
input = "Argument[self].Attribute[string]" and
output = "ReturnValue.DictionaryElementAny" and
preservesValue = false
}
}
/**
* A flow summary for `re` methods not returning a `re.Match` object
*
* See https://docs.python.org/3/library/re.html#functions
*/
class ReFunctionsSummary extends SummarizedCallable {
string methodName;
ReFunctionsSummary() {
methodName in ["split", "findall", "finditer", "sub", "subn"] and
this = ["re.", "compiled re."] + methodName
}
override DataFlow::CallCfgNode getACall() {
this = "re." + methodName and
result = API::moduleImport("re").getMember(methodName).getACall()
or
this = "compiled re." + methodName and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(int offset |
// for non-compiled regex the first argument is the pattern, so we need to
// account for this difference
this = "re." + methodName and offset = 0
or
this = "compiled re." + methodName and offset = 1
|
// flow from input string to results
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
preservesValue = false and
input in ["Argument[" + arg + "]", "Argument[string:]"] and
(
methodName in ["split", "findall", "finditer"] and
output = "ReturnValue.ListElement"
or
// TODO: Since we currently model iterables as tainted when their elements
// are, the result of findall, finditer, split needs to be tainted
methodName in ["split", "findall", "finditer"] and
output = "ReturnValue"
or
methodName = "sub" and
output = "ReturnValue"
or
methodName = "subn" and
output = "ReturnValue.TupleElement[0]"
)
)
or
// flow from replacement value for substitution
exists(string argumentSpec |
argumentSpec in ["Argument[" + (1 - offset) + "]", "Argument[repl:]"] and
// `repl` can also be a function
input = [argumentSpec, argumentSpec + ".ReturnValue"]
|
(
methodName = "sub" and output = "ReturnValue"
or
methodName = "subn" and output = "ReturnValue.TupleElement[0]"
) and
preservesValue = false
)
)
}
}
/**
* A call to 're.escape'.
* See https://docs.python.org/3/library/re.html#re.escape

View File

@@ -416,7 +416,10 @@ module Tornado {
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(Function requestHandler | requestHandler = this.getARequestHandler() |
not exists(this.getUrlPattern()) and
result in [requestHandler.getArg(_), requestHandler.getArgByName(_)] and
result in [
requestHandler.getArg(_), requestHandler.getArgByName(_),
requestHandler.getVararg().(Parameter), requestHandler.getKwarg().(Parameter)
] and
not result = requestHandler.getArg(0)
)
or
@@ -429,6 +432,12 @@ module Tornado {
result = requestHandler.getArg(regex.getGroupNumber(_, _))
or
result = requestHandler.getArgByName(regex.getGroupName(_, _))
or
exists(regex.getGroupNumber(_, _)) and
result = requestHandler.getVararg()
or
exists(regex.getGroupName(_, _)) and
result = requestHandler.getKwarg()
)
}
}
@@ -446,7 +455,10 @@ module Tornado {
// Since we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
result in [this.getArg(_), this.getArgByName(_)] and
result in [
this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter),
this.getKwarg().(Parameter)
] and
not result = this.getArg(0)
}

View File

@@ -55,10 +55,38 @@ module TarSlip {
ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" }
}
/**
* Holds if `call` has an unsafe extraction filter, either by default (as the default is unsafe),
* or by being set to an explicitly unsafe value, such as `"fully_trusted"`, or `None`.
*/
private predicate hasUnsafeFilter(API::CallNode call) {
call =
API::moduleImport("tarfile")
.getMember("open")
.getReturn()
.getMember(["extract", "extractall"])
.getACall() and
(
exists(Expr filterValue |
filterValue = call.getParameter(4, "filter").getAValueReachingSink().asExpr() and
(
filterValue.(StrConst).getText() = "fully_trusted"
or
filterValue instanceof None
)
)
or
not exists(call.getParameter(4, "filter"))
)
}
/**
* A sink capturing method calls to `extractall`.
*
* For a call to `file.extractall` without arguments, `file` is considered a sink.
* For a call to `file.extractall`, `file` is considered a sink if
*
* - there are no other arguments, or
* - there are other arguments (except `members`), and the extraction filter is unsafe.
*/
class ExtractAllSink extends Sink {
ExtractAllSink() {
@@ -69,8 +97,13 @@ module TarSlip {
.getReturn()
.getMember("extractall")
.getACall() and
not exists(call.getArg(_)) and
not exists(call.getArgByName(_)) and
(
not exists(call.getArg(_)) and
not exists(call.getArgByName(_))
or
hasUnsafeFilter(call)
) and
not exists(call.getArgByName("members")) and
this = call.(DataFlow::MethodCallNode).getObject()
)
}
@@ -84,7 +117,8 @@ module TarSlip {
exists(DataFlow::CallCfgNode call |
call =
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
this = call.getArg(0)
this = call.getArg(0) and
hasUnsafeFilter(call)
)
}
}
@@ -99,7 +133,8 @@ module TarSlip {
.getReturn()
.getMember("extractall")
.getACall() and
this in [call.getArg(0), call.getArgByName("members")]
this in [call.getArg(0), call.getArgByName("members")] and
hasUnsafeFilter(call)
)
}
}

View File

@@ -1,6 +0,0 @@
// TODO: this should be promoted to be a REAL consistency query by being placed in
// `python/ql/consistency-queries`. For for now it resides here.
import python
import semmle.python.dataflow.new.DataFlow::DataFlow
import semmle.python.dataflow.new.internal.DataFlowPrivate
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -5,6 +5,7 @@ import functools
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
SOURCE = "source"
arg = "source"
arg1 = "source1"
arg2 = "source2"
@@ -269,3 +270,68 @@ def test_stararg_mixed():
starargs_mixed(arg1, *args, *empty_args) # $ arg1
args = (arg2, "safe")
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
# ------------------------------------------------------------------------------
# Test updating field of argument
# ------------------------------------------------------------------------------
class MyClass: pass
def kwargsSideEffect(**kwargs):
kwargs["a"].foo = kwargs["b"]
@expects(2)
def test_kwargsSideEffect():
a = MyClass()
kwargs = {"a": a, "b": SOURCE}
kwargsSideEffect(**kwargs)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
kwargsSideEffect(a=a, b=SOURCE)
SINK(a.foo) # $ MISSING: flow
def keywordArgSideEffect(a, b):
a.foo = b
@expects(2)
def test_keywordArgSideEffect():
a = MyClass()
kwargs = {"a": a, "b": SOURCE}
keywordArgSideEffect(**kwargs)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
keywordArgSideEffect(a=a, b=SOURCE)
SINK(a.foo) # $ flow="SOURCE, l:-1 -> a.foo"
def starargsSideEffect(*args):
args[0].foo = args[1]
@expects(2)
def test_starargsSideEffect():
a = MyClass()
args = (a, SOURCE)
starargsSideEffect(*args)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
starargsSideEffect(a, SOURCE)
SINK(a.foo) # $ MISSING: flow
def positionalArgSideEffect(a, b):
a.foo = b
@expects(2)
def test_positionalArgSideEffect():
a = MyClass()
args = (a, SOURCE)
positionalArgSideEffect(*args)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
positionalArgSideEffect(a, SOURCE)
SINK(a.foo) # $ flow="SOURCE, l:-1 -> a.foo"

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -0,0 +1,31 @@
def_count
| 4 |
def
| def_use_flow.py:10:5:10:5 | Essa node definition |
| def_use_flow.py:17:11:17:11 | Essa node definition |
| def_use_flow.py:19:9:19:9 | Essa node definition |
| def_use_flow.py:21:7:21:7 | Essa node definition |
implicit_use_count
| 0 |
implicit_use
source_use_count
| 3 |
source_use
| def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
def_use_edge_count
| 12 |
def_use_edge
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |

View File

@@ -0,0 +1,47 @@
import python
private import semmle.python.dataflow.new.internal.DataFlowPrivate
query int def_count() {
exists(SsaSourceVariable x | x.getName() = "x" |
result = count(EssaNodeDefinition def | def.getSourceVariable() = x)
)
}
query EssaNodeDefinition def() {
exists(SsaSourceVariable x | x.getName() = "x" | result.getSourceVariable() = x)
}
query int implicit_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getAnImplicitUse()))
}
query ControlFlowNode implicit_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getAnImplicitUse())
}
query int source_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getASourceUse()))
}
query ControlFlowNode source_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getASourceUse())
}
query int def_use_edge_count() {
exists(SsaSourceVariable x | x.getName() = "x" |
result =
count(EssaVariable v, NameNode use |
v.getSourceVariable() = x and
use = x.getAUse() and
LocalFlow::defToFirstUse(v, use)
)
)
}
query predicate def_use_edge(EssaVariable v, NameNode use) {
exists(SsaSourceVariable x | x.getName() = "x" |
v.getSourceVariable() = x and
use = x.getAUse() and
LocalFlow::defToFirstUse(v, use)
)
}

View File

@@ -0,0 +1,36 @@
# This test file is inspired by
# `csharp/ql/test/library-tests/dataflow/local/UseUseExplosion.cs`
# but with `n=3` kept small, since we do have the explosion.
cond = ...
# global variables are slightly special,
# so we go into a function scope
def scope():
x = 0
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
x = 1
else:
x = 2
else:
x = 3
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
use(x)
else:
use(x)
else:
use(x)
def use(v):
# this could just be `pass` but we do not want it optimized away.
y = v+2

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -0,0 +1 @@
# an empty file, since we want the test to run on an empty db

View File

@@ -0,0 +1,8 @@
import python
import semmle.python.dataflow.new.FlowSummary
import semmle.python.dataflow.new.internal.FlowSummaryImpl
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
Private::External::invalidSpecComponent(s, c)
}

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.dataflow.new.FlowSummary
import semmle.python.dataflow.new.internal.FlowSummaryImpl
from SummarizedCallable sc, string s, string c, string attr
where
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
Private::External::invalidSpecComponent(s, c) and
c = "Attribute[" + attr + "]"
select "The attribute \"" + attr +
"\" is not a valid TAttributeContent, please add it to the hardcoded list of TAttributeContent in the dataflow library."

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -0,0 +1,36 @@
# This test file is inspired by
# `csharp/ql/test/library-tests/dataflow/local/UseUseExplosion.cs`
# but with `n=3` kept small, since we do have the explosion.
cond = ...
# global variables are slightly special,
# so we go into a function scope
def scope():
x = 0
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
use(x)
else:
use(x)
else:
use(x)
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
use(x)
else:
use(x)
else:
use(x)
def use(v):
# this could just be `pass` but we do not want it optimized away.
y = v+2

View File

@@ -0,0 +1,24 @@
implicit_use_count
| 0 |
implicit_use
source_use_count
| 6 |
source_use
| read_explosion.py:17:15:17:15 | ControlFlowNode for x |
| read_explosion.py:19:13:19:13 | ControlFlowNode for x |
| read_explosion.py:21:11:21:11 | ControlFlowNode for x |
| read_explosion.py:28:15:28:15 | ControlFlowNode for x |
| read_explosion.py:30:13:30:13 | ControlFlowNode for x |
| read_explosion.py:32:11:32:11 | ControlFlowNode for x |
use_use_edge_count
| 9 |
use_use_edge
| read_explosion.py:17:15:17:15 | ControlFlowNode for x | read_explosion.py:28:15:28:15 | ControlFlowNode for x |
| read_explosion.py:17:15:17:15 | ControlFlowNode for x | read_explosion.py:30:13:30:13 | ControlFlowNode for x |
| read_explosion.py:17:15:17:15 | ControlFlowNode for x | read_explosion.py:32:11:32:11 | ControlFlowNode for x |
| read_explosion.py:19:13:19:13 | ControlFlowNode for x | read_explosion.py:28:15:28:15 | ControlFlowNode for x |
| read_explosion.py:19:13:19:13 | ControlFlowNode for x | read_explosion.py:30:13:30:13 | ControlFlowNode for x |
| read_explosion.py:19:13:19:13 | ControlFlowNode for x | read_explosion.py:32:11:32:11 | ControlFlowNode for x |
| read_explosion.py:21:11:21:11 | ControlFlowNode for x | read_explosion.py:28:15:28:15 | ControlFlowNode for x |
| read_explosion.py:21:11:21:11 | ControlFlowNode for x | read_explosion.py:30:13:30:13 | ControlFlowNode for x |
| read_explosion.py:21:11:21:11 | ControlFlowNode for x | read_explosion.py:32:11:32:11 | ControlFlowNode for x |

View File

@@ -0,0 +1,37 @@
import python
private import semmle.python.dataflow.new.internal.DataFlowPrivate
query int implicit_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getAnImplicitUse()))
}
query ControlFlowNode implicit_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getAnImplicitUse())
}
query int source_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getASourceUse()))
}
query ControlFlowNode source_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getASourceUse())
}
query int use_use_edge_count() {
exists(SsaSourceVariable x | x.getName() = "x" |
result =
count(NameNode use1, NameNode use2 |
use1 = x.getAUse() and
use2 = x.getAUse() and
LocalFlow::useToNextUse(use1, use2)
)
)
}
query predicate use_use_edge(NameNode use1, NameNode use2) {
exists(SsaSourceVariable x | x.getName() = "x" |
use1 = x.getAUse() and
use2 = x.getAUse() and
LocalFlow::useToNextUse(use1, use2)
)
}

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,27 +0,0 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -1,12 +1,3 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
parameterCallable
localFlowIsLocal
readStepIsLocal
storeStepIsLocal
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:83:16:83:36 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | testapp/tests.py:84:16:84:43 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
@@ -56,20 +47,3 @@ storeStepIsLocal
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:356:12:356:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/orm_tests.py:294:1:294:29 | [orm-model] Class TestLoad | testapp/orm_tests.py:363:9:363:37 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/tests.py:81:33:81:37 | ControlFlowNode for Str | testapp/orm_form_test.py:6:1:6:28 | [orm-model] Class MyModel | Store step does not preserve enclosing callable. |
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
missingArgumentCall
multipleArgumentCall

View File

@@ -1,2 +0,0 @@
import python
import experimental.dataflow.TestUtil.DataFlowConsistency

View File

@@ -150,3 +150,40 @@ class UnknownViewSubclass(UnknownViewSuperclass):
urlpatterns = [
path("UnknownViewSubclass/", UnknownViewSubclass.as_view()), # $ routeSetup="UnknownViewSubclass/"
]
################################################################################
# Routing to *args and **kwargs
################################################################################
def kwargs_param(request, **kwargs): # $ requestHandler routedParameter=kwargs
ensure_tainted(
kwargs, # $ tainted
kwargs["foo"], # $ tainted
kwargs["bar"] # $ tainted
)
ensure_tainted(request) # $ tainted
def star_args_param(request, *args): # $ requestHandler routedParameter=args
ensure_tainted(
args, # $ tainted
args[0], # $ tainted
args[1], # $ tainted
)
ensure_tainted(request) # $ tainted
def star_args_param_check(request, foo, bar): # $ requestHandler routedParameter=foo routedParameter=bar
ensure_tainted(
foo, # $ tainted
bar, # $ tainted
)
ensure_tainted(request) # $ tainted
urlpatterns = [
path("test-kwargs_param/<foo>/<bar>", kwargs_param), # $ routeSetup="test-kwargs_param/<foo>/<bar>"
re_path("test-star_args_param/([^/]+)/(.+)", star_args_param), # $ routeSetup="test-star_args_param/([^/]+)/(.+)"
re_path("test-star_args_param_check/([^/]+)/(.+)", star_args_param_check), # $ routeSetup="test-star_args_param_check/([^/]+)/(.+)"
]

View File

@@ -0,0 +1,13 @@
storeStepIsLocal
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:21:5:21:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:23:5:23:45 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:24:5:24:55 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:25:5:25:49 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:27:5:27:52 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:28:5:28:46 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:30:5:30:34 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:31:5:31:92 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:34:5:34:34 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:37:5:37:33 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:37:5:37:59 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| SqlExecution.py:16:1:16:25 | [orm-model] Class User | SqlExecution.py:37:5:37:77 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |

View File

@@ -0,0 +1,4 @@
storeStepIsLocal
| testapp/models.py:6:1:6:24 | [orm-model] Class Foo | testapp/views.py:14:16:14:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/models.py:11:1:11:24 | [orm-model] Class Bar | testapp/views.py:19:16:19:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| testapp/models.py:11:1:11:24 | [orm-model] Class Bar | testapp/views.py:23:16:23:32 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |

View File

@@ -1,5 +1,6 @@
from rest_framework.decorators import api_view, parser_classes
from rest_framework.views import APIView
from rest_framework.viewsets import ModelViewSet
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.parsers import JSONParser
@@ -89,7 +90,7 @@ def test_taint(request: Request, routed_param): # $ requestHandler routedParamet
class MyClass(APIView):
def initial(self, request, *args, **kwargs): # $ requestHandler
def initial(self, request, *args, **kwargs): # $ requestHandler routedParameter=kwargs
# this method will be called before processing any request
ensure_tainted(request) # $ tainted
@@ -107,12 +108,31 @@ class MyClass(APIView):
return Response("ok") # $ HttpResponse
# Viewsets
# see https://www.django-rest-framework.org/api-guide/viewsets/
class MyModelViewSet(ModelViewSet):
def retrieve(self, request, routed_param): # $ requestHandler routedParameter=routed_param
ensure_tainted(
request, # $ tainted
request.GET, # $ tainted
request.GET.get("pk"), # $ tainted
request.data # $ tainted
)
ensure_tainted(routed_param) # $ tainted
# same as for standard Django view
ensure_tainted(self.args, self.kwargs) # $ tainted
return Response("retrieve") # $ HttpResponse
# fake setup, you can't actually run this
urlpatterns = [
path("test-taint/<routed_param>", test_taint), # $ routeSetup="test-taint/<routed_param>"
path("ClassView/<routed_param>", MyClass.as_view()), # $ routeSetup="ClassView/<routed_param>"
path("MyModelViewSet/<routed_param>", MyModelViewSet.as_view()) # $ routeSetup="MyModelViewSet/<routed_param>"
]
# tests with no route-setup, but we can still tell that these are using Django REST

View File

@@ -0,0 +1,82 @@
import re
ts = TAINTED_STRING
pat = ... # some pattern
compiled_pat = re.compile(pat)
# see https://docs.python.org/3/library/re.html#functions
ensure_not_tainted(
# returns Match object, which is tested properly below. (note: with the flow summary
# modeling, objects containing tainted values are not themselves tainted).
re.search(pat, ts),
re.match(pat, ts),
re.fullmatch(pat, ts),
compiled_pat.search(ts),
compiled_pat.match(ts),
compiled_pat.fullmatch(ts),
)
# Match object
tainted_match = re.match(pat, ts)
safe_match = re.match(pat, "safe")
ensure_tainted(
tainted_match.expand("Hello \1"), # $ tainted
safe_match.expand(ts), # $ tainted
tainted_match.group(), # $ tainted
tainted_match.group(1, 2), # $ tainted
tainted_match.group(1, 2)[0], # $ tainted
tainted_match[0], # $ tainted
tainted_match["key"], # $ tainted
tainted_match.groups()[0], # $ tainted
tainted_match.groupdict()["key"], # $ tainted
re.match(pat, ts).string, # $ tainted
re.match(ts, "safe").re.pattern, # $ tainted
compiled_pat.match(ts).string, # $ tainted
re.compile(ts).match("safe").re.pattern, # $ tainted
)
ensure_not_tainted(
safe_match.expand("Hello \1"),
safe_match.group(),
re.match(pat, "safe").re,
re.match(pat, "safe").string,
)
ensure_tainted(
# other functions not returning Match objects
re.split(pat, ts), # $ tainted
re.split(pat, ts)[0], # $ tainted
re.findall(pat, ts), # $ tainted
re.findall(pat, ts)[0], # $ tainted
re.finditer(pat, ts), # $ tainted
[x for x in re.finditer(pat, ts)], # $ tainted
re.sub(pat, repl="safe", string=ts), # $ tainted
re.sub(pat, repl=lambda m: ..., string=ts), # $ tainted
re.sub(pat, repl=ts, string="safe"), # $ tainted
re.sub(pat, repl=lambda m: ts, string="safe"), # $ tainted
# same for compiled patterns
compiled_pat.split(ts), # $ tainted
compiled_pat.split(ts)[0], # $ tainted
# ...
# user-controlled compiled pattern
re.compile(ts), # $ tainted
re.compile(ts).pattern, # $ tainted
)
ensure_not_tainted(
re.subn(pat, repl="safe", string=ts),
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
)
ensure_tainted(
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
)

View File

@@ -12,6 +12,15 @@ edges
| tarslip.py:58:1:58:3 | ControlFlowNode for tar | tarslip.py:59:5:59:9 | ControlFlowNode for entry |
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:58:1:58:3 | ControlFlowNode for tar |
| tarslip.py:59:5:59:9 | ControlFlowNode for entry | tarslip.py:61:21:61:25 | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | ControlFlowNode for tar | tarslip.py:91:1:91:3 | ControlFlowNode for tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:90:1:90:3 | ControlFlowNode for tar |
| tarslip.py:94:1:94:3 | ControlFlowNode for tar | tarslip.py:95:5:95:9 | ControlFlowNode for entry |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:94:1:94:3 | ControlFlowNode for tar |
| tarslip.py:95:5:95:9 | ControlFlowNode for entry | tarslip.py:96:17:96:21 | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | ControlFlowNode for tar | tarslip.py:110:1:110:3 | ControlFlowNode for tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:109:1:109:3 | ControlFlowNode for tar |
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | tarslip.py:113:24:113:26 | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:112:1:112:3 | ControlFlowNode for tar |
nodes
| tarslip.py:14:1:14:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
@@ -31,6 +40,19 @@ nodes
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:59:5:59:9 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:94:1:94:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:95:5:95:9 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
subpaths
#select
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
@@ -38,3 +60,7 @@ subpaths
| tarslip.py:39:17:39:21 | ControlFlowNode for entry | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | tarslip.py:39:17:39:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:43:24:43:26 | ControlFlowNode for tar | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | tarslip.py:43:24:43:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:61:21:61:25 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:91:1:91:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |

View File

@@ -82,3 +82,35 @@ tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
if not os.path.isabs(entry.name):
tar.extract(entry, "/tmp/unpack/")
# Extraction filters
extraction_filter = "fully_trusted"
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe
tar.close()
tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # unsafe
extraction_filter = "data"
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # safe
tar.close()
tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # safe
extraction_filter = None
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=tar, filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter

View File

@@ -0,0 +1,6 @@
storeStepIsLocal
| sql_injection.py:10:1:10:25 | [orm-model] Class User | sql_injection.py:18:9:18:80 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| sql_injection.py:10:1:10:25 | [orm-model] Class User | sql_injection.py:24:9:24:97 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| sql_injection.py:10:1:10:25 | [orm-model] Class User | sql_injection.py:25:9:25:84 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| sql_injection.py:10:1:10:25 | [orm-model] Class User | sql_injection.py:26:9:26:86 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |
| sql_injection.py:10:1:10:25 | [orm-model] Class User | sql_injection.py:40:9:40:82 | ControlFlowNode for Attribute() | Store step does not preserve enclosing callable. |