Merge branch 'main' into pyMaD

This commit is contained in:
Erik Krogh Kristensen
2022-05-12 14:43:16 +02:00
1508 changed files with 93821 additions and 15450 deletions

View File

@@ -283,7 +283,13 @@ module API {
* you should use `.getMember` on the parent module. For example, for nodes corresponding to the module `foo.bar`,
* use `moduleImport("foo").getMember("bar")`.
*/
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }
Node moduleImport(string m) {
result = Impl::MkModuleImport(m) and
// restrict `moduleImport` so it will never give results for a dotted name. Note
// that we cannot move this logic to the `MkModuleImport` construction, since we
// need the intermediate API graph nodes for the prefixes in `import foo.bar.baz`.
//
// `%` is the `matches` wildcard for any sequence of characters, so the pattern
// `%.%` matches every name that contains a `.`.
not m.matches("%.%")
}
/** Gets a node corresponding to the built-in with the given name, if any. */
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }

View File

@@ -498,6 +498,65 @@ module XML {
abstract string getName();
}
}
/**
* A kind of XML vulnerability, represented as one of the strings
* `"XML bomb"`, `"XXE"`, or `"DTD retrieval"`.
*
* See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries
*
* See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing.
*/
class XmlParsingVulnerabilityKind extends string {
// The complete set of supported kinds; each one has a matching `is...` predicate below.
XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] }
/**
* Holds for XML bomb vulnerability kind, such as 'Billion Laughs' and 'Quadratic
* Blowup'.
*
* While a parser could technically be vulnerable to one and not the other, from our
* point of view the interesting part is that it IS vulnerable to these types of
* attacks, and not so much which one specifically works. In practice I haven't seen
* a parser that is vulnerable to one and not the other.
*/
predicate isXmlBomb() { this = "XML bomb" }
/** Holds for XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XmlParsing::Range` instead.
*/
class XmlParsing extends Decoding instanceof XmlParsing::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*
* Delegates to the `vulnerableTo` predicate of the underlying `XmlParsing::Range`.
*/
predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XmlParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XmlParsing` instead.
*/
abstract class Range extends Decoding::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*
* Must be implemented by every concrete model.
*/
abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind);
/** Gets the format for this decoding, which is always `"XML"` for XML parsing. */
override string getFormat() { result = "XML" }
}
}
}
/** Provides classes for modeling LDAP-related APIs. */
@@ -910,6 +969,76 @@ module HTTP {
abstract DataFlow::Node getValueArg();
}
}
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* in a global manner.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CsrfProtectionSetting::Range` instead.
*/
class CsrfProtectionSetting extends DataFlow::Node instanceof CsrfProtectionSetting::Range {
/**
* Gets the boolean value indicating whether CSRF protection is enabled
* (`true`) or disabled (`false`) by this node.
*
* Delegates to the underlying `CsrfProtectionSetting::Range`.
*/
boolean getVerificationSetting() { result = super.getVerificationSetting() }
}
/** Provides a class for modeling new global CSRF protection setting APIs. */
module CsrfProtectionSetting {
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* in a global manner.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CsrfProtectionSetting` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the boolean value indicating whether CSRF protection is enabled
* (`true`) or disabled (`false`) by this node.
*/
abstract boolean getVerificationSetting();
}
}
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* for a specific part of an application.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CsrfLocalProtectionSetting::Range` instead.
*/
class CsrfLocalProtectionSetting extends DataFlow::Node instanceof CsrfLocalProtectionSetting::Range {
/**
* Gets a request handler whose CSRF protection is changed.
*
* Delegates to the underlying `CsrfLocalProtectionSetting::Range`.
*/
Function getRequestHandler() { result = super.getRequestHandler() }
/**
* Holds if CSRF protection is enabled by this setting.
*
* NOTE(review): presumably the setting disables protection when this does not
* hold; confirm against the concrete models.
*/
predicate csrfEnabled() { super.csrfEnabled() }
}
/** Provides a class for modeling new local (per-handler) CSRF protection setting APIs. */
module CsrfLocalProtectionSetting {
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* for a specific part of an application.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CsrfLocalProtectionSetting` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets a request handler whose CSRF protection is changed.
*/
abstract Function getRequestHandler();
/** Holds if CSRF protection is enabled by this setting. */
abstract predicate csrfEnabled();
}
}
}
/** Provides classes for modeling HTTP clients. */

View File

@@ -363,7 +363,7 @@ class CallNode extends ControlFlowNode {
)
}
/** Gets the flow node corresponding to the nth argument of the call corresponding to this flow node */
/** Gets the flow node corresponding to the n'th positional argument of the call corresponding to this flow node */
ControlFlowNode getArg(int n) {
exists(Call c |
this.getNode() = c and

View File

@@ -53,3 +53,4 @@ private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.Xmltodict

View File

@@ -498,23 +498,35 @@ private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuratio
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
/**
* Holds if `expectsContentCached` holds for the underlying node of `n` with some
* content set `cs`, and `c` is one of the contents given by `cs.getAReadContent()`.
*
* `c` must be bound by the caller (`bindingset[c]`).
*/
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
// binding hints: `c` is compared against the contents of `cs`, not used to look up `cs`
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
/** Holds if `expectsContentCached` does not hold for the underlying node of `n` with any content set. */
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
@@ -793,7 +805,7 @@ private module Stage1 {
* by `revFlow`.
*/
pragma[nomagic]
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -891,7 +903,7 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(pragma[only_bind_into](c), pragma[only_bind_into](config)) and
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
@@ -1181,11 +1193,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) some content `c` for which the previous stage's
* `revFlowIsReadAndStored` holds under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1646,10 +1673,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1740,7 +1781,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1979,6 +2021,16 @@ private module Stage3 {
clearContent(node, ap.getHead().getContent(), config)
}
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) the content `c` at the head of `ap`, where `c`
* is also the content of some previous-stage `readStepCand` under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1987,7 +2039,12 @@ private module Stage3 {
exists(state) and
exists(config) and
not clear(node, ap, config) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -2452,10 +2509,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3279,10 +3350,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3351,17 +3436,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
/**
* Holds if, for some callable `c`, `Stage4::parameterMayFlowThrough` holds for `c`,
* `apa` and `config` (its first argument is left unconstrained), and
* `nodeMayUseSummary0(n, c, state, apa, config)` holds.
*
* NOTE(review): the split into `nodeMayUseSummary0` plus `pragma[nomagic]` looks
* like a join-order/performance measure; confirm with the data-flow library authors.
*/
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -4257,6 +4353,12 @@ private module Subpaths {
)
}
/** Holds if `succ` is a successor of `pred` and `succNode` is the `NodeEx` of `succ`. */
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4264,15 +4366,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4609,6 +4709,10 @@ private module FlowExploration {
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4625,6 +4729,10 @@ private module FlowExploration {
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()

View File

@@ -498,23 +498,35 @@ private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuratio
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
/**
* Holds if `expectsContentCached` holds for the underlying node of `n` with some
* content set `cs`, and `c` is one of the contents given by `cs.getAReadContent()`.
*
* `c` must be bound by the caller (`bindingset[c]`).
*/
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
// binding hints: `c` is compared against the contents of `cs`, not used to look up `cs`
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
/** Holds if `expectsContentCached` does not hold for the underlying node of `n` with any content set. */
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
@@ -793,7 +805,7 @@ private module Stage1 {
* by `revFlow`.
*/
pragma[nomagic]
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -891,7 +903,7 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(pragma[only_bind_into](c), pragma[only_bind_into](config)) and
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
@@ -1181,11 +1193,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) some content `c` for which the previous stage's
* `revFlowIsReadAndStored` holds under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1646,10 +1673,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1740,7 +1781,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1979,6 +2021,16 @@ private module Stage3 {
clearContent(node, ap.getHead().getContent(), config)
}
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) the content `c` at the head of `ap`, where `c`
* is also the content of some previous-stage `readStepCand` under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1987,7 +2039,12 @@ private module Stage3 {
exists(state) and
exists(config) and
not clear(node, ap, config) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -2452,10 +2509,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3279,10 +3350,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3351,17 +3436,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
/**
* Holds if, for some callable `c`, `Stage4::parameterMayFlowThrough` holds for `c`,
* `apa` and `config` (its first argument is left unconstrained), and
* `nodeMayUseSummary0(n, c, state, apa, config)` holds.
*
* NOTE(review): the split into `nodeMayUseSummary0` plus `pragma[nomagic]` looks
* like a join-order/performance measure; confirm with the data-flow library authors.
*/
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -4257,6 +4353,12 @@ private module Subpaths {
)
}
/** Holds if `succ` is a successor of `pred` and `succNode` is the `NodeEx` of `succ`. */
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4264,15 +4366,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4609,6 +4709,10 @@ private module FlowExploration {
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4625,6 +4729,10 @@ private module FlowExploration {
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()

View File

@@ -498,23 +498,35 @@ private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuratio
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
/**
* Holds if `expectsContentCached` holds for the underlying node of `n` with some
* content set `cs`, and `c` is one of the contents given by `cs.getAReadContent()`.
*
* `c` must be bound by the caller (`bindingset[c]`).
*/
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
// binding hints: `c` is compared against the contents of `cs`, not used to look up `cs`
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
/** Holds if `expectsContentCached` does not hold for the underlying node of `n` with any content set. */
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
@@ -793,7 +805,7 @@ private module Stage1 {
* by `revFlow`.
*/
pragma[nomagic]
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -891,7 +903,7 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(pragma[only_bind_into](c), pragma[only_bind_into](config)) and
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
@@ -1181,11 +1193,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) some content `c` for which the previous stage's
* `revFlowIsReadAndStored` holds under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1646,10 +1673,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1740,7 +1781,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1979,6 +2021,16 @@ private module Stage3 {
clearContent(node, ap.getHead().getContent(), config)
}
/**
* Holds if `node` is reached by the previous stage's `revFlow` under `config` and
* expects (per `expectsContentEx`) the content `c` at the head of `ap`, where `c`
* is also the content of some previous-stage `readStepCand` under the same `config`.
*/
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1987,7 +2039,12 @@ private module Stage3 {
exists(state) and
exists(config) and
not clear(node, ap, config) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -2452,10 +2509,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3279,10 +3350,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
* Holds if `ap` is a valid access path for `config`: either an `ApNil` that occurs
* in `revFlow`, or `apCons(head, tail)` for some `head` and `tail` satisfying
* `consCand`.
*
* Mutually recursive with `consCand`.
*/
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
* Holds if `revConsCand(tc, ap, config)` holds and `ap` is additionally a valid
* access path according to `validAp`.
*/
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3351,17 +3436,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
/**
* Holds if, for some callable `c`, `Stage4::parameterMayFlowThrough` holds for `c`,
* `apa` and `config` (its first argument is left unconstrained), and
* `nodeMayUseSummary0(n, c, state, apa, config)` holds.
*
* NOTE(review): the split into `nodeMayUseSummary0` plus `pragma[nomagic]` looks
* like a join-order/performance measure; confirm with the data-flow library authors.
*/
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -4257,6 +4353,12 @@ private module Subpaths {
)
}
/** Holds if `succ` is a successor of `pred` and `succNode` is the `NodeEx` of `succ`. */
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4264,15 +4366,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4609,6 +4709,10 @@ private module FlowExploration {
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4625,6 +4729,10 @@ private module FlowExploration {
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()

View File

@@ -498,23 +498,35 @@ private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuratio
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
pragma[inline]
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
c = cs.getAReadContent()
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
/**
 * Holds if `expectsContentCached` holds for the underlying node of `n` and
 * some content set that has `c` among its read contents.
 *
 * `c` must be bound by the caller (see `bindingset[c]`).
 */
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
/** Holds if `expectsContentCached` does not hold for the underlying node of `n` with any content set. */
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
@@ -793,7 +805,7 @@ private module Stage1 {
* by `revFlow`.
*/
pragma[nomagic]
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -891,7 +903,7 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(pragma[only_bind_into](c), pragma[only_bind_into](config)) and
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
@@ -1181,11 +1193,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
/**
 * Holds if `node` is in `PrevStage::revFlow` for `config` and
 * `expectsContentEx(node, c)` holds for some content `c` that satisfies
 * `PrevStage::revFlowIsReadAndStored(c, config)`.
 */
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1646,10 +1673,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
 * Holds if `ap` is accepted as a valid access path under `config`: either `ap`
 * is an `ApNil` that occurs in `revFlow`, or `ap` equals `apCons(head, tail)`
 * for some `head` and `tail` satisfying `consCand`.
 *
 * Mutually recursive with `consCand`.
 */
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
 * Holds if `revConsCand(tc, ap, config)` holds and the tail `ap` also
 * satisfies `validAp`.
 */
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1740,7 +1781,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1979,6 +2021,16 @@ private module Stage3 {
clearContent(node, ap.getHead().getContent(), config)
}
/**
 * Holds if `node` is in `PrevStage::revFlow` for `config` and
 * `expectsContentEx(node, c)` holds where `c` is the content at the head of
 * `ap` and `c` also occurs in some `PrevStage::readStepCand` tuple for `config`.
 */
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
/** Holds if the underlying node of `node` is a `CastingNode`. */
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1987,7 +2039,12 @@ private module Stage3 {
exists(state) and
exists(config) and
not clear(node, ap, config) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -2452,10 +2509,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
 * Holds if `ap` is accepted as a valid access path under `config`: either `ap`
 * is an `ApNil` that occurs in `revFlow`, or `ap` equals `apCons(head, tail)`
 * for some `head` and `tail` satisfying `consCand`.
 *
 * Mutually recursive with `consCand`.
 */
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
 * Holds if `revConsCand(tc, ap, config)` holds and the tail `ap` also
 * satisfies `validAp`.
 */
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3279,10 +3350,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
/**
 * Holds if `ap` is accepted as a valid access path under `config`: either `ap`
 * is an `ApNil` that occurs in `revFlow`, or `ap` equals `apCons(head, tail)`
 * for some `head` and `tail` satisfying `consCand`.
 *
 * Mutually recursive with `consCand`.
 */
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
/**
 * Holds if `revConsCand(tc, ap, config)` holds and the tail `ap` also
 * satisfies `validAp`.
 */
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3351,17 +3436,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
/**
 * Holds if there is a callable `c` such that
 * `Stage4::parameterMayFlowThrough(_, c, apa, config)` holds and
 * `nodeMayUseSummary0(n, c, state, apa, config)` holds.
 */
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -4257,6 +4353,12 @@ private module Subpaths {
)
}
/** Holds if `succ` is a successor of `pred` and `succNode` is the `NodeEx` of `succ`. */
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
  pred.getASuccessor() = succ and
  succ.getNodeEx() = succNode
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4264,15 +4366,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4609,6 +4709,10 @@ private module FlowExploration {
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4625,6 +4729,10 @@ private module FlowExploration {
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()

View File

@@ -328,6 +328,9 @@ private module Cached {
/** Cached wrapper: holds exactly when `clearsContent(n, c)` holds. */
cached
predicate clearsContentCached(Node n, ContentSet c) { clearsContent(n, c) }
/** Cached wrapper: holds exactly when `expectsContent(n, c)` holds. */
cached
predicate expectsContentCached(Node n, ContentSet c) { expectsContent(n, c) }
/** Cached wrapper: holds exactly when `isUnreachableInCall(n, call)` holds. */
cached
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }

View File

@@ -813,6 +813,12 @@ predicate clearsContent(Node n, Content c) {
attributeClearStep(n, c)
}
/**
 * Holds if the value that is being tracked is expected to be stored inside content `c`
 * at node `n`.
 *
 * This implementation is `none()`, so it never holds for any node or content set.
 */
predicate expectsContent(Node n, ContentSet c) { none() }
/**
* Holds if values stored inside attribute `c` are cleared at node `n`.
*

View File

@@ -211,7 +211,7 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
*/
Node getFunction() { result.asCfgNode() = node.getFunction() }
/** Gets the data-flow node corresponding to the i'th argument of the call corresponding to this data-flow node */
/** Gets the data-flow node corresponding to the i'th positional argument of the call corresponding to this data-flow node */
Node getArg(int i) { result.asCfgNode() = node.getArg(i) }
/** Gets the data-flow node corresponding to the named argument of the call corresponding to this data-flow node */

View File

@@ -737,6 +737,38 @@ module PrivateDjango {
}
}
/**
 * Provides models for the `django.db.models.FileField` class and `ImageField` subclasses.
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.FileField
 * - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.ImageField
 */
module FileField {
  /**
   * Gets a reference to the `django.db.models.FileField` class, the
   * `django.db.models.ImageField` class, or any subclass of either.
   */
  API::Node subclassRef() {
    exists(API::Node models, API::Node definingModule |
      models = API::moduleImport("django").getMember("db").getMember("models") and
      (
        // commonly used alias: `django.db.models.<class>`
        definingModule = models
        or
        // actual class definition: `django.db.models.fields.files.<class>`
        definingModule = models.getMember("fields").getMember("files")
      )
    |
      result = definingModule.getMember(["FileField", "ImageField"]).getASubclass*()
    )
  }
}
/**
* Gets a reference to the Manager (django.db.models.Manager) for the django Model `modelClass`,
* accessed by `<modelClass>.objects`.
@@ -2599,6 +2631,36 @@ module PrivateDjango {
}
}
/**
 * A parameter that receives the filename used to upload a file. This is the second
 * parameter of a function passed as the `upload_to` argument to a `FileField`.
 *
 * Note that the value this parameter accepts cannot contain a slash. Even when the
 * request is crafted so that the filename contains a slash, django does something
 * like `input_filename.split("/")[-1]` (so other special characters are still
 * allowed). This also means that although the return value from `upload_to` is used
 * to construct a path, path injection is not possible.
 *
 * See
 * - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.FileField.upload_to
 * - https://docs.djangoproject.com/en/3.1/topics/http/file-uploads/#handling-uploaded-files-with-a-model
 */
private class DjangoFileFieldUploadToFunctionFilenameParam extends RemoteFlowSource::Range,
  DataFlow::ParameterNode {
  DjangoFileFieldUploadToFunctionFilenameParam() {
    exists(Function func, DataFlow::CallCfgNode call, DataFlow::Node uploadToArg |
      // a construction of `FileField`/`ImageField` (or a subclass) ...
      call = DjangoImpl::DB::Models::FileField::subclassRef().getACall() and
      // ... whose `upload_to` argument (third positional, or by keyword) ...
      (
        uploadToArg = call.getArg(2)
        or
        uploadToArg = call.getArgByName("upload_to")
      ) and
      // ... refers to `func`, whose second parameter is this node
      uploadToArg = poorMansFunctionTracker(func) and
      func.getArg(1) = this.getParameter()
    )
  }

  override string getSourceType() {
    result = "django filename parameter to function used in FileField.upload_to"
  }
}
// ---------------------------------------------------------------------------
// django.shortcuts.redirect
// ---------------------------------------------------------------------------
@@ -2676,4 +2738,67 @@ module PrivateDjango {
.getAnImmediateUse()
}
}
// ---------------------------------------------------------------------------
// Settings
// ---------------------------------------------------------------------------
/**
 * A custom middleware stack: a list expression that flows locally to a variable
 * named `MIDDLEWARE` and that contains the django authentication middleware.
 */
private class DjangoSettingsMiddlewareStack extends HTTP::Server::CsrfProtectionSetting::Range {
  List list;

  DjangoSettingsMiddlewareStack() {
    list = this.asExpr() and
    // we look for an assignment to the `MIDDLEWARE` setting
    exists(DataFlow::Node mw |
      DataFlow::localFlow(this, mw) and
      mw.asVar().getName() = "MIDDLEWARE"
    ) and
    // To only include results where CSRF protection matters, we only care about CSRF
    // protection when the django authentication middleware is enabled, since an
    // active session cookie is exactly what would allow an attacker to perform a
    // CSRF attack.
    // Notice that this does not ensure that this is not a FP, since the
    // authentication middleware might be unused.
    //
    // This also strongly implies that the variable is in fact a Django middleware
    // setting and not just a variable named `MIDDLEWARE`.
    list.getAnElt().(StrConst).getText() = "django.contrib.auth.middleware.AuthenticationMiddleware"
  }

  override boolean getVerificationSetting() {
    if
      list.getAnElt().(StrConst).getText() =
        [
          "django.middleware.csrf.CsrfViewMiddleware",
          // see https://github.com/mozilla/django-session-csrf
          "session_csrf.CsrfMiddleware"
        ]
    then result = true
    else result = false
  }
}
/**
 * A use of one of the decorators from `django.views.decorators.csrf`
 * (`csrf_protect`, `csrf_exempt`, `requires_csrf_token`, `ensure_csrf_cookie`)
 * as a decorator on a function.
 */
private class DjangoCsrfDecorator extends HTTP::Server::CsrfLocalProtectionSetting::Range {
  string decoratorName;
  Function function;

  DjangoCsrfDecorator() {
    function.getADecorator() = this.asExpr() and
    this =
      API::moduleImport("django")
          .getMember("views")
          .getMember("decorators")
          .getMember("csrf")
          .getMember(decoratorName)
          .getAUse() and
    decoratorName = ["csrf_protect", "csrf_exempt", "requires_csrf_token", "ensure_csrf_cookie"]
  }

  /** Gets the decorated function. */
  override Function getRequestHandler() { result = function }

  /** Holds if the decorator is `csrf_protect` or `requires_csrf_token`. */
  override predicate csrfEnabled() { decoratorName = ["csrf_protect", "requires_csrf_token"] }
}
}

View File

@@ -411,21 +411,16 @@ module Flask {
/** An `FileStorage` instance that originates from a flask request. */
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
FlaskRequestFileStorageInstances() {
// TODO: this currently only works in local-scope, since writing type-trackers for
// this is a little too much effort. Once API-graphs are available for more
// things, we can rewrite this.
//
// TODO: This approach for identifying member-access is very adhoc, and we should
// be able to do something more structured for providing modeling of the members
// of a container-object.
exists(DataFlow::AttrRead files | files = request().getMember("files").getAnImmediateUse() |
this.asCfgNode().(SubscriptNode).getObject() = files.asCfgNode()
exists(API::Node files | files = request().getMember("files") |
this.asCfgNode().(SubscriptNode).getObject() = files.getAUse().asCfgNode()
or
this.(DataFlow::MethodCallNode).calls(files, "get")
this = files.getMember("get").getACall()
or
exists(DataFlow::MethodCallNode getlistCall | getlistCall.calls(files, "getlist") |
this.asCfgNode().(SubscriptNode).getObject() = getlistCall.asCfgNode()
)
this.asCfgNode().(SubscriptNode).getObject() =
files.getMember("getlist").getReturn().getAUse().asCfgNode()
)
}
}

View File

@@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs
* - https://lxml.de/tutorial.html
*/
private module Lxml {
// ---------------------------------------------------------------------------
// XPath
// ---------------------------------------------------------------------------
/**
* A class constructor compiling an XPath expression.
*
@@ -57,13 +60,25 @@ private module Lxml {
*/
class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathCall() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
.getMember("xpath")
.getACall()
exists(API::Node parseResult |
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
or
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
// but we don't really have a way to model that nicely.
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
|
this = parseResult.getMember("xpath").getACall()
)
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] }
@@ -85,4 +100,235 @@ private module Lxml {
override string getName() { result = "lxml.etree" }
}
// ---------------------------------------------------------------------------
// Parsing
// ---------------------------------------------------------------------------
/**
 * Provides models for `lxml.etree` parsers.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
 */
module XmlParser {
  /**
   * A source of instances of `lxml.etree` parsers; extend this class to model new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that is set when functions are called by an external
   * library.
   *
   * Use the predicate `XmlParser::instance()` to get references to instances of `lxml.etree` parsers.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode {
    /** Holds if this instance is vulnerable to `kind`. */
    abstract predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind);
  }

  /**
   * A call to `lxml.etree.XMLParser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
   */
  private class LxmlParser extends InstanceSource, API::CallNode {
    LxmlParser() {
      API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() = this
    }

    // NOTE: it's not possible to change settings of a parser after constructing it
    override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
      kind.isXxe() and
      (
        // `resolve_entities` defaults to True
        not exists(this.getArgByName("resolve_entities"))
        or
        this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() instanceof True
      )
      or
      kind.isXmlBomb() and
      this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() instanceof True and
      not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() instanceof
        False
      or
      kind.isDtdRetrieval() and
      this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() instanceof True and
      this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() instanceof False
    }
  }

  /**
   * A call to `lxml.etree.get_default_parser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
   */
  private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
    LxmlDefaultParser() {
      API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() =
        this
    }

    override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
      // As highlighted by
      // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
      // XXE is allowed by default, so as long as the default parser has not been
      // overridden, the result is also vulnerable to XXE.
      kind.isXxe()
      // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
    }
  }

  /** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
    result = origin and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | instance(prev, origin).track(prev, t) = result)
  }

  /** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
  DataFlow::Node instance(InstanceSource origin) {
    instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
  }

  /** Gets a reference to an `lxml.etree` parser instance that is vulnerable to `kind`. */
  DataFlow::Node instanceVulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    exists(InstanceSource origin | origin.vulnerableTo(kind) | result = instance(origin))
  }

  /**
   * A call to the `feed` method of an `lxml` parser.
   */
  private class LxmlParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range {
    LxmlParserFeedCall() { this.calls(instance(_), "feed") }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("data")
    }

    override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
      this.calls(instanceVulnerableTo(kind), "feed")
    }

    override predicate mayExecuteInput() { none() }

    override DataFlow::Node getOutput() {
      // the output is a `close()` call on an object that the fed parser flows to locally
      exists(DataFlow::MethodCallNode closeCall, DataFlow::Node objRef |
        closeCall.calls(objRef, "close") and
        DataFlow::localFlow(this.getObject(), objRef)
      |
        result = closeCall
      )
    }
  }
}
/**
 * A call to either of:
 * - `lxml.etree.fromstring`
 * - `lxml.etree.fromstringlist`
 * - `lxml.etree.XML`
 * - `lxml.etree.XMLID`
 * - `lxml.etree.parse`
 * - `lxml.etree.parseid`
 *
 * See
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
 */
private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
  string functionName;

  LxmlParsing() {
    this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall() and
    functionName = ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"]
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // keyword names: `text` (fromstring / XML / XMLID), `strings` (fromstringlist),
    // `source` (parse / parseid)
    result = this.getArgByName(["text", "strings", "source"])
  }

  /** Gets the argument passed as the parser: second positional, or keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // no parser argument is given
    kind.isXxe() and
    not exists(this.getParserArg())
    or
    // an explicitly passed parser that is itself vulnerable
    XmlParser::instanceVulnerableTo(kind) = this.getParserArg()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // Note: for `parseid`/XMLID the result of the call is a tuple with `(root, dict)`, so
    // maybe we should not just say that the entire tuple is the decoding output... my
    // gut feeling is that THIS instance doesn't matter too much, but that it would be
    // nice to be able to do this in general. (this is a problem for both `lxml.etree`
    // and `xml.etree`)
    result = this
  }
}
/**
 * A call to `lxml.etree.parse` or `lxml.etree.parseid`, which takes either a
 * filename or a file-like object as argument. This subclass exists to capture the
 * filename for path-injection.
 *
 * See
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
 */
private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range {
  FileAccessFromLxmlParsing() {
    functionName = ["parse", "parseid"]
    // I considered whether we should try to reduce FPs from people passing file-like
    // objects, which will not be a file system access (and couldn't cause a
    // path-injection).
    //
    // I suppose that once we have proper flow-summary support for file-like objects,
    // we can make the XXE/XML-bomb sinks allow an access-path, while the
    // path-injection sink wouldn't, and then we will not end up with such FPs.
  }

  /** Gets a path argument; this is any input of the parsing call. */
  override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
/**
 * A call to `lxml.etree.iterparse`
 *
 * See
 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
 */
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
  FileSystemAccess::Range {
  LxmlIterparseCall() {
    API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() = this
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O
    kind.isXxe()
    or
    kind.isXmlBomb() and
    this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() instanceof True
    or
    kind.isDtdRetrieval() and
    this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() instanceof True and
    this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() instanceof False
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() { result = this }

  /** Gets a path argument; this is any input of the call. */
  override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
}

View File

@@ -2890,70 +2890,6 @@ private module StdlibPrivate {
override string getKind() { result = Escaping::getRegexKind() }
}
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
 * An instance of `xml.etree.ElementTree.ElementTree`.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
 */
private API::Node elementTreeInstance() {
  result =
    API::moduleImport("xml")
        .getMember("etree")
        .getMember("ElementTree")
        // `parse` parses to a tree; `ElementTree` constructs a tree without parsing
        .getMember(["parse", "ElementTree"])
        .getReturn()
}
/**
 * An instance of `xml.etree.ElementTree.Element`.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
 */
private API::Node elementInstance() {
  exists(API::Node producer |
    // parse with, or go to the root of, a tree
    producer = elementTreeInstance().getMember(["parse", "getroot"])
    or
    // parse directly to an element
    producer =
      API::moduleImport("xml")
          .getMember("etree")
          .getMember("ElementTree")
          .getMember(["fromstring", "fromstringlist", "XML"])
  |
    result = producer.getReturn()
  )
}
/**
 * A call to a find method (`find`, `findall`, `findtext`) on a tree or an element,
 * which will execute an XPath expression.
 */
private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
  string methodName;

  ElementTreeFindCall() {
    this = [elementTreeInstance(), elementInstance()].getMember(methodName).getACall() and
    methodName = ["find", "findall", "findtext"]
  }

  override DataFlow::Node getXPath() {
    result = this.getArg(0)
    or
    result = this.getArgByName("match")
  }

  override string getName() { result = "xml.etree" }
}
// ---------------------------------------------------------------------------
// urllib
// ---------------------------------------------------------------------------
@@ -3171,6 +3107,547 @@ private module StdlibPrivate {
result in [this.getArg(0), this.getArgByName("path")]
}
}
// ---------------------------------------------------------------------------
// io
// ---------------------------------------------------------------------------
/**
 * Provides models for the `io.StringIO`/`io.BytesIO` classes
 *
 * See https://docs.python.org/3.10/library/io.html#io.StringIO.
 */
module StringIO {
  /** Gets a reference to the `io.StringIO` or `io.BytesIO` class. */
  private API::Node classRef() {
    exists(API::Node io | io = API::moduleImport("io") |
      result = io.getMember(["StringIO", "BytesIO"])
    )
  }

  /**
   * A source of instances of `io.StringIO`/`io.BytesIO`; extend this class to model new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that is set when functions are called by an external
   * library.
   *
   * Use the predicate `StringIO::instance()` to get references to instances of `io.StringIO`.
   */
  abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource { }

  /** A direct instantiation of `io.StringIO`/`io.BytesIO`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }

    /** Gets the argument giving the initial contents: first positional, or by keyword. */
    DataFlow::Node getInitialValue() {
      result =
        [
          this.getArg(0),
          // `initial_value` for StringIO, `initial_bytes` for BytesIO
          this.getArgByName(["initial_value", "initial_bytes"])
        ]
    }
  }

  /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | instance(prev).track(prev, t) = result)
  }

  /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */
  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

  /**
   * Extra taint propagation for `io.StringIO`/`io.BytesIO`: from the initial value
   * argument to the instantiation.
   */
  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      nodeFrom = nodeTo.(ClassInstantiation).getInitialValue()
    }
  }
}
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
 * Gets an API node for an instance of `xml.etree.ElementTree.ElementTree`.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
 */
private API::Node elementTreeInstance() {
  exists(API::Node elementTreeModule |
    elementTreeModule = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
  |
    // calling the `ElementTree` class constructs a tree without parsing
    result = elementTreeModule.getMember("ElementTree").getReturn()
    or
    // the module-level `parse` function parses to a tree
    result = elementTreeModule.getMember("parse").getReturn()
  )
}
/**
 * Gets an API node for an instance of `xml.etree.ElementTree.Element`.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
 */
private API::Node elementInstance() {
  exists(API::Node elementTreeModule |
    elementTreeModule = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
  |
    // module-level functions that parse directly to an element
    result = elementTreeModule.getMember(["fromstring", "fromstringlist", "XML"]).getReturn()
    or
    // the return value of `close` on an `XMLParser` instance
    result = elementTreeModule.getMember("XMLParser").getReturn().getMember("close").getReturn()
  )
  or
  // `parse` or `getroot` called on a tree
  result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
}
/**
 * A call to a find method (`find`, `findall`, `findtext` or `iterfind`) on a tree or
 * an element, which will execute an XPath expression.
 *
 * See
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element.find
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element.iterfind
 */
private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
  string methodName;

  ElementTreeFindCall() {
    // `iterfind` evaluates the same kind of path expression as the other find methods
    methodName in ["find", "findall", "findtext", "iterfind"] and
    (
      this = elementTreeInstance().getMember(methodName).getACall()
      or
      this = elementInstance().getMember(methodName).getACall()
    )
  }

  override DataFlow::Node getXPath() {
    result in [
        this.getArg(0),
        // keyword name used in the documentation
        this.getArgByName("match"),
        // NOTE(review): the CPython implementation appears to name this parameter
        // `path` rather than `match`, so we accept both keyword spellings -- confirm
        // against the targeted Python versions.
        this.getArgByName("path")
      ]
  }

  override string getName() { result = "xml.etree" }
}
/**
 * Provides models for the parser classes of `xml.etree`.
 *
 * See
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
 */
module XmlParser {
  /**
   * A source of instances of `xml.etree` parsers. Extend this class to model
   * additional ways in which such instances can arise.
   *
   * Examples are instantiations of the class, return values from function calls, or
   * a special parameter that an external library sets when it calls a function.
   *
   * Use the predicate `XmlParser::instance()` to get references to instances of `xml.etree` parsers.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that directly constructs an `XMLParser` or an `XMLPullParser`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() {
      exists(API::Node parserClass |
        parserClass =
          API::moduleImport("xml")
              .getMember("etree")
              .getMember("ElementTree")
              .getMember(["XMLParser", "XMLPullParser"])
      |
        this = parserClass.getACall()
      )
    }
  }

  /** Gets a reference to an `xml.etree` parser instance, tracked with `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive case: follow one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // base case: start at any instance source
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an `xml.etree` parser instance. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * A call to the `feed` method of an `xml.etree` parser.
   */
  private class XmlEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range {
    XmlEtreeParserFeedCall() { this.calls(instance(), "feed") }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("data")
    }

    override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isXmlBomb() }

    override predicate mayExecuteInput() { none() }

    override DataFlow::Node getOutput() {
      // the output is a `close` call whose receiver is reachable by local flow from
      // the object that `feed` was called on
      exists(DataFlow::MethodCallNode closeCall, DataFlow::Node receiver |
        closeCall.calls(receiver, "close") and
        DataFlow::localFlow(this.getObject(), receiver) and
        result = closeCall
      )
    }
  }
}
/**
 * A call to one of the following:
 * - `xml.etree.ElementTree.fromstring`
 * - `xml.etree.ElementTree.fromstringlist`
 * - `xml.etree.ElementTree.XML`
 * - `xml.etree.ElementTree.XMLID`
 * - `xml.etree.ElementTree.parse`
 * - `xml.etree.ElementTree.iterparse`
 * - the `parse` method on an `xml.etree.ElementTree.ElementTree` instance
 *
 * See
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstring
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstringlist
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XML
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLID
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
 */
private class XmlEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
  XmlEtreeParsing() {
    exists(API::Node parsingFunction |
      // the `parse` method of a tree instance
      parsingFunction = elementTreeInstance().getMember("parse")
      or
      // the module-level parsing functions
      parsingFunction =
        API::moduleImport("xml")
            .getMember("etree")
            .getMember("ElementTree")
            .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
    |
      this = parsingFunction.getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // keyword used by fromstring / XML / XMLID
    result = this.getArgByName("text")
    or
    // keyword used by fromstringlist
    result = this.getArgByName("sequence")
    or
    // keyword used by parse / iterparse
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // note: the choice of `xml.etree` parser makes no difference here, since the
    // security features cannot be changed anyway
    kind.isXmlBomb()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // Note: `XMLID` returns a tuple `(root, dict)`, so treating the whole tuple as
    // the decoding output is imprecise. It is unlikely to matter much for this
    // particular case, but it would be nice to handle this in general (the same
    // problem exists for both `lxml.etree` and `xml.etree`).
    result = this
  }
}
/**
 * A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse` (or
 * the `parse` method of a tree instance), which accepts either a filename or a
 * file-like object. This subclass exists so that the filename is captured for
 * path-injection.
 *
 * See
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
 * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
 */
private class FileAccessFromXmlEtreeParsing extends XmlEtreeParsing, FileSystemAccess::Range {
  FileAccessFromXmlEtreeParsing() {
    // It was considered whether to reduce FPs from people passing file-like objects,
    // which are not a file system access (and could not cause a path-injection).
    //
    // Presumably, once there is proper flow-summary support for file-like objects,
    // the XXE/XML-bomb sinks can allow an access-path while the path-injection sink
    // does not, and such FPs will then go away.
    exists(API::Node parseFunction |
      parseFunction = elementTreeInstance().getMember("parse")
      or
      parseFunction =
        API::moduleImport("xml")
            .getMember("etree")
            .getMember("ElementTree")
            .getMember(["parse", "iterparse"])
    |
      this = parseFunction.getACall()
    )
  }

  override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// xml.sax
// ---------------------------------------------------------------------------
/**
 * A call to the `setFeature` method on an XML SAX parser obtained from
 * `xml.sax.make_parser`.
 *
 * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
 */
private class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode {
  SaxParserSetFeatureCall() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = saxParser.getMember("setFeature").getACall()
    )
  }

  // The keyword argument names do not match the documentation. It was checked (with
  // Python 3.9.5) that the names used below actually work.
  /** Gets the API node for the feature argument (position 0 or keyword `name`). */
  API::Node getFeatureArg() { result = this.getParameter(0, "name") }

  /** Gets the API node for the state argument (position 1 or keyword `state`). */
  API::Node getStateArg() { result = this.getParameter(1, "state") }
}
/**
 * Gets a reference to an XML SAX parser that has `feature_external_ges` turned on,
 * tracked with `t`.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
  // base case: the receiver of a `setFeature(feature_external_ges, True)` call
  (
    t.start() and
    exists(SaxParserSetFeatureCall enablingCall |
      result = enablingCall.getObject() and
      enablingCall.getFeatureArg().getARhs() =
        API::moduleImport("xml")
            .getMember("sax")
            .getMember("handler")
            .getMember("feature_external_ges")
            .getAUse() and
      enablingCall
          .getStateArg()
          .getAValueReachingRhs()
          .asExpr()
          .(BooleanLiteral)
          .booleanValue() = true
    )
  )
  or
  // recursive case: take one small step, unless the node we arrive at is the
  // receiver of a call that sets the feature to False (which makes the parser safe
  // again)
  (
    exists(DataFlow::TypeTracker prev |
      t = prev.smallstep(saxParserWithFeatureExternalGesTurnedOn(prev), result)
    ) and
    not exists(SaxParserSetFeatureCall disablingCall |
      disablingCall.getObject() = result and
      disablingCall.getFeatureArg().getARhs() =
        API::moduleImport("xml")
            .getMember("sax")
            .getMember("handler")
            .getMember("feature_external_ges")
            .getAUse() and
      disablingCall
          .getStateArg()
          .getAValueReachingRhs()
          .asExpr()
          .(BooleanLiteral)
          .booleanValue() = false
    )
  )
}
/**
 * Gets a reference to an XML SAX parser on which `feature_external_ges` has been
 * turned on (and not turned off again at that reference).
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
  exists(DataFlow::TypeTracker finished | finished = DataFlow::TypeTracker::end() |
    result = saxParserWithFeatureExternalGesTurnedOn(finished)
  )
}
/**
 * A call to the `parse` method on a SAX XML parser obtained from
 * `xml.sax.make_parser`.
 *
 * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
 */
private class XmlSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range,
  FileSystemAccess::Range {
  XmlSaxInstanceParsing() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = saxParser.getMember("parse").getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // XXE and DTD retrieval only apply when the feature has been turned on for the
    // parser object
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getObject() = saxParserWithFeatureExternalGesTurnedOn()
    or
    // always vulnerable to XML bombs
    kind.isXmlBomb()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // note: with SAX, the parsed data is delivered to the content handler. This is
    // not currently modeled (it is not trivial to do, and would not really give us
    // any value as of right now).
    none()
  }

  override DataFlow::Node getAPathArgument() {
    // It was considered whether to reduce FPs from people passing file-like objects,
    // which are not a file system access (and could not cause a path-injection).
    //
    // Presumably, once there is proper flow-summary support for file-like objects,
    // the XXE/XML-bomb sinks can allow an access-path while the path-injection sink
    // does not, and such FPs will then go away.
    result = this.getAnInput()
  }
}
/**
 * A call to the `parse` or `parseString` function of the `xml.sax` module.
 *
 * See:
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
 */
private class XmlSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
  XmlSaxParsing() {
    exists(API::Node saxModule | saxModule = API::moduleImport("xml").getMember("sax") |
      this = saxModule.getMember(["parse", "parseString"]).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // keyword used by parseString
    result = this.getArgByName("string")
    or
    // keyword used by parse
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // always vulnerable to XML bombs
    kind.isXmlBomb()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // note: with SAX, the parsed data is delivered to the content handler. This is
    // not currently modeled (it is not trivial to do, and would not really give us
    // any value as of right now).
    none()
  }
}
/**
 * A call to `xml.sax.parse`, which takes either a filename or a file-like object as
 * argument. To capture the filename for path-injection, we have this subclass.
 *
 * See
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 */
private class FileAccessFromXmlSaxParsing extends XmlSaxParsing, FileSystemAccess::Range {
  FileAccessFromXmlSaxParsing() {
    // Only `parse` is matched here; `parseString` is deliberately not included.
    this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall()
    // I considered whether we should try to reduce FPs from people passing file-like
    // objects, which will not be a file system access (and couldn't cause a
    // path-injection).
    //
    // I suppose that once we have proper flow-summary support for file-like objects,
    // we can make the XXE/XML-bomb sinks allow an access-path, while the
    // path-injection sink wouldn't, and then we will not end up with such FPs.
  }

  override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// xml.dom.*
// ---------------------------------------------------------------------------
/**
 * A call to the `parse` or `parseString` function of `xml.dom.minidom` or
 * `xml.dom.pulldom`.
 *
 * Both of these modules are based on SAX parsers.
 *
 * See
 * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
 * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
 */
private class XmlDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
  XmlDomParsing() {
    exists(API::Node domSubmodule |
      domSubmodule = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"])
    |
      this = domSubmodule.getMember(["parse", "parseString"]).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // keyword used by parseString
    result = this.getArgByName("string")
    or
    // keyword used by minidom.parse
    result = this.getArgByName("file")
    or
    // keyword used by pulldom.parse
    result = this.getArgByName("stream_or_string")
  }

  /** Gets the argument that supplies the parser (position 1 or keyword `parser`). */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
    // always vulnerable to XML bombs
    kind.isXmlBomb()
    or
    // XXE and DTD retrieval only apply when the supplied parser has had the feature
    // turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() { result = this }
}
/**
 * A call to the `parse` function from `xml.dom.minidom` or `xml.dom.pulldom`, which
 * takes either a filename or a file-like object as argument. To capture the filename
 * for path-injection, we have this subclass.
 *
 * Note that `parseString` is not included, since this class only matches calls to
 * `parse`.
 *
 * See
 * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
 * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
 */
private class FileAccessFromXmlDomParsing extends XmlDomParsing, FileSystemAccess::Range {
  FileAccessFromXmlDomParsing() {
    this =
      API::moduleImport("xml")
          .getMember("dom")
          .getMember(["minidom", "pulldom"])
          .getMember("parse")
          .getACall()
    // I considered whether we should try to reduce FPs from people passing file-like
    // objects, which will not be a file system access (and couldn't cause a
    // path-injection).
    //
    // I suppose that once we have proper flow-summary support for file-like objects,
    // we can make the XXE/XML-bomb sinks allow an access-path, while the
    // path-injection sink wouldn't, and then we will not end up with such FPs.
  }

  override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
}
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,39 @@
/**
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
*
* See
* - https://pypi.org/project/xmltodict/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
 *
 * See
 * - https://pypi.org/project/xmltodict/
 */
private module Xmltodict {
  /**
   * A call to `xmltodict.parse`.
   */
  private class XmlToDictParsing extends API::CallNode, XML::XmlParsing::Range {
    XmlToDictParsing() { API::moduleImport("xmltodict").getMember("parse").getACall() = this }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("xml_input")
    }

    override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
      // only considered vulnerable when a `False` literal reaches the
      // `disable_entities` keyword argument of this call
      this.getKeywordParameter("disable_entities").getAValueReachingRhs().asExpr() instanceof False and
      kind.isXmlBomb()
    }

    override predicate mayExecuteInput() { none() }

    override DataFlow::Node getOutput() { result = this }
  }
}

View File

@@ -49,7 +49,7 @@ class ObjectInternal extends TObject {
abstract ObjectInternal getClass();
/**
* True if this "object" can be meaningfully analysed to determine the boolean value of
* True if this "object" can be meaningfully analyzed to determine the boolean value of
* equality tests on it.
* For example, `None` or `int` can be, but `int()` or an unknown string cannot.
*/

View File

@@ -70,7 +70,7 @@ abstract class TupleObjectInternal extends SequenceObjectInternal {
override ObjectInternal getClass() { result = ObjectInternal::builtin("tuple") }
/**
* True if this "object" can be meaningfully analysed for
* True if this "object" can be meaningfully analyzed for
* truth or false in comparisons. For example, `None` or `int` can be, but `int()`
* or an unknown string cannot.
*/

View File

@@ -0,0 +1,49 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "XML bomb"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
/**
 * Provides default sources, sinks and sanitizers for detecting "XML bomb"
 * vulnerabilities, together with extension points for adding your own.
 */
module XmlBomb {
  /**
   * A data flow source for XML-bomb vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for XML-bomb vulnerabilities.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for XML-bomb vulnerabilities.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /** A source of remote user input, considered as a flow source for XML bomb vulnerabilities. */
  class RemoteFlowSourceAsSource extends Source {
    RemoteFlowSourceAsSource() { this = any(RemoteFlowSource remote) }
  }

  /**
   * An input to an XML parsing operation that is vulnerable to XML bombs.
   */
  class XmlParsingVulnerableToXmlBomb extends Sink {
    XmlParsingVulnerableToXmlBomb() {
      exists(XML::XmlParsing xmlParsing |
        this = xmlParsing.getAnInput() and
        xmlParsing.vulnerableTo(any(XML::XmlParsingVulnerabilityKind bomb | bomb.isXmlBomb()))
      )
    }
  }
}

View File

@@ -0,0 +1,28 @@
/**
* Provides a taint-tracking configuration for detecting "XML bomb" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `Configuration` is needed, otherwise
* `XmlBombCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import XmlBombCustomizations::XmlBomb
/**
 * A taint-tracking configuration for detecting "XML bomb" vulnerabilities, using the
 * sources, sinks and sanitizers from `XmlBombCustomizations::XmlBomb`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "XmlBomb" }

  override predicate isSource(DataFlow::Node source) { source = any(Source src) }

  override predicate isSink(DataFlow::Node sink) { sink = any(Sink snk) }

  override predicate isSanitizer(DataFlow::Node node) {
    // sanitizers specific to this query
    node instanceof Sanitizer
    or
    // sanitizers inherited from the base configuration
    super.isSanitizer(node)
  }
}

View File

@@ -0,0 +1,49 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "XML External Entity (XXE)"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
/**
 * Provides default sources, sinks and sanitizers for detecting "XML External Entity (XXE)"
 * vulnerabilities, together with extension points for adding your own.
 */
module Xxe {
  /**
   * A data flow source for XXE vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for XXE vulnerabilities.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for XXE vulnerabilities.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /** A source of remote user input, considered as a flow source for XXE vulnerabilities. */
  class RemoteFlowSourceAsSource extends Source {
    RemoteFlowSourceAsSource() { this = any(RemoteFlowSource remote) }
  }

  /**
   * An input to an XML parsing operation that is vulnerable to XXE.
   */
  class XmlParsingVulnerableToXxe extends Sink {
    XmlParsingVulnerableToXxe() {
      exists(XML::XmlParsing xmlParsing |
        this = xmlParsing.getAnInput() and
        xmlParsing.vulnerableTo(any(XML::XmlParsingVulnerabilityKind xxe | xxe.isXxe()))
      )
    }
  }
}

View File

@@ -0,0 +1,28 @@
/**
* Provides a taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `Configuration` is needed, otherwise
* `XxeCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import XxeCustomizations::Xxe
/**
 * A taint-tracking configuration for detecting "XML External Entity (XXE)"
 * vulnerabilities, using the sources, sinks and sanitizers from
 * `XxeCustomizations::Xxe`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "Xxe" }

  override predicate isSource(DataFlow::Node source) { source = any(Source src) }

  override predicate isSink(DataFlow::Node sink) { sink = any(Sink snk) }

  override predicate isSanitizer(DataFlow::Node node) {
    // sanitizers specific to this query
    node instanceof Sanitizer
    or
    // sanitizers inherited from the base configuration
    super.isSanitizer(node)
  }
}