diff --git a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll index ffe5e4ec29f..42d7e89f20b 100644 --- a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll +++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll @@ -10,6 +10,7 @@ private import DataFlowImplCommon private import DataFlowImplSpecific::Private import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic /** * A configuration of interprocedural data flow analysis. This defines @@ -94,6 +95,22 @@ abstract class Configuration extends string { */ int fieldFlowBranchLimit() { result = 2 } + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + /** * Holds if data may flow from `source` to `sink` for this configuration. */ @@ -244,6 +261,8 @@ private class ParamNodeEx extends NodeEx { } int getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } } private class RetNodeEx extends NodeEx { @@ -270,6 +289,7 @@ private predicate outBarrier(NodeEx node, Configuration config) { ) } +pragma[nomagic] private predicate fullBarrier(NodeEx node, Configuration config) { exists(Node n | node.asNode() = n | config.isBarrier(n) @@ -288,11 +308,23 @@ private predicate fullBarrier(NodeEx node, Configuration config) { } pragma[nomagic] -private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } +private predicate sourceNode(NodeEx node, Configuration config) { + config.isSource(node.asNode()) and + not fullBarrier(node, config) +} pragma[nomagic] private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + /** * Holds if data can flow in one local step from `node1` to `node2`. */ @@ -301,16 +333,14 @@ private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config node1.asNode() = n1 and node2.asNode() = n2 and simpleLocalFlowStepExt(n1, n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) ) or exists(Node n | config.allowImplicitRead(n, _) and node1.asNode() = n and - node2.isImplicitReadNode(n, false) + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) ) } @@ -323,16 +353,14 @@ private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configurat node2.asNode() = n2 and config.isAdditionalFlowStep(n1, n2) and getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) ) or exists(Node n | config.allowImplicitRead(n, _) and node1.isImplicitReadNode(n, true) and - node2.asNode() = n + node2.asNode() = n and + not fullBarrier(node2, config) ) } @@ -344,10 +372,8 @@ private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { node1.asNode() = n1 and node2.asNode() = n2 and jumpStepCached(n1, n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) } @@ -360,15 +386,14 @@ private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration c node2.asNode() = n2 and config.isAdditionalFlowStep(n1, n2) and getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) } private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { - read(node1.asNode(), c, node2.asNode()) + read(node1.asNode(), c, node2.asNode()) and + stepFilter(node1, node2, config) or exists(Node n | node2.isImplicitReadNode(n, true) and @@ -381,7 +406,8 @@ private predicate store( NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config ) { store(node1.asNode(), tc, node2.asNode(), contentType) and - read(_, tc.getContent(), _, config) + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) } pragma[nomagic] @@ -399,6 +425,20 @@ private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx a */ private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + private module Stage1 { class ApApprox = Unit; @@ -416,63 +456,59 @@ private module Stage1 { * argument in a call. */ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { - not fullBarrier(node, config) and - ( - sourceNode(node, config) and + sourceNode(node, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and cc = false or - exists(NodeEx mid | - fwdFlow(mid, cc, config) and - localFlowStep(mid, node, config) - ) - or - exists(NodeEx mid | - fwdFlow(mid, cc, config) and - additionalLocalFlowStep(mid, node, config) - ) - or - exists(NodeEx mid | - fwdFlow(mid, _, config) and - jumpStep(mid, node, config) and - cc = false - ) - or - exists(NodeEx mid | - fwdFlow(mid, _, config) and - additionalJumpStep(mid, node, config) and - cc = false - ) - or - // store - exists(NodeEx mid | - useFieldFlow(config) and - fwdFlow(mid, cc, config) and - store(mid, _, node, _, config) and - not outBarrier(mid, config) - ) - or - // read - exists(Content c | - fwdFlowRead(c, node, cc, config) and - fwdFlowConsCand(c, config) and - not inBarrier(node, config) - ) - or - // flow into a callable - exists(NodeEx arg | - fwdFlow(arg, _, config) and - viableParamArgEx(_, node, arg) and - cc = true - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, false, config) and - cc = false - or - fwdFlowOutFromArg(call, node, config) and - fwdFlowIsEntered(call, cc, config) - ) + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) ) } @@ -512,7 +548,8 @@ private module Stage1 { private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { exists(ReturnPosition pos | fwdFlowReturnPosition(pos, cc, config) and - viableReturnPosOutEx(call, pos, out) + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) ) } @@ -549,7 +586,7 @@ private module Stage1 { private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { fwdFlow(node, config) and sinkNode(node, config) and - toReturn = false + if hasSinkCallCtx(config) then toReturn = true else toReturn = false or exists(NodeEx mid | localFlowStep(node, mid, config) and @@ -738,14 +775,19 @@ private module Stage1 { * Holds if flow may enter through `p` and reach a return node making `p` a * candidate for the origin of a summary. */ + pragma[nomagic] predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { exists(ReturnKindExt kind | throughFlowNodeCand(p, config) and returnFlowCallableNodeCand(c, kind, config) and p.getEnclosingCallable() = c and exists(ap) and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) ) } @@ -931,6 +973,8 @@ private module Stage2 { Cc ccNone() { result instanceof CallContextAny } + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + private class LocalCc = Unit; bindingset[call, c, outercc] @@ -968,6 +1012,9 @@ private module Stage2 { private predicate flowIntoCall = flowIntoCallNodeCand1/5; + bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { any() } + bindingset[ap, contentType] private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } @@ -976,14 +1023,23 @@ private module Stage2 { PrevStage::revFlow(node, _, _, apa, config) } + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -996,9 +1052,16 @@ private module Stage2 { */ pragma[nomagic] predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -1066,7 +1129,7 @@ private module Stage2 { ) { exists(DataFlowType contentType | fwdFlow(node1, cc, argAp, ap1, config) and - PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and typecheckStore(ap1, contentType) ) } @@ -1101,9 +1164,8 @@ private module Stage2 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1118,9 +1180,8 @@ private module Stage2 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1130,10 +1191,8 @@ private module Stage2 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1147,7 +1206,7 @@ private module Stage2 { ) { exists(ParamNodeEx p | fwdFlowIn(call, p, cc, _, argAp, ap, config) and - PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) ) } @@ -1189,6 +1248,11 @@ private module Stage2 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -1209,7 +1273,7 @@ private module Stage2 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -1265,7 +1329,7 @@ private module Stage2 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -1300,9 +1364,8 @@ private module Stage2 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1312,9 +1375,8 @@ private module Stage2 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1324,9 +1386,8 @@ private module Stage2 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1394,8 +1455,12 @@ private module Stage2 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -1606,6 +1671,8 @@ private module Stage3 { Cc ccNone() { result = false } + CcCall ccSomeCall() { result = true } + private class LocalCc = Unit; bindingset[call, c, outercc] @@ -1660,12 +1727,14 @@ private module Stage3 { pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -1687,7 +1756,7 @@ private module Stage3 { private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -1790,9 +1859,8 @@ private module Stage3 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1807,9 +1875,8 @@ private module Stage3 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1819,10 +1886,8 @@ private module Stage3 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1878,6 +1943,11 @@ private module Stage3 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -1898,7 +1968,7 @@ private module Stage3 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -1954,7 +2024,7 @@ private module Stage3 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -1989,9 +2059,8 @@ private module Stage3 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2001,9 +2070,8 @@ private module Stage3 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2013,9 +2081,8 @@ private module Stage3 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2083,8 +2150,12 @@ private module Stage3 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -2139,7 +2210,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) ) and accessPathApproxCostLimits(apLimit, tupleLimit) and apLimit < tails and - tupleLimit < (tails - 1) * nodes + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() ) } @@ -2351,6 +2423,8 @@ private module Stage4 { Cc ccNone() { result instanceof CallContextAny } + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + private class LocalCc = LocalCallContext; bindingset[call, c, outercc] @@ -2419,12 +2493,14 @@ private module Stage4 { pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -2446,7 +2522,7 @@ private module Stage4 { private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -2549,9 +2625,8 @@ private module Stage4 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2566,9 +2641,8 @@ private module Stage4 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2578,10 +2652,8 @@ private module Stage4 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2637,6 +2709,11 @@ private module Stage4 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -2657,7 +2734,7 @@ private module Stage4 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -2713,7 +2790,7 @@ private module Stage4 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -2748,9 +2825,8 @@ private module Stage4 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2760,9 +2836,8 @@ private module Stage4 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2772,9 +2847,8 @@ private module Stage4 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2842,8 +2916,12 @@ private module Stage4 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -2916,6 +2994,8 @@ private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { int getParameterPos() { p.isParameterOf(_, result) } + ParamNodeEx getParamNode() { result = p } + override string toString() { result = p + ": " + ap } predicate hasLocationInfo( @@ -2973,12 +3053,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { * expected to be expensive. Holds with `unfold = true` otherwise. */ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { - exists(int aps, int nodes, int apLimit, int tupleLimit | - aps = countPotentialAps(apa, config) and - nodes = countNodesUsingAccessPath(apa, config) and - accessPathCostLimits(apLimit, tupleLimit) and - if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true - ) + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) } /** @@ -3040,7 +3123,11 @@ private newtype TPathNode = // A PathNode is introduced by a source ... Stage4::revFlow(node, config) and sourceNode(node, config) and - cc instanceof CallContextAny and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and sc instanceof SummaryCtxNone and ap = TAccessPathNil(node.getDataFlowType()) or @@ -3052,17 +3139,10 @@ private newtype TPathNode = ) } or TPathNodeSink(NodeEx node, Configuration config) { - sinkNode(node, pragma[only_bind_into](config)) and - Stage4::revFlow(node, pragma[only_bind_into](config)) and - ( - // A sink that is also a source ... - sourceNode(node, config) - or - // ... or a sink that can be reached from a source - exists(PathNodeMid mid | - pathStep(mid, node, _, _, TAccessPathNil(_)) and - pragma[only_bind_into](config) = mid.getConfiguration() - ) + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + config = sink.getConfiguration() ) } @@ -3379,22 +3459,46 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid { // an intermediate step to another intermediate node result = this.getSuccMid() or - // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges - exists(PathNodeMid mid, PathNodeSink sink | - mid = this.getSuccMid() and - mid.getNodeEx() = sink.getNodeEx() and - mid.getAp() instanceof AccessPathNil and - sink.getConfiguration() = unbindConf(mid.getConfiguration()) and - result = sink - ) + // a final step to a sink + result = this.getSuccMid().projectToSink() } override predicate isSource() { sourceNode(node, config) and - cc instanceof CallContextAny and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and sc instanceof SummaryCtxNone and ap instanceof AccessPathNil } + + predicate isAtSink() { + sinkNode(node, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getConfiguration() = unbindConf(config) + } } /** @@ -3458,7 +3562,7 @@ private predicate pathStep( exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and sc = mid.getSummaryCtx() or - pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + pathIntoCallable(mid, node, _, cc, sc, _, _) and ap = mid.getAp() or pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone or @@ -3535,18 +3639,20 @@ private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) */ pragma[noinline] private predicate pathIntoArg( - PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa, + Configuration config ) { exists(ArgNode arg | arg = mid.getNodeEx().asNode() and cc = mid.getCallContext() and arg.argumentOf(call, i) and ap = mid.getAp() and - apa = ap.getApprox() + apa = ap.getApprox() and + config = mid.getConfiguration() ) } -pragma[noinline] +pragma[nomagic] private predicate parameterCand( DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config ) { @@ -3559,12 +3665,14 @@ private predicate parameterCand( pragma[nomagic] private predicate pathIntoCallable0( PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, - AccessPath ap + AccessPath ap, Configuration config ) { exists(AccessPathApprox apa | - pathIntoArg(mid, i, outercc, call, ap, apa) and + pathIntoArg(mid, pragma[only_bind_into](i), outercc, call, ap, pragma[only_bind_into](apa), + pragma[only_bind_into](config)) and callable = resolveCall(call, outercc) and - parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + parameterCand(callable, pragma[only_bind_into](i), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) ) } @@ -3573,18 +3681,23 @@ private predicate pathIntoCallable0( * before and after entering the callable are `outercc` and `innercc`, * respectively. */ +pragma[nomagic] private predicate pathIntoCallable( PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, - DataFlowCall call + DataFlowCall call, Configuration config ) { exists(int i, DataFlowCallable callable, AccessPath ap | - pathIntoCallable0(mid, callable, i, outercc, call, ap) and + pathIntoCallable0(mid, callable, i, outercc, call, ap, config) and p.isParameterOf(callable, i) and ( sc = TSummaryCtxSome(p, ap) or not exists(TSummaryCtxSome(p, ap)) and - sc = TSummaryCtxNone() + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) | if recordDataFlowCallSite(call, callable) @@ -3608,18 +3721,23 @@ private predicate paramFlowsThrough( ap = mid.getAp() and apa = ap.getApprox() and pos = sc.getParameterPos() and - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) ) } pragma[nomagic] private predicate pathThroughCallable0( DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, - AccessPathApprox apa + AccessPathApprox apa, Configuration config ) { exists(CallContext innercc, SummaryCtx sc | - pathIntoCallable(mid, _, cc, innercc, sc, call) and - paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration())) + pathIntoCallable(mid, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, innercc, sc, ap, apa, config) ) } @@ -3629,9 +3747,9 @@ private predicate pathThroughCallable0( */ pragma[noinline] private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) { - exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | - pathThroughCallable0(call, mid, kind, cc, ap, apa) and - out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration())) + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) ) } @@ -3642,12 +3760,15 @@ private module Subpaths { */ pragma[nomagic] private predicate subpaths01( - PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, NodeEx out, AccessPath apout ) { - pathThroughCallable(arg, out, _, apout) and - pathIntoCallable(arg, par, _, innercc, sc, _) and - paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration())) + exists(Configuration config | + pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, innercc, sc, _, config) and + paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) } /** @@ -3680,8 +3801,17 @@ private module Subpaths { innercc = ret.getCallContext() and sc = ret.getSummaryCtx() and ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and - apout = ret.getAp() and - not ret.isHidden() + apout = ret.getAp() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, n2, _, _, _, _) or + store(n1, _, n2, _, _) or + read(n1, _, n2, _) ) } @@ -3690,11 +3820,12 @@ private module Subpaths { * a subpath between `par` and `ret` with the connecting edges `arg -> par` and * `ret -> out` is summarized as the edge `arg -> out`. */ - predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) { + predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNodeMid out) { exists(ParamNodeEx p, NodeEx o, AccessPath apout | - arg.getASuccessor() = par and - arg.getASuccessor() = out and - subpaths03(arg, p, ret, o, apout) and + pragma[only_bind_into](arg).getASuccessor() = par and + pragma[only_bind_into](arg).getASuccessor() = out and + subpaths03(arg, p, localStepToHidden*(ret), o, apout) and + not ret.isHidden() and par.getNodeEx() = p and out.getNodeEx() = o and out.getAp() = apout diff --git a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll index ffe5e4ec29f..42d7e89f20b 100644 --- a/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll +++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll @@ -10,6 +10,7 @@ private import DataFlowImplCommon private import DataFlowImplSpecific::Private import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic /** * A configuration of interprocedural data flow analysis. This defines @@ -94,6 +95,22 @@ abstract class Configuration extends string { */ int fieldFlowBranchLimit() { result = 2 } + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + /** * Holds if data may flow from `source` to `sink` for this configuration. */ @@ -244,6 +261,8 @@ private class ParamNodeEx extends NodeEx { } int getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } } private class RetNodeEx extends NodeEx { @@ -270,6 +289,7 @@ private predicate outBarrier(NodeEx node, Configuration config) { ) } +pragma[nomagic] private predicate fullBarrier(NodeEx node, Configuration config) { exists(Node n | node.asNode() = n | config.isBarrier(n) @@ -288,11 +308,23 @@ private predicate fullBarrier(NodeEx node, Configuration config) { } pragma[nomagic] -private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } +private predicate sourceNode(NodeEx node, Configuration config) { + config.isSource(node.asNode()) and + not fullBarrier(node, config) +} pragma[nomagic] private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + /** * Holds if data can flow in one local step from `node1` to `node2`. */ @@ -301,16 +333,14 @@ private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config node1.asNode() = n1 and node2.asNode() = n2 and simpleLocalFlowStepExt(n1, n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) ) or exists(Node n | config.allowImplicitRead(n, _) and node1.asNode() = n and - node2.isImplicitReadNode(n, false) + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) ) } @@ -323,16 +353,14 @@ private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configurat node2.asNode() = n2 and config.isAdditionalFlowStep(n1, n2) and getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) ) or exists(Node n | config.allowImplicitRead(n, _) and node1.isImplicitReadNode(n, true) and - node2.asNode() = n + node2.asNode() = n and + not fullBarrier(node2, config) ) } @@ -344,10 +372,8 @@ private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { node1.asNode() = n1 and node2.asNode() = n2 and jumpStepCached(n1, n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) } @@ -360,15 +386,14 @@ private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration c node2.asNode() = n2 and config.isAdditionalFlowStep(n1, n2) and getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) } private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { - read(node1.asNode(), c, node2.asNode()) + read(node1.asNode(), c, node2.asNode()) and + stepFilter(node1, node2, config) or exists(Node n | node2.isImplicitReadNode(n, true) and @@ -381,7 +406,8 @@ private predicate store( NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config ) { store(node1.asNode(), tc, node2.asNode(), contentType) and - read(_, tc.getContent(), _, config) + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) } pragma[nomagic] @@ -399,6 +425,20 @@ private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx a */ private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + private module Stage1 { class ApApprox = Unit; @@ -416,63 +456,59 @@ private module Stage1 { * argument in a call. */ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { - not fullBarrier(node, config) and - ( - sourceNode(node, config) and + sourceNode(node, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and cc = false or - exists(NodeEx mid | - fwdFlow(mid, cc, config) and - localFlowStep(mid, node, config) - ) - or - exists(NodeEx mid | - fwdFlow(mid, cc, config) and - additionalLocalFlowStep(mid, node, config) - ) - or - exists(NodeEx mid | - fwdFlow(mid, _, config) and - jumpStep(mid, node, config) and - cc = false - ) - or - exists(NodeEx mid | - fwdFlow(mid, _, config) and - additionalJumpStep(mid, node, config) and - cc = false - ) - or - // store - exists(NodeEx mid | - useFieldFlow(config) and - fwdFlow(mid, cc, config) and - store(mid, _, node, _, config) and - not outBarrier(mid, config) - ) - or - // read - exists(Content c | - fwdFlowRead(c, node, cc, config) and - fwdFlowConsCand(c, config) and - not inBarrier(node, config) - ) - or - // flow into a callable - exists(NodeEx arg | - fwdFlow(arg, _, config) and - viableParamArgEx(_, node, arg) and - cc = true - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, false, config) and - cc = false - or - fwdFlowOutFromArg(call, node, config) and - fwdFlowIsEntered(call, cc, config) - ) + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) ) } @@ -512,7 +548,8 @@ private module Stage1 { private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { exists(ReturnPosition pos | fwdFlowReturnPosition(pos, cc, config) and - viableReturnPosOutEx(call, pos, out) + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) ) } @@ -549,7 +586,7 @@ private module Stage1 { private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { fwdFlow(node, config) and sinkNode(node, config) and - toReturn = false + if hasSinkCallCtx(config) then toReturn = true else toReturn = false or exists(NodeEx mid | localFlowStep(node, mid, config) and @@ -738,14 +775,19 @@ private module Stage1 { * Holds if flow may enter through `p` and reach a return node making `p` a * candidate for the origin of a summary. */ + pragma[nomagic] predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { exists(ReturnKindExt kind | throughFlowNodeCand(p, config) and returnFlowCallableNodeCand(c, kind, config) and p.getEnclosingCallable() = c and exists(ap) and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) ) } @@ -931,6 +973,8 @@ private module Stage2 { Cc ccNone() { result instanceof CallContextAny } + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + private class LocalCc = Unit; bindingset[call, c, outercc] @@ -968,6 +1012,9 @@ private module Stage2 { private predicate flowIntoCall = flowIntoCallNodeCand1/5; + bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { any() } + bindingset[ap, contentType] private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } @@ -976,14 +1023,23 @@ private module Stage2 { PrevStage::revFlow(node, _, _, apa, config) } + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -996,9 +1052,16 @@ private module Stage2 { */ pragma[nomagic] predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -1066,7 +1129,7 @@ private module Stage2 { ) { exists(DataFlowType contentType | fwdFlow(node1, cc, argAp, ap1, config) and - PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and typecheckStore(ap1, contentType) ) } @@ -1101,9 +1164,8 @@ private module Stage2 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1118,9 +1180,8 @@ private module Stage2 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1130,10 +1191,8 @@ private module Stage2 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1147,7 +1206,7 @@ private module Stage2 { ) { exists(ParamNodeEx p | fwdFlowIn(call, p, cc, _, argAp, ap, config) and - PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) ) } @@ -1189,6 +1248,11 @@ private module Stage2 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -1209,7 +1273,7 @@ private module Stage2 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -1265,7 +1329,7 @@ private module Stage2 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -1300,9 +1364,8 @@ private module Stage2 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1312,9 +1375,8 @@ private module Stage2 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1324,9 +1386,8 @@ private module Stage2 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1394,8 +1455,12 @@ private module Stage2 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -1606,6 +1671,8 @@ private module Stage3 { Cc ccNone() { result = false } + CcCall ccSomeCall() { result = true } + private class LocalCc = Unit; bindingset[call, c, outercc] @@ -1660,12 +1727,14 @@ private module Stage3 { pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -1687,7 +1756,7 @@ private module Stage3 { private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -1790,9 +1859,8 @@ private module Stage3 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1807,9 +1875,8 @@ private module Stage3 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1819,10 +1886,8 @@ private module Stage3 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1878,6 +1943,11 @@ private module Stage3 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -1898,7 +1968,7 @@ private module Stage3 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -1954,7 +2024,7 @@ private module Stage3 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -1989,9 +2059,8 @@ private module Stage3 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2001,9 +2070,8 @@ private module Stage3 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2013,9 +2081,8 @@ private module Stage3 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2083,8 +2150,12 @@ private module Stage3 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -2139,7 +2210,8 @@ private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) ) and accessPathApproxCostLimits(apLimit, tupleLimit) and apLimit < tails and - tupleLimit < (tails - 1) * nodes + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() ) } @@ -2351,6 +2423,8 @@ private module Stage4 { Cc ccNone() { result instanceof CallContextAny } + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + private class LocalCc = LocalCallContext; bindingset[call, c, outercc] @@ -2419,12 +2493,14 @@ private module Stage4 { pragma[nomagic] private predicate flowThroughOutOfCall( - DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config ) { flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, - pragma[only_bind_into](config)) + pragma[only_bind_into](config)) and + ccc.matchesCall(call) } /** @@ -2446,7 +2522,7 @@ private module Stage4 { private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { flowCand(node, _, config) and sourceNode(node, config) and - cc = ccNone() and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and argAp = apNone() and ap = getApNil(node) or @@ -2549,9 +2625,8 @@ private module Stage4 { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, outercc, argAp, ap, config) and flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2566,9 +2641,8 @@ private module Stage4 { fwdFlow(ret, innercc, argAp, ap, config) and flowOutOfCall(call, ret, out, allowsFieldFlow, config) and inner = ret.getEnclosingCallable() and - ccOut = getCallContextReturn(inner, call, innercc) - | - ap instanceof ApNil or allowsFieldFlow = true + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2578,10 +2652,8 @@ private module Stage4 { ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | fwdFlow(ret, ccc, apSome(argAp), ap, config) and - flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and - ccc.matchesCall(call) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2637,6 +2709,11 @@ private module Stage4 { callMayFlowThroughFwd(call, pragma[only_bind_into](config)) } + pragma[nomagic] + private predicate returnNodeMayFlowThrough(RetNodeEx ret, Ap ap, Configuration config) { + fwdFlow(ret, any(CcCall ccc), apSome(_), ap, config) + } + /** * Holds if `node` with access path `ap` is part of a path from a source to a * sink in the configuration `config`. @@ -2657,7 +2734,7 @@ private module Stage4 { ) { fwdFlow(node, _, _, ap, config) and sinkNode(node, config) and - toReturn = false and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and returnAp = apNone() and ap instanceof ApNil or @@ -2713,7 +2790,7 @@ private module Stage4 { // flow out of a callable revFlowOut(_, node, _, _, ap, config) and toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + if returnNodeMayFlowThrough(node, ap, config) then returnAp = apSome(ap) else returnAp = apNone() } @@ -2748,9 +2825,8 @@ private module Stage4 { ) { exists(NodeEx out, boolean allowsFieldFlow | revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2760,9 +2836,8 @@ private module Stage4 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, false, returnAp, ap, config) and - flowIntoCall(_, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2772,9 +2847,8 @@ private module Stage4 { ) { exists(ParamNodeEx p, boolean allowsFieldFlow | revFlow(p, true, apSome(returnAp), ap, config) and - flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -2842,8 +2916,12 @@ private module Stage4 { fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and kind = ret.getKind() and p.getPosition() = pos and - // we don't expect a parameter to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) ) } @@ -2916,6 +2994,8 @@ private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { int getParameterPos() { p.isParameterOf(_, result) } + ParamNodeEx getParamNode() { result = p } + override string toString() { result = p + ": " + ap } predicate hasLocationInfo( @@ -2973,12 +3053,15 @@ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { * expected to be expensive. Holds with `unfold = true` otherwise. */ private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { - exists(int aps, int nodes, int apLimit, int tupleLimit | - aps = countPotentialAps(apa, config) and - nodes = countNodesUsingAccessPath(apa, config) and - accessPathCostLimits(apLimit, tupleLimit) and - if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true - ) + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) } /** @@ -3040,7 +3123,11 @@ private newtype TPathNode = // A PathNode is introduced by a source ... Stage4::revFlow(node, config) and sourceNode(node, config) and - cc instanceof CallContextAny and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and sc instanceof SummaryCtxNone and ap = TAccessPathNil(node.getDataFlowType()) or @@ -3052,17 +3139,10 @@ private newtype TPathNode = ) } or TPathNodeSink(NodeEx node, Configuration config) { - sinkNode(node, pragma[only_bind_into](config)) and - Stage4::revFlow(node, pragma[only_bind_into](config)) and - ( - // A sink that is also a source ... - sourceNode(node, config) - or - // ... or a sink that can be reached from a source - exists(PathNodeMid mid | - pathStep(mid, node, _, _, TAccessPathNil(_)) and - pragma[only_bind_into](config) = mid.getConfiguration() - ) + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + config = sink.getConfiguration() ) } @@ -3379,22 +3459,46 @@ private class PathNodeMid extends PathNodeImpl, TPathNodeMid { // an intermediate step to another intermediate node result = this.getSuccMid() or - // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges - exists(PathNodeMid mid, PathNodeSink sink | - mid = this.getSuccMid() and - mid.getNodeEx() = sink.getNodeEx() and - mid.getAp() instanceof AccessPathNil and - sink.getConfiguration() = unbindConf(mid.getConfiguration()) and - result = sink - ) + // a final step to a sink + result = this.getSuccMid().projectToSink() } override predicate isSource() { sourceNode(node, config) and - cc instanceof CallContextAny and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and sc instanceof SummaryCtxNone and ap instanceof AccessPathNil } + + predicate isAtSink() { + sinkNode(node, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getConfiguration() = unbindConf(config) + } } /** @@ -3458,7 +3562,7 @@ private predicate pathStep( exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and sc = mid.getSummaryCtx() or - pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + pathIntoCallable(mid, node, _, cc, sc, _, _) and ap = mid.getAp() or pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone or @@ -3535,18 +3639,20 @@ private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) */ pragma[noinline] private predicate pathIntoArg( - PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa, + Configuration config ) { exists(ArgNode arg | arg = mid.getNodeEx().asNode() and cc = mid.getCallContext() and arg.argumentOf(call, i) and ap = mid.getAp() and - apa = ap.getApprox() + apa = ap.getApprox() and + config = mid.getConfiguration() ) } -pragma[noinline] +pragma[nomagic] private predicate parameterCand( DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config ) { @@ -3559,12 +3665,14 @@ private predicate parameterCand( pragma[nomagic] private predicate pathIntoCallable0( PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, - AccessPath ap + AccessPath ap, Configuration config ) { exists(AccessPathApprox apa | - pathIntoArg(mid, i, outercc, call, ap, apa) and + pathIntoArg(mid, pragma[only_bind_into](i), outercc, call, ap, pragma[only_bind_into](apa), + pragma[only_bind_into](config)) and callable = resolveCall(call, outercc) and - parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + parameterCand(callable, pragma[only_bind_into](i), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) ) } @@ -3573,18 +3681,23 @@ private predicate pathIntoCallable0( * before and after entering the callable are `outercc` and `innercc`, * respectively. */ +pragma[nomagic] private predicate pathIntoCallable( PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, - DataFlowCall call + DataFlowCall call, Configuration config ) { exists(int i, DataFlowCallable callable, AccessPath ap | - pathIntoCallable0(mid, callable, i, outercc, call, ap) and + pathIntoCallable0(mid, callable, i, outercc, call, ap, config) and p.isParameterOf(callable, i) and ( sc = TSummaryCtxSome(p, ap) or not exists(TSummaryCtxSome(p, ap)) and - sc = TSummaryCtxNone() + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext ) | if recordDataFlowCallSite(call, callable) @@ -3608,18 +3721,23 @@ private predicate paramFlowsThrough( ap = mid.getAp() and apa = ap.getApprox() and pos = sc.getParameterPos() and - not kind.(ParamUpdateReturnKind).getPosition() = pos + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) ) } pragma[nomagic] private predicate pathThroughCallable0( DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, - AccessPathApprox apa + AccessPathApprox apa, Configuration config ) { exists(CallContext innercc, SummaryCtx sc | - pathIntoCallable(mid, _, cc, innercc, sc, call) and - paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration())) + pathIntoCallable(mid, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, innercc, sc, ap, apa, config) ) } @@ -3629,9 +3747,9 @@ private predicate pathThroughCallable0( */ pragma[noinline] private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) { - exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | - pathThroughCallable0(call, mid, kind, cc, ap, apa) and - out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration())) + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) ) } @@ -3642,12 +3760,15 @@ private module Subpaths { */ pragma[nomagic] private predicate subpaths01( - PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, NodeEx out, AccessPath apout ) { - pathThroughCallable(arg, out, _, apout) and - pathIntoCallable(arg, par, _, innercc, sc, _) and - paramFlowsThrough(kind, innercc, sc, apout, _, unbindConf(arg.getConfiguration())) + exists(Configuration config | + pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, innercc, sc, _, config) and + paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) } /** @@ -3680,8 +3801,17 @@ private module Subpaths { innercc = ret.getCallContext() and sc = ret.getSummaryCtx() and ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and - apout = ret.getAp() and - not ret.isHidden() + apout = ret.getAp() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, n2, _, _, _, _) or + store(n1, _, n2, _, _) or + read(n1, _, n2, _) ) } @@ -3690,11 +3820,12 @@ private module Subpaths { * a subpath between `par` and `ret` with the connecting edges `arg -> par` and * `ret -> out` is summarized as the edge `arg -> out`. */ - predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) { + predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNodeMid out) { exists(ParamNodeEx p, NodeEx o, AccessPath apout | - arg.getASuccessor() = par and - arg.getASuccessor() = out and - subpaths03(arg, p, ret, o, apout) and + pragma[only_bind_into](arg).getASuccessor() = par and + pragma[only_bind_into](arg).getASuccessor() = out and + subpaths03(arg, p, localStepToHidden*(ret), o, apout) and + not ret.isHidden() and par.getNodeEx() = p and out.getNodeEx() = o and out.getAp() = apout diff --git a/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll b/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll index fc20b535bae..c28ceabb438 100644 --- a/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll +++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll @@ -2,6 +2,42 @@ private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public import Cached +module DataFlowImplCommonPublic { + private newtype TFlowFeature = + TFeatureHasSourceCallContext() or + TFeatureHasSinkCallContext() or + TFeatureEqualSourceSinkCallContext() + + /** A flow configuration feature for use in `Configuration::getAFeature()`. */ + class FlowFeature extends TFlowFeature { + string toString() { none() } + } + + /** + * A flow configuration feature that implies that sources have some existing + * call context. + */ + class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext { + override string toString() { result = "FeatureHasSourceCallContext" } + } + + /** + * A flow configuration feature that implies that sinks have some existing + * call context. + */ + class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext { + override string toString() { result = "FeatureHasSinkCallContext" } + } + + /** + * A flow configuration feature that implies that source-sink pairs have some + * shared existing call context. + */ + class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext { + override string toString() { result = "FeatureEqualSourceSinkCallContext" } + } +} + /** * The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion. * @@ -251,7 +287,7 @@ private module Cached { predicate forceCachingInSameStage() { any() } cached - predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() } + predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = nodeGetEnclosingCallable(n) } cached predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) { @@ -316,9 +352,7 @@ private module Cached { } cached - predicate parameterNode(Node n, DataFlowCallable c, int i) { - n.(ParameterNode).isParameterOf(c, i) - } + predicate parameterNode(Node p, DataFlowCallable c, int pos) { isParameterNode(p, c, pos) } cached predicate argumentNode(Node n, DataFlowCall call, int pos) { @@ -801,6 +835,9 @@ private module Cached { exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call)) } + cached + predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) } + cached newtype TCallContext = TAnyCallContext() or @@ -1236,6 +1273,13 @@ class TypedContent extends MkTypedContent { /** Gets a textual representation of this content. */ string toString() { result = c.toString() } + + /** + * Holds if access paths with this `TypedContent` at their head always should + * be tracked at high precision. This disables adaptive access path precision + * for such access paths. + */ + predicate forceHighPrecision() { forceHighPrecision(c) } } /** diff --git a/ql/lib/semmle/go/dataflow/internal/DataFlowImplConsistency.qll b/ql/lib/semmle/go/dataflow/internal/DataFlowImplConsistency.qll new file mode 100644 index 00000000000..a2332ad8ea1 --- /dev/null +++ b/ql/lib/semmle/go/dataflow/internal/DataFlowImplConsistency.qll @@ -0,0 +1,196 @@ +/** + * Provides consistency queries for checking invariants in the language-specific + * data-flow classes and predicates. + */ + +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +private import tainttracking1.TaintTrackingParameter::Private +private import tainttracking1.TaintTrackingParameter::Public + +module Consistency { + private newtype TConsistencyConfiguration = MkConsistencyConfiguration() + + /** A class for configuring the consistency queries. */ + class ConsistencyConfiguration extends TConsistencyConfiguration { + string toString() { none() } + + /** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */ + predicate postWithInFlowExclude(Node n) { none() } + + /** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */ + predicate argHasPostUpdateExclude(ArgumentNode n) { none() } + } + + private class RelevantNode extends Node { + RelevantNode() { + this instanceof ArgumentNode or + this instanceof ParameterNode or + this instanceof ReturnNode or + this = getAnOutNode(_, _) or + simpleLocalFlowStep(this, _) or + simpleLocalFlowStep(_, this) or + jumpStep(this, _) or + jumpStep(_, this) or + storeStep(this, _, _) or + storeStep(_, _, this) or + readStep(this, _, _) or + readStep(_, _, this) or + defaultAdditionalTaintStep(this, _) or + defaultAdditionalTaintStep(_, this) + } + } + + query predicate uniqueEnclosingCallable(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(nodeGetEnclosingCallable(n)) and + c != 1 and + msg = "Node should have one enclosing callable but has " + c + "." + ) + } + + query predicate uniqueType(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(getNodeType(n)) and + c != 1 and + msg = "Node should have one type but has " + c + "." + ) + } + + query predicate uniqueNodeLocation(Node n, string msg) { + exists(int c | + c = + count(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) and + c != 1 and + msg = "Node should have one location but has " + c + "." + ) + } + + query predicate missingLocation(string msg) { + exists(int c | + c = + strictcount(Node n | + not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + ) and + msg = "Nodes without location: " + c + ) + } + + query predicate uniqueNodeToString(Node n, string msg) { + exists(int c | + c = count(n.toString()) and + c != 1 and + msg = "Node should have one toString but has " + c + "." + ) + } + + query predicate missingToString(string msg) { + exists(int c | + c = strictcount(Node n | not exists(n.toString())) and + msg = "Nodes without toString: " + c + ) + } + + query predicate parameterCallable(ParameterNode p, string msg) { + exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and + msg = "Callable mismatch for parameter." + } + + query predicate localFlowIsLocal(Node n1, Node n2, string msg) { + simpleLocalFlowStep(n1, n2) and + nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and + msg = "Local flow step does not preserve enclosing callable." + } + + private DataFlowType typeRepr() { result = getNodeType(_) } + + query predicate compatibleTypesReflexive(DataFlowType t, string msg) { + t = typeRepr() and + not compatibleTypes(t, t) and + msg = "Type compatibility predicate is not reflexive." + } + + query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) { + isUnreachableInCall(n, call) and + exists(DataFlowCallable c | + c = nodeGetEnclosingCallable(n) and + not viableCallable(call) = c + ) and + msg = "Call context for isUnreachableInCall is inconsistent with call graph." + } + + query predicate localCallNodes(DataFlowCall call, Node n, string msg) { + ( + n = getAnOutNode(call, _) and + msg = "OutNode and call does not share enclosing callable." + or + n.(ArgumentNode).argumentOf(call, _) and + msg = "ArgumentNode and call does not share enclosing callable." + ) and + nodeGetEnclosingCallable(n) != call.getEnclosingCallable() + } + + // This predicate helps the compiler forget that in some languages + // it is impossible for a result of `getPreUpdateNode` to be an + // instance of `PostUpdateNode`. + private Node getPre(PostUpdateNode n) { + result = n.getPreUpdateNode() + or + none() + } + + query predicate postIsNotPre(PostUpdateNode n, string msg) { + getPre(n) = n and + msg = "PostUpdateNode should not equal its pre-update node." + } + + query predicate postHasUniquePre(PostUpdateNode n, string msg) { + exists(int c | + c = count(n.getPreUpdateNode()) and + c != 1 and + msg = "PostUpdateNode should have one pre-update node but has " + c + "." + ) + } + + query predicate uniquePostUpdate(Node n, string msg) { + 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and + msg = "Node has multiple PostUpdateNodes." + } + + query predicate postIsInSameCallable(PostUpdateNode n, string msg) { + nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and + msg = "PostUpdateNode does not share callable with its pre-update node." + } + + private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) } + + query predicate reverseRead(Node n, string msg) { + exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and + msg = "Origin of readStep is missing a PostUpdateNode." + } + + query predicate argHasPostUpdate(ArgumentNode n, string msg) { + not hasPost(n) and + not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and + msg = "ArgumentNode is missing PostUpdateNode." + } + + // This predicate helps the compiler forget that in some languages + // it is impossible for a `PostUpdateNode` to be the target of + // `simpleLocalFlowStep`. + private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() } + + query predicate postWithInFlow(Node n, string msg) { + isPostUpdateNode(n) and + not clearsContent(n, _) and + simpleLocalFlowStep(_, n) and + not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and + msg = "PostUpdateNode should not be the target of local flow." + } +} diff --git a/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index e655419f454..7d5077b2280 100644 --- a/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -85,6 +85,9 @@ module Public { /** Holds if this stack contains summary component `c`. */ predicate contains(SummaryComponent c) { c = this.drop(_).head() } + /** Gets the bottom element of this stack. */ + SummaryComponent bottom() { result = this.drop(this.length() - 1).head() } + /** Gets a textual representation of this stack. */ string toString() { exists(SummaryComponent head, SummaryComponentStack tail | @@ -124,6 +127,38 @@ module Public { SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } } + private predicate noComponentSpecificCsv(SummaryComponent sc) { + not exists(getComponentSpecificCsv(sc)) + } + + /** Gets a textual representation of this component used for flow summaries. */ + private string getComponentCsv(SummaryComponent sc) { + result = getComponentSpecificCsv(sc) + or + noComponentSpecificCsv(sc) and + ( + exists(int i | sc = TParameterSummaryComponent(i) and result = "Parameter[" + i + "]") + or + exists(int i | sc = TArgumentSummaryComponent(i) and result = "Argument[" + i + "]") + or + sc = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" + ) + } + + /** Gets a textual representation of this stack used for flow summaries. */ + string getComponentStackCsv(SummaryComponentStack stack) { + exists(SummaryComponent head, SummaryComponentStack tail | + head = stack.head() and + tail = stack.tail() and + result = getComponentCsv(head) + " of " + getComponentStackCsv(tail) + ) + or + exists(SummaryComponent c | + stack = TSingletonSummaryComponentStack(c) and + result = getComponentCsv(c) + ) + } + /** * A class that exists for QL technical reasons only (the IPA type used * to represent component stacks needs to be bounded). @@ -186,10 +221,19 @@ module Private { TArgumentSummaryComponent(int i) { parameterPosition(i) } or TReturnSummaryComponent(ReturnKind rk) + private TSummaryComponent thisParam() { + result = TParameterSummaryComponent(instanceParameterPosition()) + } + newtype TSummaryComponentStack = TSingletonSummaryComponentStack(SummaryComponent c) or TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { tail.(RequiredSummaryComponentStack).required(head) + or + tail.(RequiredSummaryComponentStack).required(TParameterSummaryComponent(_)) and + head = thisParam() + or + derivedFluentFlowPush(_, _, _, head, tail, _) } pragma[nomagic] @@ -198,20 +242,130 @@ module Private { boolean preservesValue ) { c.propagatesFlow(input, output, preservesValue) + or + // observe side effects of callbacks on input arguments + c.propagatesFlow(output, input, preservesValue) and + preservesValue = true and + isCallbackParameter(input) and + isContentOfArgument(output, _) + or + // flow from the receiver of a callback into the instance-parameter + exists(SummaryComponentStack s, SummaryComponentStack callbackRef | + c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) + | + callbackRef = s.drop(_) and + (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and + input = callbackRef.tail() and + output = TConsSummaryComponentStack(thisParam(), input) and + preservesValue = true + ) + or + exists(SummaryComponentStack arg, SummaryComponentStack return | + derivedFluentFlow(c, input, arg, return, preservesValue) + | + arg.length() = 1 and + output = return + or + exists(SummaryComponent head, SummaryComponentStack tail | + derivedFluentFlowPush(c, input, arg, head, tail, 0) and + output = SummaryComponentStack::push(head, tail) + ) + ) + or + // Chain together summaries where values get passed into callbacks along the way + exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | + c.propagatesFlow(input, mid, preservesValue1) and + c.propagatesFlow(mid, output, preservesValue2) and + mid.drop(mid.length() - 2) = + SummaryComponentStack::push(TParameterSummaryComponent(_), + SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and + preservesValue = preservesValue1.booleanAnd(preservesValue2) + ) + } + + /** + * Holds if `c` has a flow summary from `input` to `arg`, where `arg` + * writes to (contents of) the `i`th argument, and `c` has a + * value-preserving flow summary from the `i`th argument to a return value + * (`return`). + * + * In such a case, we derive flow from `input` to (contents of) the return + * value. + * + * As an example, this simplifies modeling of fluent methods: + * for `StringBuilder.append(x)` with a specified value flow from qualifier to + * return value and taint flow from argument 0 to the qualifier, then this + * allows us to infer taint flow from argument 0 to the return value. + */ + pragma[nomagic] + private predicate derivedFluentFlow( + SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, + SummaryComponentStack return, boolean preservesValue + ) { + exists(int i | + summary(c, input, arg, preservesValue) and + isContentOfArgument(arg, i) and + summary(c, SummaryComponentStack::singleton(TArgumentSummaryComponent(i)), return, true) and + return.bottom() = TReturnSummaryComponent(_) + ) + } + + pragma[nomagic] + private predicate derivedFluentFlowPush( + SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, + SummaryComponent head, SummaryComponentStack tail, int i + ) { + derivedFluentFlow(c, input, arg, tail, _) and + head = arg.drop(i).head() and + i = arg.length() - 2 + or + exists(SummaryComponent head0, SummaryComponentStack tail0 | + derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and + head = arg.drop(i).head() and + tail = SummaryComponentStack::push(head0, tail0) + ) + } + + private predicate isCallbackParameter(SummaryComponentStack s) { + s.head() = TParameterSummaryComponent(_) and exists(s.tail()) + } + + private predicate isContentOfArgument(SummaryComponentStack s, int i) { + s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), i) + or + s = TSingletonSummaryComponentStack(TArgumentSummaryComponent(i)) + } + + private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { + summary(c, _, s, _) + or + exists(SummaryComponentStack out | + outputState(c, out) and + out.head() = TContentSummaryComponent(_) and + s = out.tail() + ) + or + // Add the argument node corresponding to the requested post-update node + inputState(c, s) and isCallbackParameter(s) + } + + private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { + summary(c, s, _, _) + or + exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) + or + exists(SummaryComponentStack out | + outputState(c, out) and + out.head() = TParameterSummaryComponent(_) and + s = out.tail() + ) } private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { - exists(SummaryComponentStack input | - summary(_, input, _, _) and - s = input.drop(_) - ) - } or - TSummaryNodeOutputState(SummaryComponentStack s) { - exists(SummaryComponentStack output | - summary(_, _, output, _) and - s = output.drop(_) - ) + TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or + TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } or + TSummaryNodeClearsContentState(int i, boolean post) { + any(SummarizedCallable sc).clearsContent(i, _) and post in [false, true] } /** @@ -238,20 +392,14 @@ module Private { pragma[nomagic] predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { this = TSummaryNodeInputState(s) and - exists(SummaryComponentStack input | - summary(c, input, _, _) and - s = input.drop(_) - ) + inputState(c, s) } /** Holds if this state is a valid output state for `c`. */ pragma[nomagic] predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { this = TSummaryNodeOutputState(s) and - exists(SummaryComponentStack output | - summary(c, _, output, _) and - s = output.drop(_) - ) + outputState(c, s) } /** Gets a textual representation of this state. */ @@ -265,6 +413,12 @@ module Private { this = TSummaryNodeOutputState(s) and result = "to write: " + s ) + or + exists(int i, boolean post, string postStr | + this = TSummaryNodeClearsContentState(i, post) and + (if post = true then postStr = " (post)" else postStr = "") and + result = "clear: " + i + postStr + ) } } @@ -286,6 +440,11 @@ module Private { not parameterReadState(c, state, _) or state.isOutputState(c, _) + or + exists(int i | + c.clearsContent(i, _) and + state = TSummaryNodeClearsContentState(i, _) + ) } pragma[noinline] @@ -321,6 +480,8 @@ module Private { parameterReadState(c, _, i) or isParameterPostUpdate(_, c, i) + or + c.clearsContent(i, _) } private predicate callbackOutput( @@ -331,19 +492,12 @@ module Private { receiver = summaryNodeInputState(c, s.drop(1)) } - private Node pre(Node post) { - summaryPostUpdateNode(post, result) - or - not summaryPostUpdateNode(post, _) and - result = post - } - private predicate callbackInput( SummarizedCallable c, SummaryComponentStack s, Node receiver, int i ) { any(SummaryNodeState state).isOutputState(c, s) and s.head() = TParameterSummaryComponent(i) and - receiver = pre(summaryNodeOutputState(c, s.drop(1))) + receiver = summaryNodeInputState(c, s.drop(1)) } /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ @@ -395,11 +549,17 @@ module Private { or exists(int i | head = TParameterSummaryComponent(i) | result = - getCallbackParameterType(getNodeType(summaryNodeOutputState(pragma[only_bind_out](c), + getCallbackParameterType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c), s.drop(1))), i) ) ) ) + or + exists(SummarizedCallable c, int i, ParamNode p | + n = summaryNode(c, TSummaryNodeClearsContentState(i, false)) and + p.isParameterOf(c, i) and + result = getNodeType(p) + ) } /** Holds if summary node `out` contains output of kind `rk` from call `c`. */ @@ -421,10 +581,19 @@ module Private { } /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(Node post, ParamNode pre) { + predicate summaryPostUpdateNode(Node post, Node pre) { exists(SummarizedCallable c, int i | isParameterPostUpdate(post, c, i) and - pre.isParameterOf(c, i) + pre.(ParamNode).isParameterOf(c, i) + or + pre = summaryNode(c, TSummaryNodeClearsContentState(i, false)) and + post = summaryNode(c, TSummaryNodeClearsContentState(i, true)) + ) + or + exists(SummarizedCallable callable, SummaryComponentStack s | + callbackInput(callable, s, _, _) and + pre = summaryNodeOutputState(callable, s) and + post = summaryNodeInputState(callable, s) ) } @@ -436,6 +605,22 @@ module Private { ) } + /** + * Holds if flow is allowed to pass from parameter `p`, to a return + * node, and back out to `p`. + */ + predicate summaryAllowParameterReturnInSelf(ParamNode p) { + exists(SummarizedCallable c, int i | p.isParameterOf(c, i) | + c.clearsContent(i, _) + or + exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | + summary(c, inputContents, outputContents, _) and + inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(i)) and + outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(i)) + ) + ) + } + /** Provides a compilation of flow summaries to atomic data-flow steps. */ module Steps { /** @@ -456,13 +641,11 @@ module Private { preservesValue = false and not summary(c, inputContents, outputContents, true) ) or - // If flow through a method updates a parameter from some input A, and that - // parameter also is returned through B, then we'd like a combined flow from A - // to B as well. As an example, this simplifies modeling of fluent methods: - // for `StringBuilder.append(x)` with a specified value flow from qualifier to - // return value and taint flow from argument 0 to the qualifier, then this - // allows us to infer taint flow from argument 0 to the return value. - summaryPostUpdateNode(pred, succ) and preservesValue = true + exists(SummarizedCallable c, int i | + pred.(ParamNode).isParameterOf(c, i) and + succ = summaryNode(c, TSummaryNodeClearsContentState(i, _)) and + preservesValue = true + ) } /** @@ -490,10 +673,39 @@ module Private { } /** - * Holds if values stored inside content `c` are cleared when passed as - * input of type `input` in `call`. + * Holds if values stored inside content `c` are cleared at `n`. `n` is a + * synthesized summary node, so in order for values to be cleared at calls + * to the relevant method, it is important that flow does not pass over + * the argument, either via use-use flow or def-use flow. + * + * Example: + * + * ``` + * a.b = taint; + * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier + * sink(a.b); + * ``` + * + * In the above, flow should not pass from `a` on the first line (or the second + * line) to `a` on the third line. Instead, there will be synthesized flow from + * `a` on line 2 to the post-update node for `a` on that line (via an intermediate + * node where field `b` is cleared). */ - predicate summaryClearsContent(ArgNode arg, Content c) { + predicate summaryClearsContent(Node n, Content c) { + exists(SummarizedCallable sc, int i | + n = summaryNode(sc, TSummaryNodeClearsContentState(i, true)) and + sc.clearsContent(i, c) + ) + } + + /** + * Holds if values stored inside content `c` are cleared inside a + * callable to which `arg` is an argument. + * + * In such cases, it is important to prevent use-use flow out of + * `arg` (see comment for `summaryClearsContent`). + */ + predicate summaryClearsContentArg(ArgNode arg, Content c) { exists(DataFlowCall call, int i | viableCallable(call).(SummarizedCallable).clearsContent(i, c) and arg.argumentOf(call, i) @@ -553,25 +765,6 @@ module Private { ret.getKind() = rk ) } - - /** - * Holds if data is written into content `c` of argument `arg` using a flow summary. - * - * Depending on the type of `c`, this predicate may be relevant to include in the - * definition of `clearsContent()`. - */ - predicate summaryStoresIntoArg(Content c, Node arg) { - exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out | - exists(DataFlowCall call, SummarizedCallable callable | - getNodeEnclosingCallable(ret) = callable and - viableCallable(call) = callable and - summaryStoreStep(_, c, ret) and - ret.getKind() = pragma[only_bind_into](rk) and - out = rk.getAnOutNode(call) and - arg = out.getPreUpdateNode() - ) - ) - } } /** @@ -629,22 +822,6 @@ module Private { ) } - /** Holds if specification component `c` parses as return value `n`. */ - predicate parseReturn(string c, int n) { - specSplit(_, c, _) and - ( - c = "ReturnValue" and n = 0 - or - c.regexpCapture("ReturnValue\\[([-0-9]+)\\]", 1).toInt() = n - or - exists(int n1, int n2 | - c.regexpCapture("ReturnValue\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and - c.regexpCapture("ReturnValue\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and - n = [n1 .. n2] - ) - ) - } - private SummaryComponent interpretComponent(string c) { specSplit(_, c, _) and ( @@ -652,9 +829,7 @@ module Private { or exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos)) or - exists(int pos | - parseReturn(c, pos) and result = SummaryComponent::return(getReturnKind(pos)) - ) + c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) or result = interpretComponentSpecific(c) ) @@ -721,12 +896,15 @@ module Private { not exists(interpretComponent(c)) } - private predicate inputNeedsReference(string c) { parseArg(c, _) } + private predicate inputNeedsReference(string c) { + c = "Argument" or + parseArg(c, _) + } private predicate outputNeedsReference(string c) { + c = "Argument" or parseArg(c, _) or - c = "ReturnValue" or - parseReturn(c, _) + c = "ReturnValue" } private predicate sourceElementRef(InterpretNode ref, string output, string kind) { @@ -759,20 +937,15 @@ module Private { exists(int pos | node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos) | - parseArg(c, pos) + c = "Argument" or parseArg(c, pos) ) or exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) | c = "Parameter" or parseParam(c, pos) ) or - exists(int pos | - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnKind(pos))) - | - c = "ReturnValue" and pos = 0 - or - parseReturn(c, pos) - ) + c = "ReturnValue" and + node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) or interpretOutputSpecific(c, mid, node) ) @@ -787,16 +960,14 @@ module Private { interpretInput(input, idx + 1, ref, mid) and specSplit(input, c, idx) | - exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) | parseArg(c, pos)) + exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) | + c = "Argument" or parseArg(c, pos) + ) or - exists(int pos, ReturnNodeExt ret | - ( - c = "ReturnValue" and pos = 0 - or - parseReturn(c, pos) - ) and + exists(ReturnNodeExt ret | + c = "ReturnValue" and ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnKind(pos) and + ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and mid.asCallable() = getNodeEnclosingCallable(ret) ) or @@ -831,19 +1002,130 @@ module Private { module TestOutput { /** A flow summary to include in the `summary/3` query predicate. */ abstract class RelevantSummarizedCallable extends SummarizedCallable { - /** Gets the string representation of this callable used by `summary/3`. */ - string getFullString() { result = this.toString() } + /** Gets the string representation of this callable used by `summary/1`. */ + abstract string getCallableCsv(); + + /** Holds if flow is propagated between `input` and `output`. */ + predicate relevantSummary( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + this.propagatesFlow(input, output, preservesValue) + } } - /** A query predicate for outputting flow summaries in QL tests. */ - query predicate summary(string callable, string flow, boolean preservesValue) { + /** Render the kind in the format used in flow summaries. */ + private string renderKind(boolean preservesValue) { + preservesValue = true and result = "value" + or + preservesValue = false and result = "taint" + } + + /** + * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. + * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind", + * ext is hardcoded to empty. + */ + query predicate summary(string csv) { exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output + RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, + boolean preservesValue | - callable = c.getFullString() and - c.propagatesFlow(input, output, preservesValue) and - flow = input + " -> " + output + c.relevantSummary(input, output, preservesValue) and + csv = + c.getCallableCsv() + ";;" + getComponentStackCsv(input) + ";" + + getComponentStackCsv(output) + ";" + renderKind(preservesValue) ) } } + + /** + * Provides query predicates for rendering the generated data flow graph for + * a summarized callable. + * + * Import this module into a `.ql` file of `@kind graph` to render the graph. + * The graph is restricted to callables from `RelevantSummarizedCallable`. + */ + module RenderSummarizedCallable { + /** A summarized callable to include in the graph. */ + abstract class RelevantSummarizedCallable extends SummarizedCallable { } + + private newtype TNodeOrCall = + MkNode(Node n) { + exists(RelevantSummarizedCallable c | + n = summaryNode(c, _) + or + n.(ParamNode).isParameterOf(c, _) + ) + } or + MkCall(DataFlowCall call) { + call = summaryDataFlowCall(_) and + call.getEnclosingCallable() instanceof RelevantSummarizedCallable + } + + private class NodeOrCall extends TNodeOrCall { + Node asNode() { this = MkNode(result) } + + DataFlowCall asCall() { this = MkCall(result) } + + string toString() { + result = this.asNode().toString() + or + result = this.asCall().toString() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + or + this.asCall().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + query predicate nodes(NodeOrCall n, string key, string val) { + key = "semmle.label" and val = n.toString() + } + + private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { + exists(boolean preservesValue | + Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and + if preservesValue = true then value = "value" else value = "taint" + ) + or + exists(Content c | + Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and + value = "read (" + c + ")" + or + Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and + value = "store (" + c + ")" + or + Private::Steps::summaryClearsContent(a.asNode(), c) and + b = a and + value = "clear (" + c + ")" + ) + or + summaryPostUpdateNode(b.asNode(), a.asNode()) and + value = "post-update" + or + b.asCall() = summaryDataFlowCall(a.asNode()) and + value = "receiver" + or + exists(int i | + summaryArgumentNode(b.asCall(), a.asNode(), i) and + value = "argument (" + i + ")" + ) + } + + query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { + key = "semmle.label" and + value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") + } + } } diff --git a/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll index 1607704093d..acb029c23d9 100644 --- a/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll +++ b/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll @@ -75,25 +75,25 @@ abstract class Configuration extends DataFlow::Configuration { predicate isSanitizer(DataFlow::Node node) { none() } final override predicate isBarrier(DataFlow::Node node) { - isSanitizer(node) or + this.isSanitizer(node) or defaultTaintSanitizer(node) } /** Holds if taint propagation into `node` is prohibited. */ predicate isSanitizerIn(DataFlow::Node node) { none() } - final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) } /** Holds if taint propagation out of `node` is prohibited. */ predicate isSanitizerOut(DataFlow::Node node) { none() } - final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) } /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */ predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) + this.isSanitizerGuard(guard) } /** @@ -103,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration { predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isAdditionalTaintStep(node1, node2) or + this.isAdditionalTaintStep(node1, node2) or defaultAdditionalTaintStep(node1, node2) } diff --git a/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll b/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll index 1607704093d..acb029c23d9 100644 --- a/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll +++ b/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll @@ -75,25 +75,25 @@ abstract class Configuration extends DataFlow::Configuration { predicate isSanitizer(DataFlow::Node node) { none() } final override predicate isBarrier(DataFlow::Node node) { - isSanitizer(node) or + this.isSanitizer(node) or defaultTaintSanitizer(node) } /** Holds if taint propagation into `node` is prohibited. */ predicate isSanitizerIn(DataFlow::Node node) { none() } - final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) } /** Holds if taint propagation out of `node` is prohibited. */ predicate isSanitizerOut(DataFlow::Node node) { none() } - final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) } /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */ predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) + this.isSanitizerGuard(guard) } /** @@ -103,7 +103,7 @@ abstract class Configuration extends DataFlow::Configuration { predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isAdditionalTaintStep(node1, node2) or + this.isAdditionalTaintStep(node1, node2) or defaultAdditionalTaintStep(node1, node2) }