Merge commit '737dd9d4c1' into jb1/lib/dataflowstack

This commit is contained in:
Josh Brown
2024-02-08 08:18:04 -08:00
2596 changed files with 351218 additions and 96833 deletions

View File

@@ -1,3 +1,13 @@
## 0.1.7
No user-facing changes.
## 0.1.6
### Deprecated APIs
* The old configuration-class based data flow API has been deprecated. The configuration-module based API should be used instead. For details, see https://github.blog/changelog/2023-08-14-new-dataflow-api-for-writing-custom-codeql-queries/.
## 0.1.5
No user-facing changes.

View File

@@ -0,0 +1,5 @@
## 0.1.6
### Deprecated APIs
* The old configuration-class based data flow API has been deprecated. The configuration-module based API should be used instead. For details, see https://github.blog/changelog/2023-08-14-new-dataflow-api-for-writing-custom-codeql-queries/.

View File

@@ -0,0 +1,3 @@
## 0.1.7
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.1.5
lastReleaseVersion: 0.1.7

View File

@@ -87,13 +87,13 @@ signature module InputSig {
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context.
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c);
default predicate mayBenefitFromCallContext(DataFlowCall call) { none() }
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx);
default DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
/**
* Gets a node that can read the value returned from `call` with return kind
@@ -150,6 +150,9 @@ signature module InputSig {
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet {
/** Gets a textual representation of this element. */
string toString();
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent();
@@ -180,6 +183,13 @@ signature module InputSig {
predicate simpleLocalFlowStep(Node node1, Node node2);
/**
* Holds if the data-flow step from `node1` to `node2` can be used to
* determine where side-effects may return from a callable.
*
* The default implementation imposes no restriction: it holds for every
* pair of nodes it is asked about (both arguments must be bound by the caller).
*/
bindingset[node1, node2]
default predicate validParameterAliasStep(Node node1, Node node2) { any() }
/**
* Holds if data can flow from `node1` to `node2` through a non-local step
* that does not follow a call edge. For example, a step through a global

View File

@@ -0,0 +1,220 @@
/**
* Module for parsing access paths from MaD models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which do not use the shared data flow libraries).
*/
/**
 * Holds if `capture1` and `capture2` are the contents of the first and second
 * capture groups, respectively, when `input` is matched against `regexp`.
 *
 * This is a convenience predicate for extracting two capture groups at once.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 1) = capture1 and
  input.regexpCapture(regexp, 2) = capture2
}
/**
 * Gets the value denoted by the integer constant `n`, or any value contained in
 * the (inclusive) interval `n1..n2`, where `arg` is the textual form of that
 * constant or interval.
 */
bindingset[arg]
int parseInt(string arg) {
  // Plain constant: "n"
  arg.toInt() = result
  or
  // Bounded interval: "n1..n2"
  exists(string lower, string upper |
    regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lower, upper)
  |
    result = [lower.toInt() .. upper.toInt()]
  )
}
/**
 * Gets the lower bound `n` of the lower-bounded interval `n..` written in `arg`.
 *
 * Has no result if `arg` is not of that form.
 */
bindingset[arg]
int parseLowerBound(string arg) {
  exists(string bound | bound = arg.regexpCapture("(-?\\d+)\\.\\.", 1) | result = bound.toInt())
}
/**
 * A token of an access path, such as `Argument[1]` or `ReturnValue`.
 *
 * A token is written either as a bare name (`Name`) or as a name followed by
 * a bracketed, comma-separated argument list (`Name[arg1,arg2]`).
 */
class AccessPathTokenBase extends string {
  bindingset[this]
  AccessPathTokenBase() { exists(this) }

  /**
   * Gets the text of capture group `group` when this token is matched against the
   * `Name[args]` pattern: group 1 is the name and group 2 is the argument list.
   */
  bindingset[this]
  private string getPart(int group) {
    this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", group) = result
  }

  /** Gets the name of the token, such as `Member` from `Member[x]` */
  bindingset[this]
  string getName() { this.getPart(1) = result }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  bindingset[this]
  string getArgumentList() { this.getPart(2) = result }

  /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
  bindingset[this]
  string getArgument(int n) {
    exists(string argList | argList = this.getArgumentList() |
      // Arguments are comma-separated; surrounding whitespace is not significant.
      result = argList.splitAt(",", n).trim()
    )
  }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  bindingset[this]
  string getAnArgument() { exists(int n | result = this.getArgument(n)) }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  bindingset[this]
  int getNumArgument() { result = count(int n | exists(this.getArgument(n)) | n) }
}
/** A final alias of `AccessPathTokenBase`; this is the type that `AccessPath::AccessPathToken` extends. */
final private class AccessPathTokenBaseFinal = AccessPathTokenBase;
/**
 * Signature of the predicate supplied to the `AccessPath` module. The strings `s`
 * for which it holds are exactly the members of the `AccessPath::AccessPath` class.
 */
signature predicate accessPathRangeSig(string s);
/** Companion module to the `AccessPath` class. */
module AccessPath<accessPathRangeSig/1 accessPathRange> {
/**
 * Parses an integer constant or interval (bounded or unbounded) that explicitly
 * references the arity, such as `N-1`, `N-3..` or `N-3..N-1`, and gets any of
 * the integers it denotes. `N` stands for `arity`.
 *
 * Note that expressions of form `N-x` will never resolve to a negative index,
 * even if `N` is zero (it will have no result in that case).
 */
bindingset[arg, arity]
private int parseIntWithExplicitArity(string arg, int arity) {
  result >= 0 and // do not allow N-1 to resolve to a negative index
  exists(string lo |
    // N-x
    lo = arg.regexpCapture("N-(\\d+)", 1) and
    result = arity - lo.toInt()
    or
    // N-x..
    // The open upper end is the last valid index, `arity - 1`. This has to be a
    // range (`..`) rather than a two-element set literal (`,`), since otherwise
    // the values strictly between the two end points would be left out.
    lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
    result = [arity - lo.toInt() .. arity - 1]
  )
  or
  exists(string lo, string hi |
    // x..N-y
    regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
    result = [lo.toInt() .. arity - hi.toInt()]
    or
    // N-x..N-y
    regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
    result = [arity - lo.toInt() .. arity - hi.toInt()] and
    result >= 0
    or
    // N-x..y
    regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
    result = [arity - lo.toInt() .. hi.toInt()] and
    result >= 0
  )
}
/**
 * Gets any integer denoted by `arg`, where `arg` is an integer constant, a bounded
 * interval `n1..n2`, or a lower-bounded interval `n..` (which contains infinitely
 * many integers, hence `result` must be bound by the caller).
 *
 * Has no result for arguments involving an explicit arity, such as `N-1`.
 */
bindingset[arg, result]
int parseIntUnbounded(string arg) {
  // Lower-bounded interval "n..": every value at or above the bound is included.
  parseLowerBound(arg) <= result
  or
  // Constant "n" or bounded interval "n1..n2".
  parseInt(arg) = result
}
/**
 * Gets any integer denoted by `arg` for a call with `arity` arguments, where `arg`
 * is an integer constant or interval (bounded or unbounded) that may reference the
 * arity of the call, such as `N-1` or `N-3..N-1`.
 *
 * Note that expressions of form `N-x` will never resolve to a negative index,
 * even if `N` is zero (it will have no result in that case).
 */
bindingset[arg, arity]
int parseIntWithArity(string arg, int arity) {
  // Forms that mention the arity explicitly, e.g. "N-1".
  result = parseIntWithExplicitArity(arg, arity)
  or
  // Constant "n" or bounded interval "n1..n2".
  result = parseInt(arg)
  or
  // Lower-bounded interval "n..": capped at the last index, `arity - 1`.
  exists(int lowerBound | lowerBound = parseLowerBound(arg) |
    result in [lowerBound .. arity - 1]
  )
}
/** Gets the `n`th token on the access path `path`, as a plain string. */
private string getRawToken(AccessPath path, int n) {
  // Tokens may themselves contain dots, e.g. `Field[foo.Bar.x]`, so the path is not
  // split on '.'. Instead, `regexpFind` is used to locate valid tokens, supplemented by a
  // final length check (in `AccessPath.hasSyntaxError`) to ensure that all characters
  // were included in a token.
  exists(string tokenPattern | tokenPattern = "\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)" |
    result = path.regexpFind(tokenPattern, n, _)
  )
}
/**
 * A string that occurs as an access path (either identifying or input/output spec)
 * which might be relevant for this database.
 *
 * The set of such strings is given by the `accessPathRange` module parameter.
 */
final class AccessPath extends string {
  AccessPath() { accessPathRange(this) }

  /** Holds if this string is not a syntactically valid access path. */
  predicate hasSyntaxError() {
    not this = "" and
    // Every token contributes its own length plus one for the `.` that follows it;
    // the last token has no trailing `.`, hence the `- 1`. If this total matches the
    // length of the string, all characters must have been included in a token or
    // seen by the `.` lookahead pattern.
    exists(int coveredLength |
      coveredLength = sum(int n | | getRawToken(this, n).length() + 1) - 1
    |
      not this.length() = coveredLength
    )
  }

  /** Gets the `n`th token on the access path (if there are no syntax errors). */
  AccessPathToken getToken(int n) {
    not this.hasSyntaxError() and
    result = getRawToken(this, n)
  }

  /** Gets the number of tokens on the path (if there are no syntax errors). */
  int getNumToken() {
    not this.hasSyntaxError() and
    result = count(int n | exists(getRawToken(this, n)) | n)
  }
}
/**
 * An access path token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
 *
 * The member predicates mirror those of `AccessPathTokenBase`, but without binding sets,
 * since the set of tokens is restricted to those occurring in some `AccessPath`.
 */
class AccessPathToken extends AccessPathTokenBaseFinal {
  AccessPathToken() { exists(AccessPath path, int n | this = getRawToken(path, n)) }

  /** Gets the name of the token, such as `Member` from `Member[x]` */
  pragma[nomagic]
  string getName() { super.getName() = result }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  pragma[nomagic]
  string getArgumentList() { super.getArgumentList() = result }

  /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
  pragma[nomagic]
  string getArgument(int n) { super.getArgument(n) = result }

  /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  pragma[nomagic]
  string getArgument(string name, int n) {
    result = this.getArgument(n) and
    name = this.getName()
  }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { exists(int n | result = this.getArgument(n)) }

  /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument(string name) { exists(int n | result = this.getArgument(name, n)) }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  pragma[nomagic]
  int getNumArgument() { result = count(int n | exists(this.getArgument(n)) | n) }
}
}

View File

@@ -6,6 +6,7 @@
private import codeql.util.Unit
private import codeql.util.Option
private import codeql.util.Boolean
private import codeql.dataflow.DataFlow
module MakeImpl<InputSig Lang> {
@@ -1183,7 +1184,9 @@ module MakeImpl<InputSig Lang> {
string toString();
}
class Ap;
class Ap {
/** Gets a textual representation of this element. */
string toString();
}
class ApNil extends Ap;
@@ -1464,10 +1467,11 @@ module MakeImpl<InputSig Lang> {
pragma[nomagic]
private predicate fwdFlowIntoArg(
ArgNodeEx arg, FlowState state, Cc outercc, ParamNodeOption summaryCtx, TypOption argT,
ApOption argAp, Typ t, Ap ap, ApApprox apa, boolean cc
ApOption argAp, Typ t, Ap ap, boolean emptyAp, ApApprox apa, boolean cc
) {
fwdFlow(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and
if outercc instanceof CcCall then cc = true else cc = false
(if outercc instanceof CcCall then cc = true else cc = false) and
if ap instanceof ApNil then emptyAp = true else emptyAp = false
}
private signature module FwdFlowInInputSig {
@@ -1549,26 +1553,59 @@ module MakeImpl<InputSig Lang> {
viableImplNotCallContextReducedInlineLate(call, outercc)
}
pragma[nomagic]
pragma[inline]
private predicate fwdFlowInCand(
DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p,
ApApprox apa, boolean allowsFieldFlow, boolean cc
DataFlowCall call, ArgNodeEx arg, FlowState state, Cc outercc, DataFlowCallable inner,
ParamNodeEx p, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap,
boolean emptyAp, ApApprox apa, boolean cc
) {
fwdFlowIntoArg(arg, _, outercc, _, _, _, _, _, apa, cc) and
(
inner = viableImplCallContextReducedInlineLate(call, arg, outercc)
or
viableImplArgNotCallContextReduced(call, arg, outercc)
) and
callEdgeArgParamRestrictedInlineLate(call, inner, arg, p, allowsFieldFlow, apa)
exists(boolean allowsFieldFlow |
fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, emptyAp, apa, cc) and
(
inner = viableImplCallContextReducedInlineLate(call, arg, outercc)
or
viableImplArgNotCallContextReduced(call, arg, outercc)
) and
callEdgeArgParamRestrictedInlineLate(call, inner, arg, p, allowsFieldFlow, apa) and
if allowsFieldFlow = false then emptyAp = true else any()
)
}
/**
 * Holds if type flow is disabled (`not enableTypeFlow()`) and `fwdFlowInCand`
 * holds for the given arguments, with any value for its `emptyAp` column.
 */
pragma[inline]
private predicate fwdFlowInCandTypeFlowDisabled(
DataFlowCall call, ArgNodeEx arg, FlowState state, Cc outercc, DataFlowCallable inner,
ParamNodeEx p, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap,
ApApprox apa, boolean cc
) {
not enableTypeFlow() and
fwdFlowInCand(call, arg, state, outercc, inner, p, summaryCtx, argT, argAp, t, ap, _,
apa, cc)
}
pragma[nomagic]
private predicate fwdFlowInValidEdge(
private predicate fwdFlowInCandTypeFlowEnabled(
DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p,
CcCall innercc, ApApprox apa, boolean allowsFieldFlow, boolean cc
boolean emptyAp, ApApprox apa, boolean cc
) {
fwdFlowInCand(call, arg, outercc, inner, p, apa, allowsFieldFlow, cc) and
enableTypeFlow() and
fwdFlowInCand(call, arg, _, outercc, inner, p, _, _, _, _, _, emptyAp, apa, cc)
}
/**
 * Holds if type flow is disabled (`not enableTypeFlow()`),
 * `FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc)` holds, and `innercc` is
 * the call context given by `getCallContextCall(call, inner)`.
 */
pragma[nomagic]
private predicate fwdFlowInValidEdgeTypeFlowDisabled(
DataFlowCall call, DataFlowCallable inner, CcCall innercc, boolean cc
) {
not enableTypeFlow() and
FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and
innercc = getCallContextCall(call, inner)
}
/**
 * Holds if `fwdFlowInCandTypeFlowEnabled` holds for the given call edge
 * (`call`, `arg`, `outercc`, `inner`, `p`, `emptyAp`, `apa`, `cc`),
 * `FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc)` holds, and `innercc` is
 * the call context given by `getCallContextCall(call, inner)`.
 */
pragma[nomagic]
private predicate fwdFlowInValidEdgeTypeFlowEnabled(
DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p,
CcCall innercc, boolean emptyAp, ApApprox apa, boolean cc
) {
fwdFlowInCandTypeFlowEnabled(call, arg, outercc, inner, p, emptyAp, apa, cc) and
FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and
innercc = getCallContextCall(call, inner)
}
@@ -1579,10 +1616,18 @@ module MakeImpl<InputSig Lang> {
CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t,
Ap ap, ApApprox apa, boolean cc
) {
exists(ArgNodeEx arg, boolean allowsFieldFlow |
fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa, cc) and
fwdFlowInValidEdge(call, arg, outercc, inner, p, innercc, apa, allowsFieldFlow, cc) and
if allowsFieldFlow = false then ap instanceof ApNil else any()
exists(ArgNodeEx arg |
// type flow disabled: linear recursion
fwdFlowInCandTypeFlowDisabled(call, arg, state, outercc, inner, p, summaryCtx, argT,
argAp, t, ap, apa, cc) and
fwdFlowInValidEdgeTypeFlowDisabled(call, inner, innercc, cc)
or
// type flow enabled: non-linear recursion
exists(boolean emptyAp |
fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, emptyAp, apa, cc) and
fwdFlowInValidEdgeTypeFlowEnabled(call, arg, outercc, inner, p, innercc, emptyAp,
apa, cc)
)
)
}
}
@@ -2432,9 +2477,7 @@ module MakeImpl<InputSig Lang> {
class Typ = Unit;
class Ap extends boolean {
Ap() { this in [true, false] }
}
class Ap = Boolean;
class ApNil extends Ap {
ApNil() { this = false }
@@ -2724,7 +2767,7 @@ module MakeImpl<InputSig Lang> {
pragma[noinline]
ApHeadContent getHeadContent(Ap ap) { result = ap.getHead() }
predicate projectToHeadContent = getContentApprox/1;
predicate projectToHeadContent = getContentApproxCached/1;
class ApOption = ApproxAccessPathFrontOption;

View File

@@ -7,16 +7,22 @@ module MakeImplCommon<InputSig Lang> {
import Cached
module DataFlowImplCommonPublic {
/** Provides `FlowState = string`. */
module FlowStateString {
/**
* DEPRECATED: Generally, a custom `FlowState` type should be used instead,
* but `string` can of course still be used without referring to this
* module.
*
* Provides `FlowState = string`.
*/
deprecated module FlowStateString {
/** A state value to track during data flow. */
class FlowState = string;
deprecated class FlowState = string;
/**
* The default state, which is used when the state is unspecified for a source
* or a sink.
*/
class FlowStateEmpty extends FlowState {
deprecated class FlowStateEmpty extends FlowState {
FlowStateEmpty() { this = "" }
}
}
@@ -551,7 +557,8 @@ module MakeImplCommon<InputSig Lang> {
// local flow
exists(Node mid |
parameterValueFlowCand(p, mid, read) and
simpleLocalFlowStep(mid, node)
simpleLocalFlowStep(mid, node) and
validParameterAliasStep(mid, node)
)
or
// read
@@ -670,7 +677,8 @@ module MakeImplCommon<InputSig Lang> {
// local flow
exists(Node mid |
parameterValueFlow(p, mid, read) and
simpleLocalFlowStep(mid, node)
simpleLocalFlowStep(mid, node) and
validParameterAliasStep(mid, node)
)
or
// read
@@ -783,10 +791,12 @@ module MakeImplCommon<InputSig Lang> {
*/
pragma[nomagic]
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
mayBenefitFromCallContext(call, callable)
or
callEnclosingCallable(call, callable) and
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
(
mayBenefitFromCallContext(call)
or
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
) and
callEnclosingCallable(call, callable)
}
/**
@@ -973,6 +983,9 @@ module MakeImplCommon<InputSig Lang> {
cached
predicate paramMustFlow(ParamNode p, ArgNode arg) { localMustFlowStep+(p, arg) }
/** Gets the result of `getContentApprox(c)`; this is a `cached` wrapper around that predicate. */
cached
ContentApprox getContentApproxCached(Content c) { result = getContentApprox(c) }
cached
newtype TCallContext =
TAnyCallContext() or
@@ -1121,8 +1134,8 @@ module MakeImplCommon<InputSig Lang> {
Input::enableTypeFlow() and
(
exists(ParamNode p, DataFlowType at, DataFlowType pt |
at = getNodeType(arg) and
pt = getNodeType(p) and
nodeDataFlowType(arg, at) and
nodeDataFlowType(p, pt) and
relevantCallEdge(_, _, arg, p) and
typeStrongerThan0(pt, at)
)
@@ -1131,8 +1144,8 @@ module MakeImplCommon<InputSig Lang> {
// A call edge may implicitly strengthen a type by ensuring that a
// specific argument node was reached if the type of that argument was
// strengthened via a cast.
at = getNodeType(arg) and
pt = getNodeType(p) and
nodeDataFlowType(arg, at) and
nodeDataFlowType(p, pt) and
paramMustFlow(p, arg) and
relevantCallEdge(_, _, arg, _) and
typeStrongerThan0(at, pt)
@@ -1172,8 +1185,8 @@ module MakeImplCommon<InputSig Lang> {
or
exists(ArgNode arg, DataFlowType at, DataFlowType pt |
trackedParamTypeCand(p) and
at = getNodeType(arg) and
pt = getNodeType(p) and
nodeDataFlowType(arg, at) and
nodeDataFlowType(p, pt) and
relevantCallEdge(_, _, arg, p) and
typeStrongerThan0(at, pt)
)
@@ -1883,7 +1896,7 @@ module MakeImplCommon<InputSig Lang> {
Content getAHead() {
exists(ContentApprox cont |
this = TApproxFrontHead(cont) and
cont = getContentApprox(result)
cont = getContentApproxCached(result)
)
}
}

View File

@@ -319,4 +319,9 @@ module MakeConsistency<
strictcount(DataFlowCall call0 | multipleArgumentCallInclude(arg, call0)) > 1 and
msg = "Multiple calls for argument node."
}
/**
 * Holds if `call` and `receiver` are related by `lambdaCall`, but the enclosing
 * callable of `receiver` (per `nodeGetEnclosingCallable`) is not the enclosing
 * callable of `call`.
 */
query predicate lambdaCallEnclosingCallableMismatch(DataFlowCall call, Node receiver) {
lambdaCall(call, _, receiver) and
not nodeGetEnclosingCallable(receiver) = call.getEnclosingCallable()
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -35,6 +35,7 @@ signature module InputSig<DF::InputSig DataFlowLang> {
predicate defaultSink(DataFlowLang::Node source);
bindingset[src, sink]
string getArgString(DataFlowLang::Node src, DataFlowLang::Node sink);
}
@@ -62,7 +63,13 @@ module InlineFlowTestMake<
predicate isSink(DataFlowLang::Node sink) { none() }
}
module FlowTest<DataFlow::ConfigSig ValueFlowConfig, DataFlow::ConfigSig TaintFlowConfig> {
bindingset[src, sink]
signature string getArgStringSig(DataFlowLang::Node src, DataFlowLang::Node sink);
module FlowTestArgString<
DataFlow::ConfigSig ValueFlowConfig, DataFlow::ConfigSig TaintFlowConfig,
getArgStringSig/2 getArgString>
{
module ValueFlow = DataFlow::Global<ValueFlowConfig>;
module TaintFlow = TaintTracking::Global<TaintFlowConfig>;
@@ -82,7 +89,7 @@ module InlineFlowTestMake<
exists(DataFlowLang::Node src, DataFlowLang::Node sink | ValueFlow::flow(src, sink) |
hasLocationInfo(sink, location) and
element = sink.toString() and
value = Impl::getArgString(src, sink)
value = getArgString(src, sink)
)
or
tag = "hasTaintFlow" and
@@ -91,7 +98,7 @@ module InlineFlowTestMake<
|
hasLocationInfo(sink, location) and
element = sink.toString() and
value = Impl::getArgString(src, sink)
value = getArgString(src, sink)
)
}
}
@@ -105,13 +112,27 @@ module InlineFlowTestMake<
}
}
/**
 * Instantiates `FlowTestArgString` with the given value-flow and taint-flow
 * configurations, using `Impl::getArgString` to compute the argument string.
 */
module FlowTest<DataFlow::ConfigSig ValueFlowConfig, DataFlow::ConfigSig TaintFlowConfig> {
import FlowTestArgString<ValueFlowConfig, TaintFlowConfig, Impl::getArgString/2>
}
module DefaultFlowTest = FlowTest<DefaultFlowConfig, DefaultFlowConfig>;
/**
 * Instantiates `FlowTestArgString` with `ValueFlowConfig` as the value-flow
 * configuration, `NoFlowConfig` as the taint-flow configuration, and the
 * supplied `getArgString` predicate.
 */
module ValueFlowTestArgString<DataFlow::ConfigSig ValueFlowConfig, getArgStringSig/2 getArgString>
{
import FlowTestArgString<ValueFlowConfig, NoFlowConfig, getArgString/2>
}
module ValueFlowTest<DataFlow::ConfigSig ValueFlowConfig> {
import FlowTest<ValueFlowConfig, NoFlowConfig>
import ValueFlowTestArgString<ValueFlowConfig, Impl::getArgString/2>
}
/**
 * Instantiates `FlowTestArgString` with `NoFlowConfig` as the value-flow
 * configuration, `TaintFlowConfig` as the taint-flow configuration, and the
 * supplied `getArgString` predicate.
 */
module TaintFlowTestArgString<DataFlow::ConfigSig TaintFlowConfig, getArgStringSig/2 getArgString>
{
import FlowTestArgString<NoFlowConfig, TaintFlowConfig, getArgString/2>
}
module TaintFlowTest<DataFlow::ConfigSig TaintFlowConfig> {
import FlowTest<NoFlowConfig, TaintFlowConfig>
import TaintFlowTestArgString<TaintFlowConfig, Impl::getArgString/2>
}
}

View File

@@ -1,5 +1,5 @@
name: codeql/dataflow
version: 0.1.5
version: 0.1.7
groups: shared
library: true
dependencies: