Files
codeql/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll
2024-02-21 10:57:51 +01:00

350 lines
12 KiB
Plaintext

private import codeql.dataflow.DataFlow
private import codeql.actions.Ast
private import codeql.actions.Cfg as Cfg
private import codeql.Locations
private import codeql.actions.controlflow.BasicBlocks
private import DataFlowPublic
private import codeql.actions.dataflow.ExternalFlow
private import codeql.actions.dataflow.FlowSteps
cached
newtype TNode = TExprNode(DataFlowExpr e)
class OutNode extends ExprNode {
private DataFlowCall call;
OutNode() { call = this.getCfgNode() }
DataFlowCall getCall(ReturnKind kind) {
result = call and
kind instanceof NormalReturn
}
}
/**
* Not implemented
*/
class CastNode extends Node {
CastNode() { none() }
}
/**
* Not implemented
*/
class PostUpdateNode extends Node {
PostUpdateNode() { none() }
Node getPreUpdateNode() { none() }
}
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
predicate isArgumentNode(ArgumentNode arg, DataFlowCall call, ArgumentPosition pos) {
arg.argumentOf(call, pos)
}
DataFlowCallable nodeGetEnclosingCallable(Node node) {
node = TExprNode(any(DataFlowExpr e | result = e.getScope()))
}
DataFlowType getNodeType(Node node) { any() }
predicate nodeIsHidden(Node node) { none() }
class DataFlowExpr extends Cfg::Node {
DataFlowExpr() { this.getAstNode() instanceof Expression }
}
/**
* A call corresponds to a Uses steps where a 3rd party action or a reusable workflow get called
*/
class DataFlowCall instanceof Cfg::Node {
DataFlowCall() { super.getAstNode() instanceof UsesExpr }
/** Gets a textual representation of this element. */
string toString() { result = super.toString() }
Location getLocation() { result = super.getLocation() }
string getName() { result = super.getAstNode().(UsesExpr).getCallee() }
DataFlowCallable getEnclosingCallable() { result = super.getScope() }
}
/**
* A Cfg scope that can be called
*/
class DataFlowCallable instanceof Cfg::CfgScope {
string toString() { result = super.toString() }
Location getLocation() { result = super.getLocation() }
string getName() {
if this instanceof ReusableWorkflowStmt
then result = this.(ReusableWorkflowStmt).getName()
else
if this instanceof CompositeActionStmt
then result = this.(CompositeActionStmt).getName()
else none()
}
}
newtype TReturnKind = TNormalReturn()
abstract class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
abstract string toString();
}
class NormalReturn extends ReturnKind, TNormalReturn {
override string toString() { result = "return" }
}
/** Gets a viable implementation of the target of the given `Call`. */
DataFlowCallable viableCallable(DataFlowCall c) { c.getName() = result.getName() }
/**
* Gets a node that can read the value returned from `call` with return kind
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
private newtype TDataFlowType = TUnknownDataFlowType()
/**
* A type for a data flow node.
*
* This may or may not coincide with any type system existing for the source
* language, but should minimally include unique types for individual closure
* expressions (typically lambdas).
*/
class DataFlowType extends TDataFlowType {
string toString() { result = "" }
}
string ppReprType(DataFlowType t) { none() }
bindingset[t1, t2]
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { t1 = t2 }
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
newtype TContent =
TFieldContent(string name) {
// We only use field flow for steps and jobs outputs, not for accessing other context fields such as env or inputs
name = any(StepsCtxAccessExpr a).getFieldName() or
name = any(NeedsCtxAccessExpr a).getFieldName() or
name = any(JobsCtxAccessExpr a).getFieldName()
}
predicate forceHighPrecision(Content c) { c instanceof FieldContent }
class ContentApprox = ContentSet;
ContentApprox getContentApprox(Content c) { result = c }
/**
* Made a string to match the ArgumentPosition type.
*/
class ParameterPosition extends string {
ParameterPosition() { exists(any(ReusableWorkflowStmt w).getInputsStmt().getInputExpr(this)) }
}
/**
* Made a string to match `With:` keys in the AST
*/
class ArgumentPosition extends string {
ArgumentPosition() { exists(any(UsesExpr e).getArgumentExpr(this)) }
}
/**
*/
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
/**
* Holds if there is a local flow step between a ${{ steps.xxx.outputs.yyy }} expression accesing a step output field
* and the step output itself. But only for those cases where the step output is defined externally in a MaD Source
* specification. The reason for this is that we don't currently have a way to specify that a source starts with a
* non-empty access path so we cannot write a Source that stores the taint in a Content, we can only do that for steps
* (storeStep). The easiest thing is to add this local flow step that simulates a read step from the source node for a specific
* field name.
*/
predicate stepsCtxLocalStep(Node nodeFrom, Node nodeTo) {
exists(UsesExpr astFrom, StepsCtxAccessExpr astTo |
externallyDefinedSource(nodeFrom, _, "output." + astTo.getFieldName(), _) and
astFrom = nodeFrom.asExpr() and
astTo = nodeTo.asExpr() and
astTo.getRefExpr() = astFrom
)
}
/**
* Holds if there is a local flow step between a ${{ needs.xxx.outputs.yyy }} expression accesing a job output field
* and the step output itself. But only for those cases where the job (needs) output is defined externally in a MaD Source
* specification. The reason for this is that we don't currently have a way to specify that a source starts with a
* non-empty access path so we cannot write a Source that stores the taint in a Content, we can only do that for steps
* (storeStep). The easiest thing is to add this local flow step that simulates a read step from the source node for a specific
* field name.
*/
predicate needsCtxLocalStep(Node nodeFrom, Node nodeTo) {
exists(UsesExpr astFrom, NeedsCtxAccessExpr astTo |
externallyDefinedSource(nodeFrom, _, "output." + astTo.getFieldName(), _) and
astFrom = nodeFrom.asExpr() and
astTo = nodeTo.asExpr() and
astTo.getRefExpr() = astFrom
)
}
/**
* Holds if there is a local flow step between a ${{}} expression accesing an input variable and the input itself
* e.g. ${{ inputs.foo }}
*/
predicate inputsCtxLocalStep(Node nodeFrom, Node nodeTo) {
exists(Expression astFrom, InputsCtxAccessExpr astTo |
astFrom = nodeFrom.asExpr() and
astTo = nodeTo.asExpr() and
astTo.getRefExpr() = astFrom
)
}
/**
* Holds if there is a local flow step between a ${{}} expression accesing an env var and the var definition itself
* e.g. ${{ env.foo }}
*/
predicate envCtxLocalStep(Node nodeFrom, Node nodeTo) {
exists(Expression astFrom, EnvCtxAccessExpr astTo |
astFrom = nodeFrom.asExpr() and
astTo = nodeTo.asExpr() and
(
externallyDefinedSource(nodeFrom, _, "env." + astTo.getFieldName(), _) or
astTo.getRefExpr() = astFrom
)
)
}
/**
* Holds if there is a local flow step from `nodeFrom` to `nodeTo`.
* For Actions, we dont need SSA nodes since it should be already in SSA form
* Local flow steps are always between two nodes in the same Cfg scope (job definition).
*/
pragma[nomagic]
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
stepsCtxLocalStep(nodeFrom, nodeTo) or
needsCtxLocalStep(nodeFrom, nodeTo) or
inputsCtxLocalStep(nodeFrom, nodeTo) or
envCtxLocalStep(nodeFrom, nodeTo)
}
/**
* a simple local flow step that should always preserve the call context (same callable)
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { localFlowStep(nodeFrom, nodeTo) }
/**
* Holds if data can flow from `node1` to `node2` through a non-local step
* that does not follow a call edge. For example, a step through a global
* variable.
* We throw away the call context and let us jump to any location
* AKA teleport steps
* local steps are preferible since they are more predictable and easier to control
*/
predicate jumpStep(Node nodeFrom, Node nodeTo) { none() }
/**
* Holds if a CtxAccessExpr reads a field from a job (needs/jobs), step (steps) output via a read of `c` (fieldname)
*/
predicate ctxFieldReadStep(Node node1, Node node2, ContentSet c) {
exists(CtxAccessExpr access |
(
access instanceof NeedsCtxAccessExpr or
access instanceof StepsCtxAccessExpr or
access instanceof JobsCtxAccessExpr
) and
c = any(FieldContent ct | ct.getName() = access.getFieldName()) and
node1.asExpr() = access.getRefExpr() and
node2.asExpr() = access
)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `c`. Thus,
* `node1` references an object with a content `c.getAReadContent()` whose
* value ends up in `node2`.
* Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path.
*/
predicate readStep(Node node1, ContentSet c, Node node2) { ctxFieldReadStep(node1, node2, c) }
/**
* Stores an output expression (node1) into its OutputsStm node (node2)
* using the output variable name as the access path
*/
predicate fieldStoreStep(Node node1, Node node2, ContentSet c) {
exists(OutputsStmt out, string fieldName |
node1.asExpr() = out.getOutputExpr(fieldName) and
node2.asExpr() = out and
c = any(FieldContent ct | ct.getName() = fieldName)
)
}
/**
* Holds if data can flow from `node1` to `node2` via a store into `c`. Thus,
* `node2` references an object with a content `c.getAStoreContent()` that
* contains the value of `node1`.
* Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) {
fieldStoreStep(node1, node2, c) or
externallyDefinedStoreStep(node1, node2, c) or
runEnvToScriptStoreStep(node1, node2, c)
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, ContentSet c) { none() }
/**
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
*/
predicate expectsContent(Node n, ContentSet c) { none() }
/**
* Holds if the node `n` is unreachable when the call context is `call`.
*/
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
*
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { localFlowStep(nodeFrom, nodeTo) }
private newtype TLambdaCallKind = TNone()
class LambdaCallKind = TLambdaCallKind;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Since our model is so simple, we dont want to compress the local flow steps.
* This compression is normally done to not show SSA steps, casts, etc.
*/
predicate neverSkipInPathGraph(Node node) { any() }