Files
codeql/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll
2025-12-04 11:59:58 +00:00

483 lines
16 KiB
Plaintext

private import go
private import DataFlowUtil
private import DataFlowImplCommon
private import ContainerFlow
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.go.dataflow.FlowSummary as FlowSummary
private import semmle.go.dataflow.ExternalFlow
private import codeql.util.Unit
import DataFlowNodes::Private
private newtype TReturnKind =
MkReturnKind(int i) { exists(SignatureType st | exists(st.getResultType(i))) }
ReturnKind getReturnKind(int i) { result = MkReturnKind(i) }
/**
* A return kind. A return kind describes how a value can be returned
* from a callable. For Go, this is either a return of a single value
* or of one of multiple values.
*/
class ReturnKind extends TReturnKind {
int i;
ReturnKind() { this = MkReturnKind(i) }
/** Gets the index of this return value. */
int getIndex() { result = i }
/** Gets a textual representation of this return kind. */
string toString() { result = "return[" + i + "]" }
}
/**
* Gets a node that can read the value returned from `call` with return kind
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
exists(DataFlow::CallNode c, int i | c.asExpr() = call and kind = MkReturnKind(i) |
result = c.getResult(i)
)
}
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step, not taking function models into account.
*/
predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Instruction -> Instruction
exists(Expr pred, Expr succ |
succ.(LogicalBinaryExpr).getAnOperand() = pred or
succ.(ConversionExpr).getOperand() = pred
|
nodeFrom = exprNode(pred) and
nodeTo = exprNode(succ)
)
or
// Type assertion: if in the context `checked, ok := e.(*Type)` (in which
// case tuple-extraction instructions exist), flow from `e` to `e.(*Type)[0]`;
// otherwise flow from `e` to `e.(*Type)`.
exists(IR::Instruction evalAssert, TypeAssertExpr assert |
nodeFrom.asExpr() = assert.getExpr() and
evalAssert = IR::evalExprInstruction(assert) and
if exists(IR::extractTupleElement(evalAssert, _))
then nodeTo.asInstruction() = IR::extractTupleElement(evalAssert, 0)
else nodeTo.asInstruction() = evalAssert
)
or
// Instruction -> SSA defn
exists(IR::Instruction pred, SsaExplicitDefinition succ |
succ.getRhs() = pred and
(
nodeFrom = instructionNode(pred) or
nodeFrom.(PostUpdateNode).getPreUpdateNode() = instructionNode(pred)
) and
nodeTo = ssaNode(succ.getVariable())
)
or
// SSA defn -> first SSA use
exists(SsaDefinition pred, IR::Instruction succ | succ = pred.getAFirstUse() |
(pred instanceof SsaExplicitDefinition or pred instanceof SsaVariableCapture) and
nodeFrom = ssaNode(pred.getVariable()) and
nodeTo = instructionNode(succ)
)
or
// SSA use -> successive SSA use
// Note this case includes Phi node traversal
exists(IR::Instruction pred, IR::Instruction succ | succ = getAnAdjacentUse(pred) |
(
nodeFrom = instructionNode(pred) or
nodeFrom.(PostUpdateNode).getPreUpdateNode() = instructionNode(pred)
) and
nodeTo = instructionNode(succ) and
nodeTo != nodeFrom
)
or
// GlobalFunctionNode -> use
nodeFrom =
any(GlobalFunctionNode fn | fn.getFunction() = nodeTo.asExpr().(FunctionName).getTarget())
}
pragma[noinline]
private Field getASparselyUsedChannelTypedField() {
result.getType() instanceof ChanType and
count(result.getARead()) = 2
}
bindingset[v]
pragma[inline_late]
private predicate isValueEntityRead(ValueEntity v, Node n) { n = v.getARead() }
/**
* Holds if data can flow from `node1` to `node2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*/
predicate jumpStep(Node n1, Node n2) {
exists(ValueEntity v |
not v instanceof SsaSourceVariable and
not v instanceof Field and
(
any(Write w).writes(v, n1)
or
n1.(DataFlow::PostUpdateNode).getPreUpdateNode() = v.getARead()
) and
isValueEntityRead(v, n2)
)
or
exists(SsaExplicitDefinition def, SsaVariableCapture succ |
succ.getSourceVariable() = def.getSourceVariable() and
n2 = ssaNode(succ)
|
not exists(def.getAFirstUse()) and n1 = ssaNode(def)
or
exists(IR::Instruction lastUse |
lastUse = getAnAdjacentUse*(def.getAFirstUse()) and
not exists(getAnAdjacentUse(lastUse))
|
n1 = instructionNode(lastUse) or
n1.(DataFlow::PostUpdateNode).getPreUpdateNode() = instructionNode(lastUse)
)
)
or
// If a channel-typed field is referenced exactly once in the context of
// a send statement and once in a receive expression, assume the two are linked.
exists(
Field f, DataFlow::ReadNode recvRead, DataFlow::ReadNode sendRead, RecvExpr re, SendStmt ss
|
f = getASparselyUsedChannelTypedField() and
recvRead = f.getARead() and
sendRead = f.getARead() and
recvRead.asExpr() = re.getOperand() and
sendRead.asExpr() = ss.getChannel() and
n1.(DataFlow::PostUpdateNode).getPreUpdateNode() = sendRead and
n2 = recvRead
)
or
FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(),
n2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `c`.
* Thus, `node2` references an object with a content `x` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
// a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of the
// post-update node of `(*p)`, which in turn flows into the pointer content of the post-update
// node of `p`
exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) |
node1 = rhs and
node2 = base and
c = any(DataFlow::FieldContent fc | fc.getField() = f)
or
node1 = base and
node2.(PostUpdateNode).getPreUpdateNode() =
node1.(PostUpdateNode).getPreUpdateNode().(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
)
or
node1 = node2.(AddressOperationNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
or
containerStoreStep(node1, node2, c)
)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `c`.
* Thus, `node1` references an object with a content `c` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
node1 = node2.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType())
or
exists(FieldReadNode read |
node2 = read and
node1 = read.getBase() and
c = any(DataFlow::FieldContent fc | fc.getField() = read.getField())
)
or
containerReadStep(node1, node2, c)
)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
or
any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and
cs.isUniversalContent() and
node1 = node2
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
predicate clearsContent(Node n, ContentSet c) {
// Because our post-update nodes are shared between multiple pre-update
// nodes, attempting to clear content causes summary stores into arg in
// particular to malfunction.
none()
// c instanceof FieldContent and
// FlowSummaryImpl::Private::Steps::summaryStoresIntoArg(c, n)
// or
// FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
}
/**
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
*/
predicate expectsContent(Node n, ContentSet c) {
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
}
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
predicate localMustFlowStep(Node node1, Node node2) { none() }
/** Gets the type of `n` used for type pruning. */
DataFlowType getNodeType(Node n) { result = TTodoDataFlowType() and exists(n) }
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*/
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) {
t1 = TTodoDataFlowType() and t2 = TTodoDataFlowType() // stub implementation
}
//////////////////////////////////////////////////////////////////////////////
// Java QL library compatibility wrappers
//////////////////////////////////////////////////////////////////////////////
/** A node that performs a type cast. */
class CastNode extends ExprNode {
override ConversionExpr expr;
}
/**
* Holds if `n` should never be skipped over in the `PathGraph` and in path
* explanations.
*/
predicate neverSkipInPathGraph(Node n) {
exists(DataFlow::FunctionModel fm | fm.getAnInputNode(_) = n or fm.getAnOutputNode(_) = n)
or
exists(TaintTracking::FunctionModel fm | fm.getAnInputNode(_) = n or fm.getAnOutputNode(_) = n)
}
class DataFlowExpr = Expr;
private newtype TDataFlowType = TTodoDataFlowType()
class DataFlowType extends TDataFlowType {
/** Gets a textual representation of this element. */
string toString() { result = "" }
}
private newtype TDataFlowCallable =
TCallable(Callable c) or
TFileScope(File f) or
TExternalFileScope() or
TSummarizedCallable(FlowSummary::SummarizedCallable c)
class DataFlowCallable extends TDataFlowCallable {
/**
* Gets the `Callable` corresponding to this `DataFlowCallable`, if any.
*/
Callable asCallable() { this = TCallable(result) }
/**
* Gets the `File` whose root scope corresponds to this `DataFlowCallable`, if any.
*/
File asFileScope() { this = TFileScope(result) }
/**
* Holds if this `DataFlowCallable` is an external file scope.
*/
predicate isExternalFileScope() { this = TExternalFileScope() }
/**
* Gets the `SummarizedCallable` corresponding to this `DataFlowCallable`, if any.
*/
FlowSummary::SummarizedCallable asSummarizedCallable() { this = TSummarizedCallable(result) }
/**
* Gets the type of this callable.
*
* If this is a `File` root scope, this has no value.
*/
SignatureType getType() { result = [this.asCallable(), this.asSummarizedCallable()].getType() }
/**
* Gets a string representation of this callable.
*/
string toString() {
result = this.asCallable().toString() or
result = "File scope: " + this.asFileScope().toString() or
result = "Summary: " + this.asSummarizedCallable().toString()
}
/** Gets the location of this callable. */
Location getLocation() {
result = this.asCallable().getLocation() or
result = this.asFileScope().getLocation() or
result = this.asSummarizedCallable().getLocation()
}
/**
* DEPRECATED: Use `getLocation()` instead.
*
* Holds if this callable is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
deprecated predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/** A function call relevant for data flow. */
class DataFlowCall extends Expr {
DataFlow::CallNode call;
DataFlowCall() { this = call.asExpr() }
/**
* Gets the nth argument for this call.
*/
Node getArgument(int n) { result = call.getArgument(n) }
/** Gets the data flow node corresponding to this call. */
ExprNode getNode() { result = call }
/** Gets the enclosing callable of this call. */
DataFlowCallable getEnclosingCallable() {
// NB. At present calls cannot occur inside summarized callables-- this will change if we implement advanced lambda support.
result.asCallable().getFuncDef() = this.getEnclosingFunction()
or
not exists(this.getEnclosingFunction()) and result.asFileScope() = this.getFile()
}
/** Gets the location of this call. */
Location getLocation() { result = super.getLocation() }
}
/** Holds if `e` is an expression that always has the same Boolean value `val`. */
private predicate constantBooleanExpr(Expr e, boolean val) {
e.getBoolValue() = val
or
exists(SsaExplicitDefinition v, Expr src |
IR::evalExprInstruction(e) = v.getVariable().getAUse() and
IR::evalExprInstruction(src) = v.getRhs() and
constantBooleanExpr(src, val)
)
}
/** An argument that always has the same Boolean value. */
private class ConstantBooleanArgumentNode extends ArgumentNode, ExprNode {
ConstantBooleanArgumentNode() { constantBooleanExpr(this.getExpr(), _) }
/** Gets the Boolean value of this expression. */
boolean getBooleanValue() { constantBooleanExpr(this.getExpr(), result) }
}
/**
* Returns a guard that will certainly not hold in calling context `call`.
*
* In particular it does not hold because it checks that `param` has value `b`, but
* in context `call` it is known to have value `!b`. Note this is `noinline`d in order
* to avoid a bad join order in `isUnreachableInCall`.
*/
pragma[noinline]
private ControlFlow::ConditionGuardNode getAFalsifiedGuard(DataFlowCall call) {
exists(SsaParameterNode param, ConstantBooleanArgumentNode arg |
// get constant bool argument and parameter for this call
viableParamArg(call, pragma[only_bind_into](param), pragma[only_bind_into](arg)) and
// which is used in a guard controlling `n` with the opposite value of `arg`
result.ensures(param.getAUse(), arg.getBooleanValue().booleanNot())
)
}
class NodeRegion instanceof BasicBlock {
string toString() { result = "NodeRegion" }
predicate contains(Node n) { n.getBasicBlock() = this }
}
/**
* Holds if the nodes in `nr` are unreachable when the call context is `call`.
*/
predicate isUnreachableInCall(NodeRegion nr, DataFlowCall call) {
getAFalsifiedGuard(call).dominates(nr)
}
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) {
c instanceof ArrayContent or c instanceof CollectionContent
}
/**
* Gets the `i`th argument of call `c`, where the receiver of a method call
* counts as argument -1.
*/
Node getArgument(CallNode c, int i) {
result = c.getArgument(i)
or
result = c.(MethodCallNode).getReceiver() and
i = -1
}
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { n instanceof FlowSummaryNode }
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
predicate knownSourceModel(Node source, string model) { sourceNode(source, _, model) }
predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }
class DataFlowSecondLevelScope = Unit;
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
*
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) {
exists(DataFlowCallable c, int pos |
p.isParameterOf(c, pos) and
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asSummarizedCallable(), pos)
)
}
/** An approximated `Content`. */
class ContentApprox = Unit;
/** Gets an approximated value for content `c`. */
pragma[inline]
ContentApprox getContentApprox(Content c) { any() }