JS: Capture flow

This commit is contained in:
Asger F
2023-10-04 15:15:23 +02:00
parent 16df2c31bb
commit 7bcf8b858b
4 changed files with 436 additions and 0 deletions

View File

@@ -1994,6 +1994,7 @@ private class CallAgainstEqualityCheck extends DerivedBarrierGuardNode {
* Can be added to a `isBarrier` in a data-flow configuration to block flow through such checks.
*/
class VarAccessBarrier extends DataFlow::Node {
// TODO: This does not work in dataflow2 when the variable is captured, since the capture-flow library bypasses the refinement node.
VarAccessBarrier() {
exists(ConditionGuardNode guard, SsaRefinementNode refinement |
this = DataFlow::ssaDefinitionNode(refinement) and

View File

@@ -10,6 +10,8 @@ private import semmle.javascript.dataflow.internal.Contents::Private
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowImplCommon as DataFlowImplCommon
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.javascript.dataflow.internal.sharedlib.FlowSummaryImpl as FlowSummaryImpl
private import semmle.javascript.dataflow.internal.VariableCapture as VariableCapture
cached
private module Cached {
/**
@@ -57,6 +59,7 @@ private module Cached {
TFlowSummaryIntermediateAwaitStoreNode(FlowSummaryImpl::Private::SummaryNode sn) {
FlowSummaryImpl::Private::Steps::summaryStoreStep(sn, MkAwaited(), _)
} or
TSynthCaptureNode(VariableCapture::VariableCaptureOutput::SynthesizedCaptureNode node) or
TGenericSynthesizedNode(AstNode node, string tag, DataFlowPrivate::DataFlowCallable container) {
any(AdditionalFlowInternal flow).needsSynthesizedNode(node, tag, container)
}

View File

@@ -42,6 +42,30 @@ class FlowSummaryIntermediateAwaitStoreNode extends DataFlow::Node,
}
}
/**
 * A data-flow node that wraps a node synthesized by the variable-capture library.
 */
class CaptureNode extends DataFlow::Node, TSynthCaptureNode {
  /** Gets the synthesized variable-capture node wrapped by this data-flow node. */
  VariableCaptureOutput::SynthesizedCaptureNode getNode() {
    DataFlowImplCommon::forceCachingInSameStage() and
    TSynthCaptureNode(result) = this
  }

  /** Gets the enclosing callable of the wrapped node, reported as this node's container. */
  cached
  override StmtContainer getContainer() {
    exists(VariableCaptureOutput::SynthesizedCaptureNode wrapped | wrapped = this.getNode() |
      wrapped.getEnclosingCallable() = result
    )
  }

  /** Gets the string representation of the wrapped node. */
  cached
  private string toStringInternal() {
    exists(VariableCaptureOutput::SynthesizedCaptureNode wrapped | wrapped = this.getNode() |
      wrapped.toString() = result
    )
  }

  override string toString() { this.toStringInternal() = result } // cached in parent class

  /** Gets the location of the wrapped node. */
  cached
  private Location getLocation() {
    exists(VariableCaptureOutput::SynthesizedCaptureNode wrapped | wrapped = this.getNode() |
      wrapped.getLocation() = result
    )
  }

  /** Holds if the wrapped node's location has the given position information. */
  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
class GenericSynthesizedNode extends DataFlow::Node, TGenericSynthesizedNode {
private AstNode node;
private string tag;
@@ -145,6 +169,8 @@ predicate postUpdatePair(Node pre, Node post) {
or
FlowSummaryImpl::Private::summaryPostUpdateNode(post.(FlowSummaryNode).getSummaryNode(),
pre.(FlowSummaryNode).getSummaryNode())
or
VariableCaptureOutput::capturePostUpdateNode(getClosureNode(post), getClosureNode(pre))
}
/**
 * A data-flow node that is also an `EmptyType`.
 *
 * NOTE(review): `EmptyType` is declared outside this view; presumably it has no values,
 * which would make this class empty - confirm against its definition.
 */
class CastNode extends DataFlow::Node instanceof EmptyType { }
@@ -232,6 +258,15 @@ private predicate isArgumentNodeImpl(Node n, DataFlowCall call, ArgumentPosition
or
pos.isFunctionSelfReference() and n = call.asOrdinaryCall().getCalleeNode()
or
pos.isFunctionSelfReference() and n = call.asImpliedLambdaCall().flow()
or
exists(Function fun |
call.asImpliedLambdaCall() = fun and
CallGraph::impliedReceiverStep(n, TThisNode(fun)) and
sameContainerAsEnclosingContainer(n, fun) and
pos.isThis()
)
or
pos.isThis() and n = TConstructorThisArgumentNode(call.asOrdinaryCall().asExpr())
or
// For now, treat all spread arguments as flowing into the 'arguments' array, regardless of preceding arguments
@@ -280,6 +315,15 @@ predicate nodeIsHidden(Node node) {
or
node instanceof FlowSummaryIntermediateAwaitStoreNode
or
node instanceof CaptureNode
or
// Hide function expressions, as capture-flow causes them to appear in unhelpful ways
// TODO: Instead hide PathNodes with a capture content as the head of its access path?
node.asExpr() instanceof Function
or
// Also hide post-update nodes for function expressions
node.(DataFlow::ExprPostUpdateNode).getExpr() instanceof Function
or
node instanceof GenericSynthesizedNode
}
@@ -324,6 +368,9 @@ private newtype TDataFlowCall =
node = TValueNode(any(PropAccess p)) or
node = TPropNode(any(PropertyPattern p))
} or
MkImpliedLambdaCall(Function f) {
VariableCaptureConfig::captures(f, _) or CallGraph::impliedReceiverStep(_, TThisNode(f))
} or
MkSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
@@ -343,6 +390,7 @@ class DataFlowCall extends TDataFlowCall {
DataFlow::InvokeNode asBoundCall(int boundArgs) { this = MkBoundCall(result, boundArgs) }
Function asImpliedLambdaCall() { this = MkImpliedLambdaCall(result) }
predicate isSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable enclosingCallable,
@@ -350,6 +398,7 @@ class DataFlowCall extends TDataFlowCall {
) {
this = MkSummaryCall(enclosingCallable, receiver)
}
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
@@ -438,6 +487,7 @@ private class AccessorCall extends DataFlowCall, MkAccessorCall {
ref.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
class SummaryCall extends DataFlowCall, MkSummaryCall {
private FlowSummaryImpl::Public::SummarizedCallable enclosingCallable;
private FlowSummaryImpl::Private::SummaryNode receiver;
@@ -456,6 +506,30 @@ class SummaryCall extends DataFlowCall, MkSummaryCall {
FlowSummaryImpl::Private::SummaryNode getReceiver() { result = receiver }
}
/**
 * A synthetic call that invokes a lambda, passing nothing but its self-reference node.
 *
 * Such a call helps captured variables flow into the lambda in cases where
 * none of its call sites can be found.
 */
private class ImpliedLambdaCall extends DataFlowCall, MkImpliedLambdaCall {
  private Function lambda;

  ImpliedLambdaCall() { MkImpliedLambdaCall(lambda) = this }

  override string toString() { "[implied lambda call] " + lambda = result }

  /** Holds if the invoked function's location has the given position information. */
  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    lambda.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }

  /** Gets the callable for the container that encloses the invoked function. */
  override DataFlowCallable getEnclosingCallable() {
    lambda.getEnclosingContainer() = result.asSourceCallable()
  }
}
private int getMaxArity() {
// TODO: account for flow summaries
result =
@@ -542,6 +616,8 @@ DataFlowCallable viableCallable(DataFlowCall node) {
result = MkLibraryCallable(callable) and
node.asOrdinaryCall() = [callable.getACall(), callable.getACallSimple()]
)
or
result.asSourceCallableNotExterns() = node.asImpliedLambdaCall()
}
/**
@@ -568,12 +644,28 @@ private predicate sameContainerAsEnclosingContainer(Node node, Function fun) {
node.getContainer() = fun.getEnclosingContainer()
}
/**
 * Holds if `node` should be excluded from the local data flow graph, even though
 * the node itself still exists for use by the legacy data flow library.
 */
pragma[nomagic]
private predicate isBlockedLegacyNode(TCapturedVariableNode node) {
  // Captured-variable nodes are ignored for variables handled by the captured-variable library.
  // Other variables, such as top-level variables, are still modelled with these nodes
  // (which will result in jump steps).
  exists(VariableCaptureConfig::CapturedVariable captured |
    TCapturedVariableNode(captured) = node
  )
}
/**
* Holds if there is a value-preserving step `node1` -> `node2` that might
* cross function boundaries.
*/
private predicate valuePreservingStep(Node node1, Node node2) {
node1.getASuccessor() = node2 and
not isBlockedLegacyNode(node1) and
not isBlockedLegacyNode(node2)
or
FlowSteps::propertyFlowStep(node1, node2)
or
@@ -613,6 +705,8 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
node2 = TFlowSummaryNode(output)
)
or
VariableCaptureOutput::localFlowStep(getClosureNode(node1), getClosureNode(node2))
or
// NOTE: For consistency with readStep/storeStep, we do not translate these steps to jump steps automatically.
DataFlow::AdditionalFlowStep::step(node1, node2)
}
@@ -674,6 +768,11 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
c = ContentSet::arrayElement()
)
or
exists(LocalVariable variable |
VariableCaptureOutput::readStep(getClosureNode(node1), variable, getClosureNode(node2)) and
c.asSingleton() = MkCapturedContent(variable)
)
or
DataFlow::AdditionalFlowStep::readStep(node1, c, node2)
}
@@ -714,6 +813,11 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
c = ContentSet::promiseValue()
)
or
exists(LocalVariable variable |
VariableCaptureOutput::storeStep(getClosureNode(node1), variable, getClosureNode(node2)) and
c.asSingleton() = MkCapturedContent(variable)
)
or
DataFlow::AdditionalFlowStep::storeStep(node1, c, node2)
}
@@ -771,6 +875,11 @@ int accessPathLimit() { result = 5 }
*/
predicate allowParameterReturnInSelf(ParameterNode p) {
  // Self-reference nodes of functions selected by the variable-capture library's heuristic
  exists(Function f |
    TFunctionSelfReferenceNode(f) = p and
    VariableCaptureOutput::heuristicAllowInstanceParameterReturnInSelf(f)
  )
  or
  // Parameters for which the flow-summary implementation allows it
  FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
}
/** The kind of a lambda call; an alias for `Unit`. */
class LambdaCallKind = Unit;

View File

@@ -0,0 +1,323 @@
private import javascript as js
private import semmle.javascript.dataflow.internal.DataFlowNode
private import codeql.dataflow.VariableCapture
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowImplCommon as DataFlowImplCommon
module VariableCaptureConfig implements InputSig {
/**
 * Gets a function associated with `variable`, which is one of:
 * - a function whose `getVariable()` is `variable`,
 * - a function that is an expression assigned to `variable`, or
 * - the constructor body of a class declaration whose variable is `variable`.
 */
private js::Function getLambdaFromVariable(js::LocalVariable variable) {
  variable = result.getVariable()
  or
  variable.getAnAssignedExpr() = result
  or
  exists(js::ClassDeclStmt classDecl | classDecl.getVariable() = variable |
    classDecl.getConstructor().getBody() = result
  )
}
/**
 * Holds if `container` should be treated like a top-level, meaning that the variables
 * it declares are treated as singletons.
 */
additional predicate isTopLevelLike(js::StmtContainer container) {
  container instanceof js::TopLevel
  or
  exists(js::AmdModuleDefinition amd | amd.getFactoryFunction() = container)
  or
  // An immediately-invoked function expression directly inside a top-level-like container
  exists(js::ImmediatelyInvokedFunctionExpr iife | iife = container |
    isTopLevelLike(iife.getEnclosingContainer())
  )
  or
  // A function with no parameters that is declared in a top-level cannot generate flow-through,
  // except through 'this', which we rule out with a few syntactic checks.
  // In this case we treat its captured variables as singletons.
  // NOTE: This was done to prevent a blow-up in fiddlesalad where a function called 'Runtime' captures 7381 variables but is only called once.
  exists(js::Function fun | fun = container |
    isTopLevelLike(fun.getEnclosingContainer()) and
    fun.getNumParameter() = 0 and
    not mayHaveFlowThroughThisArgument(fun)
  )
  or
  // Containers declaring >100 captured variables tend to be singletons and are too expensive anyway
  100 <
    strictcount(js::LocalVariable v | v.getDeclaringContainer() = container and v.isCaptured())
}
/**
 * Holds if some `new` expression has a variable access as its callee, and `fun` is
 * a function associated with that variable.
 */
private predicate hasLocalConstructorCall(js::Function fun) {
  exists(js::NewExpr newExpr, js::VarAccess callee |
    callee = newExpr.getCallee() and
    getLambdaFromVariable(callee.getVariable()) = fun
  )
}
/**
 * Holds if `fun` binds a `this` expression, has no local constructor call, and either
 * returns an expression or contains a `yield` expression.
 */
private predicate mayHaveFlowThroughThisArgument(js::Function fun) {
  exists(js::ThisExpr thisExpr | thisExpr.getBinder() = fun) and
  // 'this' argument is assumed to be a fresh object
  not hasLocalConstructorCall(fun) and
  (
    exists(js::YieldExpr yieldExpr | yieldExpr.getContainer() = fun)
    or
    exists(fun.getAReturnedExpr())
  )
}
/**
 * A captured local variable that is handled by the variable-capture library.
 *
 * Variables declared in a top-level-like container, and variables that have an
 * associated function (see `getLambdaFromVariable`), are not included.
 */
class CapturedVariable extends js::LocalVariable {
CapturedVariable() {
DataFlowImplCommon::forceCachingInSameStage() and
this.isCaptured() and
not isTopLevelLike(this.getDeclaringContainer()) and
// Exclude variables that just contain a function
// TODO: explain why
// TODO: also exclude if only use of variable is to call it. Handles case where variable is just alias for top-level function
not exists(getLambdaFromVariable(this))
}
/** Gets the function boundary of the container that declares this variable. */
Callable getCallable() { result = this.getDeclaringContainer().getFunctionBoundary() }
}
/**
 * Holds if `fun` captures `variable`.
 *
 * This is the case when `variable` is not declared by `fun` itself, and either an access
 * to `variable` has `fun` as its function boundary, or `fun` contains a reference to
 * another function that captures `variable`.
 */
additional predicate captures(js::Function fun, CapturedVariable variable) {
  not fun = variable.getDeclaringContainer() and
  (
    // Transitive case: `fun` refers to a function that captures the variable
    exists(js::Function inner |
      containsReferenceTo(fun, inner) and
      captures(inner, variable)
    )
    or
    // Direct case: the variable is accessed within `fun`
    fun = variable.getAnAccess().getContainer().getFunctionBoundary()
  )
}
/**
 * Holds if `fun` contains a reference to the function `other`.
 *
 * This is the case when `other` is directly enclosed by `fun`, or when `fun` accesses
 * a variable associated with `other` and the container enclosing `fun` is the container
 * enclosing `other` or one of its transitive enclosing containers.
 */
private predicate containsReferenceTo(js::Function fun, js::Function other) {
  fun = other.getEnclosingContainer()
  or
  fun != other and
  other.getEnclosingContainer().getEnclosingContainer*() = fun.getEnclosingContainer() and
  exists(js::LocalVariable alias |
    getLambdaFromVariable(alias) = other and
    alias.getAnAccess().getEnclosingFunction() = fun
  )
}
/**
 * Gets a function that captures some variable and is found in the syntax tree rooted at `e`.
 *
 * The search follows child nodes, but does not descend into the children of a function.
 */
private js::Function getACapturingFunctionInTree(js::AstNode e) {
  captures(result, _) and
  result = e
  or
  not e instanceof js::Function and
  exists(js::AstNode child | child = e.getAChild() |
    getACapturingFunctionInTree(child) = result
  )
}
/**
 * Holds if `decl` declares a variable that is captured by a function inside the
 * initializer of `decl` itself.
 *
 * For example, the declaration of `obj` below captures itself in its initializer:
 * ```js
 * const obj = {
 *   method: () => { ...obj... }
 * }
 * ```
 *
 * The lambda can only observe values of `obj` at one of the aliases of that lambda. Due to
 * limited alias analysis, the only alias we can see is the lambda itself. At that point the
 * `obj` variable is still unassigned, so the lambda only sees its implicit initialization and
 * fails to capture any real flow through `obj`.
 *
 * The following, similar example does not have this problem:
 *
 * ```js
 * const obj = {};
 * obj.method = () => { ...obj... };
 * ```
 *
 * Here `obj` has already been assigned when the lambda is created, so the correct value is
 * propagated into the lambda.
 *
 * As a workaround, we make the first example look like the second one by placing the
 * assignment of `obj` before the object literal. We do this whenever a variable captures
 * itself in its initializer.
 */
private predicate isCapturedByOwnInitializer(js::VariableDeclarator decl) {
  exists(js::VarDecl binding, js::Function closure |
    binding = decl.getBindingPattern() and
    closure = getACapturingFunctionInTree(decl.getInit()) and
    captures(closure, binding.getVariable())
  )
}
/** A basic block, extended with the callable it belongs to. */
class BasicBlock extends js::BasicBlock {
  /** Gets the function boundary of the container of this basic block. */
  Callable getEnclosingCallable() {
    exists(js::StmtContainer container | container = this.getContainer() |
      container.getFunctionBoundary() = result
    )
  }
}
/** A source location; an alias for `js::Location`. */
class Location = js::Location;
/** A callable, represented by a statement container. */
class Callable extends js::StmtContainer {
/** Holds if this callable is a constructor. This never holds, as explained below. */
predicate isConstructor() {
// TODO: clarify exactly what the library wants to know here as the meaning of "constructor" varies between languages.
// JS constructors should not be seen as "constructors" in this context.
none()
}
}
/** A captured variable that is a parameter. */
class CapturedParameter extends CapturedVariable {
CapturedParameter() { this.isParameter() }
}
/** An expression, represented by an AST value node. */
class Expr extends js::AST::ValueNode {
  /** Holds if this expression is evaluated by the `i`th node of basic block `bb`. */
  predicate hasCfgNode(BasicBlock bb, int i) {
    // Note: this is overridden for FunctionDeclStmt
    this = bb.getNode(i)
  }
}
/** An access to a captured variable that is both a variable access and an rvalue. */
class VariableRead extends Expr instanceof js::VarAccess, js::RValue {
  private CapturedVariable accessed;

  VariableRead() { accessed.getAnAccess() = this }

  /** Gets the captured variable accessed by this read. */
  CapturedVariable getVariable() { accessed = result }
}
/** An expression that is a function capturing at least one variable. */
class ClosureExpr extends Expr {
  ClosureExpr() { captures(this, _) }

  /** Holds if `c` is the body of this closure, which is the closure itself. */
  predicate hasBody(Callable c) { this = c }

  /**
   * Holds if `e` is an access to this closure: either the closure expression itself,
   * or an access to a variable associated with this function.
   */
  predicate hasAliasedAccess(Expr e) {
    exists(js::LocalVariable alias |
      alias.getAnAccess() = e and
      getLambdaFromVariable(alias) = this
    )
    or
    this = e
  }
}
/** The algebraic type underlying `VariableWrite`. */
private newtype TVariableWrite =
// A variable reference that has an lvalue data-flow node and refers to a captured variable.
MkExplicitVariableWrite(js::VarRef pattern) {
exists(js::DataFlow::lvalueNodeInternal(pattern)) and
pattern.getVariable() instanceof CapturedVariable
} or
// The implicit initialization of a captured variable that is not a parameter.
MkImplicitVariableInit(CapturedVariable v) { not v instanceof CapturedParameter }
/**
 * A write to a captured variable.
 *
 * The member predicates declared here have no results of their own; they are
 * placeholders intended to be overridden in subclasses.
 */
class VariableWrite extends TVariableWrite {
/** Gets the captured variable being written. */
CapturedVariable getVariable() { none() } // Overridden in subclass
/** Gets a string representation of this write. */
string toString() { none() } // Overridden in subclass
/** Gets the location of this write. */
Location getLocation() { none() } // Overridden in subclass
/** Holds if this write takes place at the `i`th node of basic block `bb`. */
predicate hasCfgNode(BasicBlock bb, int i) { none() } // Overridden in subclass
// note: language-specific
/** Gets the data-flow node associated with this write, if any. */
js::DataFlow::Node getSource() { none() } // Overridden in subclass
}
/** A write to a captured variable through a variable reference `pattern`. */
additional class ExplicitVariableWrite extends VariableWrite, MkExplicitVariableWrite {
private js::VarRef pattern;
ExplicitVariableWrite() { this = MkExplicitVariableWrite(pattern) }
/** Gets the variable that the underlying pattern refers to. */
override CapturedVariable getVariable() { result = pattern.getVariable() }
/** Gets the string representation of the underlying pattern. */
override string toString() { result = pattern.toString() }
/** Gets the location of this write. */
override Location getLocation() { result = pattern.getLocation() }
/** Gets the lvalue data-flow node of the underlying pattern. */
override js::DataFlow::Node getSource() {
// Note: there is not always an expression corresponding to the RHS of the assignment.
// We do however have a data-flow node for this purpose (the lvalue-node),
// so we return the lvalue-node of the pattern directly.
result = js::DataFlow::lvalueNodeInternal(pattern)
}
/**
 * Gets a CFG node that should act at the place where this variable write happens, overriding its "true" CFG node.
 *
 * This exists when the pattern is the binding pattern of a declarator whose variable is captured
 * by its own initializer; the result is then the first CFG node of that initializer.
 */
private js::ControlFlowNode getCfgNodeOverride() {
exists(js::VariableDeclarator decl |
decl.getBindingPattern() = pattern and
isCapturedByOwnInitializer(decl) and
result = decl.getInit().getFirstControlFlowNode()
)
}
/**
 * Holds if this write takes place at the `i`th node of basic block `bb`.
 *
 * The override CFG node is used when there is one; otherwise the definition node of the pattern is used.
 */
override predicate hasCfgNode(BasicBlock bb, int i) {
bb.getNode(i) = this.getCfgNodeOverride()
or
not exists(this.getCfgNodeOverride()) and
bb.getNode(i) = pattern.(js::LValue).getDefNode()
}
}
/** The implicit initialization of a captured variable that is not a parameter. */
additional class ImplicitVariableInit extends VariableWrite, MkImplicitVariableInit {
  private CapturedVariable initialized;

  ImplicitVariableInit() { MkImplicitVariableInit(initialized) = this }

  override string toString() { "[implicit init] " + initialized = result }

  /** Gets the location of the initialized variable. */
  override Location getLocation() { initialized.getLocation() = result }

  /** Gets the variable being implicitly initialized. */
  override CapturedVariable getVariable() { initialized = result }

  /** Holds if `bb` has an implicit SSA initialization of the variable; `i` is then -1. */
  override predicate hasCfgNode(BasicBlock bb, int i) {
    // 'i' would normally be bound to 0, but we lower it to -1 so FunctionDeclStmts can be evaluated
    // at index 0.
    i = -1 and
    exists(js::SsaImplicitInit def | def.definesAt(bb, _, initialized))
  }
}
/** Gets a successor of basic block `bb`. */
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
/** Gets the immediate dominator of basic block `bb`. */
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() }
/** Holds if `bb` is an entry basic block. */
predicate entryBlock(BasicBlock bb) { bb instanceof js::EntryBasicBlock }
/** Holds if the last node of `bb` is a control-flow exit node. */
predicate exitBlock(BasicBlock bb) { bb.getLastNode() instanceof js::ControlFlowExitNode }
}
/** The `Flow` module from `codeql.dataflow.VariableCapture`, instantiated with `VariableCaptureConfig`. */
module VariableCaptureOutput = Flow<VariableCaptureConfig>;
/**
 * Gets the data-flow node that corresponds to `node` from the variable-capture library.
 */
js::DataFlow::Node getNodeFromClosureNode(VariableCaptureOutput::ClosureNode node) {
  exists(VariableCaptureOutput::ExprNode exprNode | exprNode = node |
    result = TValueNode(exprNode.getExpr())
  )
  or
  // TODO: is this subsumed by the ExprNode case?
  exists(VariableCaptureOutput::ParameterNode paramNode | paramNode = node |
    result = TValueNode(paramNode.getParameter().getADeclaration())
  )
  or
  exists(VariableCaptureOutput::ExprPostUpdateNode postUpdate | postUpdate = node |
    result = TExprPostUpdateNode(postUpdate.getExpr())
  )
  or
  // Note: the `this` parameter in the capture library is expected to be a parameter that refers to the lambda object itself,
  // which for JS means the `TFunctionSelfReferenceNode`, not `TThisNode` as one might expect.
  exists(VariableCaptureOutput::ThisParameterNode selfParam | selfParam = node |
    result = TFunctionSelfReferenceNode(selfParam.getCallable())
  )
  or
  exists(VariableCaptureOutput::SynthesizedCaptureNode synth | synth = node |
    result = TSynthCaptureNode(synth)
  )
  or
  exists(VariableCaptureOutput::VariableWriteSourceNode writeSource | writeSource = node |
    result = writeSource.getVariableWrite().getSource()
  )
}
/**
 * Gets a variable-capture node that `getNodeFromClosureNode` maps to `node`.
 */
VariableCaptureOutput::ClosureNode getClosureNode(js::DataFlow::Node node) {
  getNodeFromClosureNode(result) = node
}
/** Predicates for inspecting the variable-capture configuration and its output. */
private module Debug {
  private import VariableCaptureConfig

  /** Holds if `container` is, or is transitively enclosed by, a function named `exists`. */
  predicate relevantContainer(js::StmtContainer container) {
    exists(js::Function named | named = container.getEnclosingContainer*() |
      named.getName() = "exists"
    )
  }

  /** Holds if the variable-capture library has a local flow step from `node1` to `node2`. */
  predicate localFlowStep(
    VariableCaptureOutput::ClosureNode node1, VariableCaptureOutput::ClosureNode node2
  ) {
    VariableCaptureOutput::localFlowStep(node1, node2)
  }

  /**
   * Holds if there is a local flow step between the variable-capture nodes of `node1` and
   * `node2`, and `node1` is in a relevant container.
   */
  predicate localFlowStepMapped(js::DataFlow::Node node1, js::DataFlow::Node node2) {
    relevantContainer(node1.getContainer()) and
    localFlowStep(getClosureNode(node1), getClosureNode(node2))
  }

  /** Holds if `read` is at the `i`th node of basic block `bb`. */
  predicate readBB(VariableRead read, BasicBlock bb, int i) { read.hasCfgNode(bb, i) }

  /** Holds if `write` is at the `i`th node of basic block `bb`. */
  predicate writeBB(VariableWrite write, BasicBlock bb, int i) { write.hasCfgNode(bb, i) }

  /** Gets the number of variables captured by `fun`, if it captures any. */
  int captureDegree(js::Function fun) {
    strictcount(CapturedVariable v | captures(fun, v)) = result
  }

  /** Gets the largest capture degree of any function. */
  int maxDegree() { result = max(captureDegree(_)) }

  /** Gets the capture degree of `fun`, provided it equals the maximum capture degree. */
  int captureMax(js::Function fun) { result = maxDegree() and captureDegree(fun) = result }

  /**
   * Gets the capture degree of `fun`, provided it equals the maximum capture degree
   * and `fun` captures `v`.
   */
  int captureMax(js::Function fun, CapturedVariable v) {
    captures(fun, v) and
    result = maxDegree() and
    captureDegree(fun) = result
  }
}