JS: Add TEarlyStageNode

This commit is contained in:
Asger F
2023-10-03 09:48:09 +02:00
parent 51ef0e5836
commit 79e7aae9f6
7 changed files with 202 additions and 105 deletions

View File

@@ -6,6 +6,7 @@
import javascript
private import semmle.javascript.internal.CachedStages
private import Expressions.ExprHasNoEffect
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* Companion module to the `AmdModuleDefinition` class.
@@ -78,10 +79,15 @@ class AmdModuleDefinition extends CallExpr instanceof AmdModuleDefinition::Range
result instanceof DataFlow::ValueNode
}
private DataFlow::Node getFactoryNodeInternal() {
// To avoid recursion, this should not depend on `SourceNode`.
result = DataFlow::valueNode(this.getLastArgument()) or
result = this.getFactoryNodeInternal().getAPredecessor()
/**
* Gets the factory function of this module definition.
*
* This is any function whose value node is among the nodes returned by
* `getFactoryNodeInternal()`.
*/
Function getFactoryFunction() { TValueNode(result) = this.getFactoryNodeInternal() }
/**
* Gets the value node of the last argument of this call, or a node from which
* that node can be reached through one or more local flow steps.
*
* The result is typed as `EarlyStageNode` rather than `DataFlow::Node`.
*/
private EarlyStageNode getFactoryNodeInternal() {
result = TValueNode(this.getLastArgument())
or
// Step backwards along local flow from a node already known to be a factory node.
DataFlow::localFlowStep(result, this.getFactoryNodeInternal())
}
/** Gets the expression defining this module. */
@@ -132,7 +138,10 @@ class AmdModuleDefinition extends CallExpr instanceof AmdModuleDefinition::Range
* Gets the `i`th parameter of the factory function of this module.
*/
private Parameter getFactoryParameter(int i) {
this.getFactoryNodeInternal().asExpr().(Function).getParameter(i) = result
exists(Function fun |
// `fun` is a function whose value node is one of the factory nodes.
this.getFactoryNodeInternal() = TValueNode(fun) and
result = fun.getParameter(i)
)
}
/**

View File

@@ -4,6 +4,7 @@ import javascript
private import NodeModuleResolutionImpl
private import semmle.javascript.DynamicPropertyAccess as DynamicPropertyAccess
private import semmle.javascript.internal.CachedStages
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* A Node.js module.
@@ -240,60 +241,69 @@ private class RequireVariable extends Variable {
*/
private predicate moduleInFile(Module m, File f) { m.getFile() = f }
/**
* Holds if `nd` is the destructured-import node or the namespace specifier of an
* import declaration whose path is `"module"`, or if `nd` can be reached from
* such a node through local data flow.
*/
private predicate isModuleModule(DataFlow::Node nd) {
exists(ImportDeclaration imp |
imp.getImportedPath().getValue() = "module" and
nd =
[
DataFlow::destructuredModuleImportNode(imp),
DataFlow::valueNode(imp.getASpecifier().(ImportNamespaceSpecifier))
]
private predicate isModuleModule(EarlyStageNode nd) {
exists(ImportDeclaration imp | imp.getImportedPath().getValue() = "module" |
nd = TDestructuredModuleImportNode(imp)
or
nd = TValueNode(imp.getASpecifier().(ImportNamespaceSpecifier))
)
or
isModuleModule(nd.getAPredecessor())
// Propagate forwards along a local flow step from a node already known to qualify.
exists(EarlyStageNode other |
isModuleModule(other) and
DataFlow::localFlowStep(other, nd)
)
}
/**
* Holds if `nd` may refer to `createRequire` from the `module` module.
*
* This is the case for a `createRequire` property access or property pattern on
* something satisfying `isModuleModule`, for a named import specifier of
* `createRequire` from `"module"`, and for any node reachable from one of these
* through local data flow.
*/
private predicate isCreateRequire(DataFlow::Node nd) {
private predicate isCreateRequire(EarlyStageNode nd) {
// A property access named `createRequire` whose base satisfies `isModuleModule`.
exists(PropAccess prop |
isModuleModule(prop.getBase().flow()) and
isModuleModule(TValueNode(prop.getBase())) and
prop.getPropertyName() = "createRequire" and
nd = prop.flow()
nd = TValueNode(prop)
)
or
// A property pattern named `createRequire` whose object pattern satisfies `isModuleModule`.
exists(PropertyPattern prop |
isModuleModule(prop.getObjectPattern().flow()) and
isModuleModule(TValueNode(prop.getObjectPattern())) and
prop.getName() = "createRequire" and
nd = prop.getValuePattern().flow()
nd = TValueNode(prop.getValuePattern())
)
or
// A named import specifier importing `createRequire` from `"module"`.
exists(ImportDeclaration decl, NamedImportSpecifier spec |
decl.getImportedPath().getValue() = "module" and
spec = decl.getASpecifier() and
spec.getImportedName() = "createRequire" and
nd = spec.flow()
nd = TValueNode(spec)
)
or
isCreateRequire(nd.getAPredecessor())
exists(EarlyStageNode other |
isCreateRequire(other) and
DataFlow::localFlowStep(other, nd)
)
}
/**
* Holds if `nd` may refer to `require`, either directly or modulo local data flow.
*
* A call whose callee may refer to `createRequire` (see `isCreateRequire`) is also
* treated as referring to `require`.
*/
cached
private predicate isRequire(DataFlow::Node nd) {
nd.asExpr() = any(RequireVariable req).getAnAccess() and
// `mjs` files explicitly disallow `require`
not nd.getFile().getExtension() = "mjs"
private predicate isRequire(EarlyStageNode nd) {
exists(VarAccess access |
access = any(RequireVariable v).getAnAccess() and
nd = TValueNode(access) and
// `mjs` files explicitly disallow `require`
not access.getFile().getExtension() = "mjs"
)
or
isRequire(nd.getAPredecessor())
exists(EarlyStageNode other |
isRequire(other) and
DataFlow::localFlowStep(other, nd)
)
or
// A call to `createRequire`, e.g. one imported with `import { createRequire } from 'module';`.
// This is specialized to ES2015 modules to avoid recursion in the `DataFlow::moduleImport()` predicate and to avoid
// negative recursion between `Import.getImportedModuleNode()` and `Import.getImportedModule()`, and
// to avoid depending on `SourceNode` as this would make `SourceNode::Range` recursive.
exists(CallExpr call |
isCreateRequire(call.getCallee().flow()) and
nd = call.flow()
isCreateRequire(TValueNode(call.getCallee())) and
nd = TValueNode(call)
)
}
@@ -307,7 +317,7 @@ private predicate isRequire(DataFlow::Node nd) {
* ```
*/
class Require extends CallExpr, Import {
Require() { isRequire(this.getCallee().flow()) }
Require() { isRequire(TValueNode(this.getCallee())) }
override PathExpr getImportedPath() { result = this.getArgument(0) }
@@ -401,7 +411,7 @@ private class RequirePath extends PathExprCandidate {
this = any(Require req).getArgument(0)
or
exists(MethodCallExpr reqres |
isRequire(reqres.getReceiver().flow()) and
isRequire(TValueNode(reqres.getReceiver())) and
reqres.getMethodName() = "resolve" and
this = reqres.getArgument(0)
)

View File

@@ -4,6 +4,7 @@
*/
import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
/**
* Internal representation of paths as lists of components.
@@ -381,16 +382,16 @@ private class PathExprString extends PathString {
}
/**
* Gets a node that can be reached from the value node of `expr` by one or more
* immediate flow steps.
*/
pragma[nomagic]
private DataFlow::Node getAPathExprAlias(PathExpr expr) {
result.getImmediatePredecessor().asExpr() = expr
private EarlyStageNode getAPathExprAlias(PathExpr expr) {
DataFlow::Impl::earlyStageImmediateFlowStep(TValueNode(expr), result)
or
result.getImmediatePredecessor() = getAPathExprAlias(expr)
DataFlow::Impl::earlyStageImmediateFlowStep(getAPathExprAlias(expr), result)
}
private class PathExprFromAlias extends PathExpr {
private PathExpr other;
PathExprFromAlias() { this = getAPathExprAlias(other).asExpr() }
PathExprFromAlias() { TValueNode(this) = getAPathExprAlias(other) }
override string getValue() { result = other.getValue() }
@@ -435,13 +436,15 @@ abstract class PathExprCandidate extends Expr {
/** Gets this expression, or a child expression of a result of `getAPart()`. */
pragma[nomagic]
private Expr getAPart1() { result = this or result = this.getAPart().getAChildExpr() }
/**
* Gets the value node of a result of `getAPart1()`, or a node from which such a
* node can be reached by one or more immediate flow steps.
*/
private EarlyStageNode getAnAliasedPart1() {
result = TValueNode(this.getAPart1())
or
// Step backwards along an immediate flow step from a node already in the set.
DataFlow::Impl::earlyStageImmediateFlowStep(result, this.getAnAliasedPart1())
}
/**
* Gets an expression that is nested inside this expression.
*
* Equivalent to `getAChildExpr*()`, but useful to enforce a better join order (in spite of
* what the optimizer thinks, there are generally far fewer `PathExprCandidate`s than
* `ConstantString`s).
* Gets an expression that is depended on by an expression nested inside this expression.
*/
pragma[nomagic]
Expr getAPart() { result = this.getAPart1().flow().getImmediatePredecessor*().asExpr() }
Expr getAPart() { TValueNode(result) = this.getAnAliasedPart1() }
}

View File

@@ -179,29 +179,8 @@ module DataFlow {
*/
cached
DataFlow::Node getImmediatePredecessor() {
lvalueFlowStep(result, this) and
not lvalueDefaultFlowStep(_, this)
or
immediateFlowStep(result, this)
or
// Refinement of variable -> original definition of variable
exists(SsaRefinementNode refinement |
this = TSsaDefNode(refinement) and
result = TSsaDefNode(refinement.getAnInput())
)
or
exists(SsaPhiNode phi |
this = TSsaDefNode(phi) and
result = TSsaDefNode(phi.getRephinedVariable())
)
or
// IIFE call -> return value of IIFE
exists(Function fun |
localCall(this.asExpr(), fun) and
result = unique(Expr ret | ret = fun.getAReturnedExpr()).flow() and
not fun.getExit().isJoin() // can only reach exit by the return statement
)
or
FlowSteps::identityFunctionStep(result, this)
}
@@ -783,14 +762,7 @@ module DataFlow {
override string getPropertyName() { result = prop.getName() }
/** Gets the value node of the parameter returned by `prop.getParameter()`. */
override Node getRhs() {
exists(Parameter param, Node paramNode |
param = prop.getParameter() and
parameterNode(paramNode, param)
|
result = paramNode
)
}
override Node getRhs() { result = TValueNode(prop.getParameter()) }
override ControlFlowNode getWriteNode() { result = prop.getParameter() }
}
@@ -1107,6 +1079,14 @@ module DataFlow {
* instead.
*/
module Impl {
/**
* INTERNAL. DO NOT USE.
*
* An alias for the two-argument `immediateFlowStep` predicate, for use where
* `Node.getImmediatePredecessor` would otherwise be used, at an earlier stage
* that does not depend on `DataFlow::Node`.
*/
predicate earlyStageImmediateFlowStep = immediateFlowStep/2;
/**
* A data flow node representing a function invocation, either explicitly or reflectively,
* and either with or without `new`.
@@ -1420,12 +1400,12 @@ module DataFlow {
/**
* INTERNAL: Use `parameterNode(Parameter)` instead.
*
* Holds if `nd` is the value node of the parameter `p`.
*/
predicate parameterNode(DataFlow::Node nd, Parameter p) { nd = valueNode(p) }
predicate parameterNode(EarlyStageNode nd, Parameter p) { nd = TValueNode(p) }
/**
* INTERNAL: Use `thisNode(StmtContainer container)` instead.
*
* Holds if `node` is the `TThisNode` of `container`.
*/
predicate thisNode(DataFlow::Node node, StmtContainer container) { node = TThisNode(container) }
predicate thisNode(EarlyStageNode node, StmtContainer container) { node = TThisNode(container) }
/**
* Gets the node representing the receiver of the given function, or `this` in the given top-level.
@@ -1487,7 +1467,15 @@ module DataFlow {
* _before_ the l-value is assigned to, whereas `DataFlow::lvalueNode()`
* represents the value _after_ the assignment.
*/
Node lvalueNode(BindingPattern lvalue) {
Node lvalueNode(BindingPattern lvalue) { result = lvalueNodeInternal(lvalue) }
/**
* INTERNAL: Do not use outside standard library.
*
* Same as `lvalueNode()` except the return type is `EarlyStageNode`, which allows it to be used
* before all data flow nodes have been materialised.
*/
EarlyStageNode lvalueNodeInternal(BindingPattern lvalue) {
exists(SsaExplicitDefinition ssa |
ssa.defines(lvalue.(LValue).getDefNode(), lvalue.(VarRef).getVariable()) and
result = TSsaDefNode(ssa)
@@ -1535,31 +1523,31 @@ module DataFlow {
* Holds if there is a step from `pred -> succ` due to an assignment
* to an expression in l-value position.
*/
private predicate lvalueFlowStep(Node pred, Node succ) {
private predicate lvalueFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
// From the node given by `defSourceNode(def)` to the l-value node of the definition's target.
exists(VarDef def |
pred = valueNode(defSourceNode(def)) and
succ = lvalueNode(def.getTarget())
pred = TValueNode(defSourceNode(def)) and
succ = lvalueNodeInternal(def.getTarget())
)
or
exists(SimpleParameter param |
pred = valueNode(param) and // The value node represents the incoming argument
succ = lvalueNode(param) // The SSA node represents the parameter's local variable
pred = TValueNode(param) and // The value node represents the incoming argument
succ = lvalueNodeInternal(param) // The SSA node represents the parameter's local variable
)
or
// From an argument to the parameter it is passed to, as given by `localArgumentPassing`.
exists(Expr arg, Parameter param |
localArgumentPassing(arg, param) and
pred = valueNode(arg) and
succ = valueNode(param)
pred = TValueNode(arg) and
succ = TValueNode(param)
)
or
// From the node of a property pattern to the l-value node of its value pattern.
exists(PropertyPattern pattern |
pred = TPropNode(pattern) and
succ = lvalueNode(pattern.getValuePattern())
succ = lvalueNodeInternal(pattern.getValuePattern())
)
or
// From an element-pattern node to the l-value node of the corresponding element.
exists(Expr element |
pred = TElementPatternNode(_, element) and
succ = lvalueNode(element)
succ = lvalueNodeInternal(element)
)
}
@@ -1567,37 +1555,37 @@ module DataFlow {
* Holds if there is a step from `pred -> succ` from the default
* value of a destructuring pattern or parameter.
*/
private predicate lvalueDefaultFlowStep(Node pred, Node succ) {
private predicate lvalueDefaultFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
// From the default value of a property pattern to the l-value node of its value pattern.
exists(PropertyPattern pattern |
pred = TValueNode(pattern.getDefault()) and
succ = lvalueNode(pattern.getValuePattern())
succ = lvalueNodeInternal(pattern.getValuePattern())
)
or
// From the default value of the `i`th array-pattern element to that element's l-value node.
exists(ArrayPattern array, int i |
pred = TValueNode(array.getDefault(i)) and
succ = lvalueNode(array.getElement(i))
succ = lvalueNodeInternal(array.getElement(i))
)
or
// From the default value of a parameter to the parameter's value node.
exists(Parameter param |
pred = TValueNode(param.getDefault()) and
parameterNode(succ, param)
succ = TValueNode(param)
)
}
/**
* Flow steps shared between `getImmediatePredecessor` and `localFlowStep`.
* Flow steps shared between `immediateFlowStep` and `localFlowStep`.
*
* Inlining is forced because the two relations are indexed differently.
*/
pragma[inline]
private predicate immediateFlowStep(Node pred, Node succ) {
private predicate immediateFlowStepShared(EarlyStageNode pred, EarlyStageNode succ) {
exists(SsaVariable v |
pred = TSsaDefNode(v.getDefinition()) and
succ = valueNode(v.getAUse())
succ = TValueNode(v.getAUse())
)
or
exists(Expr predExpr, Expr succExpr |
pred = valueNode(predExpr) and succ = valueNode(succExpr)
pred = TValueNode(predExpr) and succ = TValueNode(succExpr)
|
predExpr = succExpr.(ParExpr).getExpression()
or
@@ -1627,25 +1615,55 @@ module DataFlow {
// flow from 'this' parameter into 'this' expressions
exists(ThisExpr thiz |
pred = TThisNode(thiz.getBindingContainer()) and
succ = valueNode(thiz)
succ = TValueNode(thiz)
)
or
// `f.call(...)` and `f.apply(...)` evaluate to the result of the reflective call they perform
pred = TReflectiveCallNode(succ.asExpr(), _)
exists(MethodCallExpr call |
pred = TReflectiveCallNode(call, _) and
succ = TValueNode(call)
)
}
/**
* Holds if there is an immediate flow step from `pred` to `succ`.
*
* This covers l-value steps into a node that is not the target of any
* default-value step, the steps of `immediateFlowStepShared`, steps into SSA
* refinement and phi nodes, and the step from the unique returned expression of
* a locally called function to the call.
*/
pragma[nomagic]
private predicate immediateFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
lvalueFlowStep(pred, succ) and
not lvalueDefaultFlowStep(_, succ)
or
immediateFlowStepShared(pred, succ)
or
// Refinement of variable -> original definition of variable
exists(SsaRefinementNode refinement |
succ = TSsaDefNode(refinement) and
pred = TSsaDefNode(refinement.getAnInput())
)
or
// Phi node -> the variable returned by `getRephinedVariable()`
exists(SsaPhiNode phi |
succ = TSsaDefNode(phi) and
pred = TSsaDefNode(phi.getRephinedVariable())
)
or
// IIFE call -> return value of IIFE
exists(Function fun, Expr expr |
succ = TValueNode(expr) and
localCall(expr, fun) and
pred = TValueNode(unique(Expr ret | ret = fun.getAReturnedExpr())) and
not fun.getExit().isJoin() // can only reach exit by the return statement
)
}
/**
* Holds if data can flow from `pred` to `succ` in one local step.
*/
cached
predicate localFlowStep(Node pred, Node succ) {
Stages::DataFlowStage::ref() and
predicate localFlowStep(EarlyStageNode pred, EarlyStageNode succ) {
Stages::EarlyDataFlowStage::ref() and
// flow from RHS into LHS
lvalueFlowStep(pred, succ)
or
lvalueDefaultFlowStep(pred, succ)
or
immediateFlowStep(pred, succ)
immediateFlowStepShared(pred, succ)
or
// From an assignment or implicit initialization of a captured variable to its flow-insensitive node.
exists(SsaDefinition predDef |
@@ -1669,7 +1687,7 @@ module DataFlow {
)
or
exists(Expr predExpr, Expr succExpr |
pred = valueNode(predExpr) and succ = valueNode(succExpr)
pred = TValueNode(predExpr) and succ = TValueNode(succExpr)
|
predExpr = succExpr.(LogicalBinaryExpr).getAnOperand()
or
@@ -1683,13 +1701,19 @@ module DataFlow {
or
// from returned expr to the FunctionReturnNode.
exists(Function f | not f.isAsyncOrGenerator() |
DataFlow::functionReturnNode(succ, f) and pred = valueNode(f.getAReturnedExpr())
succ = TFunctionReturnNode(f) and pred = TValueNode(f.getAReturnedExpr())
)
or
// from a reflective params node to a reference to the arguments object.
exists(DataFlow::ReflectiveParametersNode params, Function f | f = params.getFunction() |
succ = f.getArgumentsVariable().getAnAccess().flow() and
pred = params
exists(Function f |
pred = TReflectiveParametersNode(f) and
succ = TValueNode(f.getArgumentsVariable().getAnAccess())
)
or
// Pass 'this' into super calls
exists(SuperCall call |
pred = TThisNode(call.getBinder()) and
succ = TConstructorThisArgumentNode(call)
)
}

View File

@@ -38,6 +38,33 @@ private module Cached {
import Cached
/**
* The union of the node branches that underlie `EarlyStageNode`.
*/
private class TEarlyStageNode =
TValueNode or TSsaDefNode or TCapturedVariableNode or TPropNode or TRestPatternNode or
TElementPatternNode or TElementNode or TReflectiveCallNode or TThisNode or
TFunctionSelfReferenceNode or TDestructuredModuleImportNode or THtmlAttributeNode or
TFunctionReturnNode or TExceptionalFunctionReturnNode or TExceptionalInvocationReturnNode or
TGlobalAccessPathRoot or TTemplatePlaceholderTag or TReflectiveParametersNode or
TExprPostUpdateNode or TConstructorThisArgumentNode;
/**
* The raw data type underlying `DataFlow::Node`.
* A data-flow node that is not a flow summary node.
*
* This class exists to avoid an unwanted dependency on flow summaries in some parts of the codebase
* that should not depend on them.
*
* In particular, this dependency chain must not result in negative recursion:
* - Flow summaries can only be created after pruning irrelevant flow summaries
* - To prune irrelevant flow summaries, we must know which packages are imported
* - To know which packages are imported, module systems must be evaluated
* - The AMD and NodeJS module systems rely on data flow to find calls to `require` and similar.
* These module systems must therefore use `EarlyStageNode` instead of `DataFlow::Node`.
*/
class EarlyStageNode extends TEarlyStageNode {
/** Gets a textual representation of this node, as given by `DataFlow::Node.toString()`. */
string toString() { result = this.(DataFlow::Node).toString() }
/**
* Holds if this node is at the given location, as determined by
* `DataFlow::Node.hasLocationInfo`.
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.(DataFlow::Node).hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}

View File

@@ -48,11 +48,13 @@ predicate parseTypeString(string rawType, string package, string qualifiedName)
* Holds if models describing `package` may be relevant for the analysis of this database.
*/
predicate isPackageUsed(string package) {
exists(DataFlow::moduleImport(package))
or
// The `"global"` package is always considered used.
package = "global"
or
any(DataFlow::SourceNode sn).hasUnderlyingType(package, _)
// The value of the path of some import.
package = any(JS::Import imp).getImportedPath().getValue()
or
// Some type name is qualified by `package`.
any(JS::TypeName t).hasQualifiedName(package, _)
or
// Some type annotation is qualified by `package`.
any(JS::TypeAnnotation t).hasQualifiedName(package, _)
}
bindingset[type]

View File

@@ -106,6 +106,30 @@ module Stages {
}
}
/**
* The part of data flow computed before flow summary nodes.
*/
cached
module EarlyDataFlowStage {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the early DataFlow stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*/
cached
predicate backref() {
1 = 1
or
DataFlow::localFlowStep(_, _)
}
}
/**
* The `dataflow` stage.
*/
@@ -128,8 +152,6 @@ module Stages {
or
exists(AmdModule a)
or
DataFlow::localFlowStep(_, _)
or
exists(any(DataFlow::SourceNode s).getAPropertyReference("foo"))
or
exists(any(Expr e).getExceptionTarget())