combine stages by introducing extended stages

This commit is contained in:
Erik Krogh Kristensen
2021-03-03 18:32:29 +01:00
parent aeb13146d2
commit 25ef3edb20
19 changed files with 375 additions and 12 deletions

View File

@@ -25,6 +25,7 @@ import semmle.javascript.Errors
import semmle.javascript.ES2015Modules
import semmle.javascript.Expr
import semmle.javascript.Extend
import semmle.javascript.ExtendedStaging
import semmle.javascript.Externs
import semmle.javascript.Files
import semmle.javascript.Functions

View File

@@ -298,7 +298,10 @@ private class AmdDependencyImport extends Import {
*/
class AmdModule extends Module {
cached
AmdModule() { exists(unique(AmdModuleDefinition def | amdModuleTopLevel(def, this))) }
AmdModule() {
ExtendedStaging::DataFlowStage::ref() and
exists(unique(AmdModuleDefinition def | amdModuleTopLevel(def, this)))
}
/** Gets the definition of this module. */
AmdModuleDefinition getDefine() { amdModuleTopLevel(result, this) }

View File

@@ -75,7 +75,7 @@ class ASTNode extends @ast_node, NodeInStmtContainer {
/** Gets the toplevel syntactic unit to which this element belongs. */
cached
TopLevel getTopLevel() { result = getParent().getTopLevel() }
TopLevel getTopLevel() { ExtendedStaging::Ast::ref() and result = getParent().getTopLevel() }
/**
* Gets the `i`th child node of this node.
@@ -119,7 +119,7 @@ class ASTNode extends @ast_node, NodeInStmtContainer {
/** Gets the parent node of this node, if any. */
cached
ASTNode getParent() { this = result.getAChild() }
ASTNode getParent() { ExtendedStaging::Ast::ref() and this = result.getAChild() }
/** Gets the first control flow node belonging to this syntactic entity. */
ControlFlowNode getFirstControlFlowNode() { result = this }
@@ -135,6 +135,7 @@ class ASTNode extends @ast_node, NodeInStmtContainer {
*/
cached
private predicate isAmbientInternal() {
ExtendedStaging::Ast::ref() and
getParent().isAmbientInternal()
or
not isAmbientTopLevel(getTopLevel()) and
@@ -186,7 +187,9 @@ class ASTNode extends @ast_node, NodeInStmtContainer {
* Holds if the given file is a `.d.ts` file.
*/
cached
private predicate isAmbientTopLevel(TopLevel tl) { tl.getFile().getBaseName().matches("%.d.ts") }
private predicate isAmbientTopLevel(TopLevel tl) {
ExtendedStaging::Ast::ref() and tl.getFile().getBaseName().matches("%.d.ts")
}
/**
* A toplevel syntactic unit; that is, a stand-alone script, an inline script

View File

@@ -60,7 +60,9 @@ private module Internal {
cached
predicate useAt(BasicBlock bb, int i, Variable v, VarUse u) {
v = u.getVariable() and bbIndex(bb, u, i)
ExtendedStaging::BasicBlocks::ref() and
v = u.getVariable() and
bbIndex(bb, u, i)
}
cached

View File

@@ -109,7 +109,7 @@ class Expr extends @expr, ExprOrStmt, ExprOrType, AST::ValueNode {
/** Gets the constant string value this expression evaluates to, if any. */
cached
string getStringValue() { result = getStringValue(this) }
string getStringValue() { ExtendedStaging::Ast::ref() and result = getStringValue(this) }
/** Holds if this expression is impure, that is, its evaluation could have side effects. */
predicate isImpure() { any() }
@@ -257,6 +257,7 @@ class Expr extends @expr, ExprOrStmt, ExprOrType, AST::ValueNode {
cached
private DataFlow::Node getCatchParameterFromStmt(Stmt stmt) {
ExtendedStaging::DataFlowStage::ref() and
result =
DataFlow::parameterNode(stmt.getEnclosingTryCatchStmt().getACatchClause().getAParameter())
}
@@ -806,7 +807,9 @@ class FunctionExpr extends @function_expr, Expr, Function {
/** Gets the statement in which this function expression appears. */
override Stmt getEnclosingStmt() { result = Expr.super.getEnclosingStmt() }
override StmtContainer getEnclosingContainer() { result = Expr.super.getContainer() }
override StmtContainer getEnclosingContainer() {
ExtendedStaging::Ast::ref() and result = Expr.super.getContainer()
}
override predicate isImpure() { none() }

View File

@@ -0,0 +1,333 @@
/**
* INTERNAL: Do not use.
*
* The purpose of this file is to reduce the number of stages computed by the runtime,
* thereby speeding up the evaluation without affecting any results.
*
* Computing fewer stages can improve performance, as each stage is less likely to recompute non-cached predicates.
*
* A number of stages are grouped into an extended stage.
* An extended stage contains a number of substages - corresponding to how the stages would be grouped if this file didn't exist.
* Each extended stage is identified by a `cached module` in the `ExtendedStaging` module.
*
* The number of stages is reduced by exploiting how the compiler groups predicates into stages.
* The compiler will group mutually recursive cached predicates, or cached predicates within the same `cached module`, into the same stage.
* This file uses the latter by creating a `cached module` with two predicates for each extended stage.
* The first predicate is referenced from all the `cached` predicates we want in the same extended stage,
* and the second predicate has references to all the `cached` predicates we want in the same extended stage.
*
* With these two predicates in a `cached module` we ensure that all substages will be in a single stage at runtime.
*
* Grouping stages into extended stages can cause unnecessary computation, as a concrete query might not depend on
* all the substages in an extended stage.
* Care should therefore be taken not to group stages into an extended stage if it is likely that a query only depends
* on some but not all of the stages in the extended stage.
*/
import javascript
private import internal.StmtContainers
private import semmle.javascript.dataflow.internal.PreCallGraphStep
private import semmle.javascript.dataflow.internal.FlowSteps
/**
* Contains a `cached module` for each extended stage.
* Each `cached module` ensures that predicates that are supposed to be in the same stage, are in the same stage.
*
* Each `cached module` contains two predicates:
* The first, `ref`, always holds, and is referenced from `cached` predicates in each of the substages.
* The second, `backref`, contains references to `cached` predicates from each substage.
* The `backref` predicate starts with `1 = 1 or` to ensure that the predicate will be optimized down to a constant by the optimizer.
*/
module ExtendedStaging {
/**
* The `ast` extended stage.
* Consists of 8 substages (as of writing this).
*
* substage 1:
* AST::ASTNode::getParent
* substage 2:
* JSDoc::Documentable::getDocumentation
* substage 3:
* StmtContainers::getStmtContainer
* substage 4:
* AST::StmtContainer::getEnclosingContainer
* substage 5:
* AST::ASTNode::getTopLevel
* substage 6:
* AST::isAmbientTopLevel
* substage 7:
* Expr::Expr::getStringValue // maybe doesn't belong here?
* substage 8:
* AST::ASTNode::isAmbientInternal
*/
cached
module Ast {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the Ast stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::Ast::ref() and ...`)
* inside the `cached` predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// ASTNode::getTopLevel
exists(any(ASTNode a).getTopLevel())
or
// ASTNode::getParent
exists(any(ASTNode a).getParent())
or
// StmtContainer::getEnclosingContainer
exists(any(StmtContainer c).getEnclosingContainer())
or
// Documentable::getDocumentation
exists(any(Documentable d).getDocumentation())
or
// presumably reaches StmtContainers::getStmtContainer - TODO confirm
exists(any(NodeInStmtContainer n).getContainer())
or
// Expr::getStringValue
exists(any(Expr e).getStringValue())
or
// presumably reaches ASTNode::isAmbientInternal and isAmbientTopLevel - TODO confirm
any(ASTNode node).isAmbient()
}
}
/**
* The `basicblocks` extended stage.
* Consists of 2 substages (as of writing this).
*
* substage 1:
* BasicBlocks::Internal::bbLength#ff
* BasicBlocks::Internal::useAt#ffff
* BasicBlocks::Internal::defAt#ffff
* BasicBlocks::Internal::reachableBB#f
* BasicBlocks::Internal::bbIndex#fff
* substage 2:
* BasicBlocks::bbIDominates#ff
*/
cached
module BasicBlocks {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the BasicBlocks stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::BasicBlocks::ref() and ...`)
* inside the `cached` predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// presumably reaches BasicBlocks::bbIDominates (substage 2) - TODO confirm
any(ReachableBasicBlock bb).dominates(_)
or
// presumably reaches the cached predicates in BasicBlocks::Internal (substage 1) - TODO confirm
exists(any(BasicBlock bb).getNode(_))
}
}
/**
* The `dataflow` extended stage.
* Consists of 6 substages (as of writing this).
*
* substage 1:
* SSA::Internal
* substage 2:
* All the constructors in DataFlowNode.qll
* substage 3:
* AMD::AmdModule
* substage 4:
* DataFlow::DataFlow::localFlowStep
* substage 5:
* Sources::Cached::isSyntacticMethodCall
* NodeJS::isRequire
* Sources::Cached::dynamicPropRef
* Sources::Cached::hasLocalSource
* Sources::Cached::invocation
* Sources::Cached::namedPropRef
* Sources::SourceNode::Range
* substage 6:
* Expr::getCatchParameterFromStmt // maybe doesn't belong here?
*/
cached
module DataFlowStage {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the DataFlow stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::DataFlowStage::ref() and ...`)
* inside the `cached` predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// the cached characteristic predicate of AmdModule
exists(AmdModule a)
or
// DataFlow::localFlowStep
DataFlow::localFlowStep(_, _)
or
// presumably reaches Sources::Cached::namedPropRef; the property name "foo" looks arbitrary - TODO confirm
exists(any(DataFlow::SourceNode s).getAPropertyReference("foo"))
or
// presumably reaches Expr::getCatchParameterFromStmt - TODO confirm
exists(any(Expr e).getExceptionTarget())
or
// presumably reaches the SSA and data-flow node substages - TODO confirm
exists(DataFlow::ssaDefinitionNode(_))
}
}
/**
* The `imports` extended stage.
* Consists of 2 substages (as of writing this).
*
* substage 1:
* Modules::Import::getImportedModule
* substage 2:
* Nodes::moduleImport
*
* Implemented as a cached module as there is a negative dependency between the predicates.
*
* It would have been preferable to include these predicates in the dataflow or typetracking stage.
* But that trips the BDD limit.
*/
cached
module Imports {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the Imports stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::Imports::ref() and ...`)
* inside the `cached` predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*
* NOTE(review): this predicate is named `backrefs`, whereas the corresponding
* predicate in every other extended stage of `ExtendedStaging` is named `backref`.
* Consider unifying the name.
*/
cached
predicate backrefs() {
1 = 1
or
// Import::getImportedModule
exists(any(Import i).getImportedModule())
or
// DataFlow::moduleImport
exists(DataFlow::moduleImport(_))
}
}
/**
* The `typetracking` extended stage.
* Consists of 2 substages (as of writing this).
*
* substage 1:
* PreCallGraphStep::PreCallGraphStep::loadStep
* substage 2:
* PreCallGraphStep::PreCallGraphStep::loadStoreStep
* PreCallGraphStep::PreCallGraphStep::storeStep
* PreCallGraphStep::PreCallGraphStep::step
* FlowSteps::CachedSteps
* CallGraphs::CallGraph
* Nodes::ClassNode::getAClassReference
* JSDoc::JSDocNamedTypeExpr::resolvedName
* TypeTracking::TypeTracker::append
* StepSummary::StepSummary::step
* Modules::Module::getAnExportedValue
* DataFlow::DataFlow::Node::getImmediatePredecessor
* VariableTypeInference::clobberedProp
* TypeInference::AnalyzedNode::getAValue
* GlobalAccessPaths::AccessPath::fromReference
* GlobalAccessPaths::AccessPath::fromRhs
*/
cached
module TypeTracking {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the TypeTracking stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::TypeTracking::ref() and ...`)
* inside the `cached` predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// PreCallGraphStep::loadStep (substage 1)
PreCallGraphStep::loadStep(_, _, _)
or
// basicLoadStep from the imported FlowSteps library; presumably stands in for the rest of substage 2 - TODO confirm
basicLoadStep(_, _, _)
}
}
/**
* The `flowsteps` extended stage.
* Consists of 2 substages (as of writing this).
*
* substage 1:
* Configuration::AdditionalFlowStep::loadStoreStep
* Configuration::AdditionalFlowStep::step
* Configuration::AdditionalFlowStep::storeStep
* Configuration::AdditionalFlowStep::loadStep
* substage 2:
* GlobalAccessPaths::AccessPath::DominatingPaths::hasDominatingWrite
*/
cached
module FlowSteps {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the FlowSteps stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::FlowSteps::ref() and ...`)
* inside the predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// AccessPath::DominatingPaths::hasDominatingWrite (substage 2)
AccessPath::DominatingPaths::hasDominatingWrite(_)
or
// AdditionalFlowStep::step (substage 1)
any(DataFlow::AdditionalFlowStep s).step(_, _)
}
}
/**
* The `taint` extended stage.
* Consists of 2 substages (as of writing this).
*
* substage 1:
* TaintTracking::TaintTracking::AdditionalTaintStep::step
* substage 2:
* RemoteFlowSources::RemoteFlowSource
*/
cached
module Taint {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the Taint stage.
*
* Intended to be used as a conjunct (`ExtendedStaging::Taint::ref() and ...`)
* inside the predicates that should be part of this stage.
*/
cached
predicate ref() { 1 = 1 }
/**
* DO NOT USE!
* Contains references to each predicate that uses the above `ref` predicate.
*
* The leading `1 = 1` makes the predicate trivially true; the remaining
* disjuncts exist only to create a dependency on the listed predicates.
*/
cached
predicate backref() {
1 = 1
or
// AdditionalTaintStep::step (substage 1)
any(TaintTracking::AdditionalTaintStep step).step(_, _)
or
// RemoteFlowSource (substage 2)
exists(RemoteFlowSource r)
}
}
}

View File

@@ -429,7 +429,6 @@ module AccessPath {
/**
* A classification of access paths into reads and writes.
*/
cached
private newtype AccessPathKind =
AccessPathRead() or
AccessPathWrite()
@@ -440,6 +439,7 @@ module AccessPath {
*
* Only has a result if there exists both a read and write of the access-path within `bb`.
*/
pragma[nomagic]
private ControlFlowNode rankedAccessPath(
ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
) {
@@ -539,6 +539,7 @@ module AccessPath {
*/
cached
predicate hasDominatingWrite(DataFlow::PropRead read) {
ExtendedStaging::FlowSteps::ref() and
// within the same basic block.
exists(ReachableBasicBlock bb, Root root, string path, int ranking |
read.asExpr() = rankedAccessPath(bb, root, path, ranking, AccessPathRead()) and

View File

@@ -57,7 +57,9 @@ class JSDoc extends @jsdoc, Locatable {
abstract class Documentable extends ASTNode {
/** Gets the JSDoc comment for this element, if any. */
cached
JSDoc getDocumentation() { result.getComment().getNextToken() = getFirstToken() }
JSDoc getDocumentation() {
ExtendedStaging::Ast::ref() and result.getComment().getNextToken() = getFirstToken()
}
}
/**

View File

@@ -239,6 +239,7 @@ abstract class Import extends ASTNode {
*/
cached
Module getImportedModule() {
ExtendedStaging::Imports::ref() and
if exists(resolveExternsImport())
then result = resolveExternsImport()
else (

View File

@@ -354,6 +354,7 @@ private module Internal {
*/
cached
SsaDefinition getDefReachingEndOf(ReachableBasicBlock bb, SsaSourceVariable v) {
ExtendedStaging::DataFlowStage::ref() and
exists(int lastRef | lastRef = max(int i | ssaRef(bb, i, v, _)) |
result = getLocalDefinition(bb, lastRef, v)
or

View File

@@ -733,6 +733,7 @@ private class FlowStepThroughImport extends AdditionalFlowStep, DataFlow::ValueN
override ImportSpecifier astNode;
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
ExtendedStaging::FlowSteps::ref() and
pred = this and
succ = DataFlow::ssaDefinitionNode(SSA::definition(astNode))
}

View File

@@ -1493,6 +1493,7 @@ module DataFlow {
*/
cached
predicate localFlowStep(Node pred, Node succ) {
ExtendedStaging::DataFlowStage::ref() and
// flow from RHS into LHS
lvalueFlowStep(pred, succ)
or

View File

@@ -734,7 +734,9 @@ module ModuleImportNode {
* This predicate can be extended by subclassing `ModuleImportNode::Range`.
*/
cached
ModuleImportNode moduleImport(string path) { result.getPath() = path }
ModuleImportNode moduleImport(string path) {
ExtendedStaging::Imports::ref() and result.getPath() = path
}
/**
* Gets a (default) import of the given dependency `dep`, such as

View File

@@ -220,6 +220,7 @@ private module Cached {
*/
cached
predicate namedPropRef(DataFlow::SourceNode base, string prop, DataFlow::PropRef ref) {
ExtendedStaging::DataFlowStage::ref() and
hasLocalSource(ref.getBase(), base) and
ref.getPropertyName() = prop
}

View File

@@ -232,7 +232,9 @@ module TaintTracking {
HeapTaintStep() { heapStep(_, this) }
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
heapStep(pred, succ) and succ = this
ExtendedStaging::Taint::ref() and
heapStep(pred, succ) and
succ = this
}
}

View File

@@ -378,6 +378,7 @@ private module CachedSteps {
*/
cached
predicate basicLoadStep(DataFlow::Node pred, DataFlow::PropRead succ, string prop) {
ExtendedStaging::TypeTracking::ref() and
succ.accesses(pred, prop)
}
@@ -403,6 +404,7 @@ private module CachedSteps {
*/
cached
predicate callback(DataFlow::Node arg, DataFlow::SourceNode cb) {
ExtendedStaging::TypeTracking::ref() and
exists(DataFlow::InvokeNode invk, DataFlow::ParameterNode cbParm, DataFlow::Node cbArg |
arg = invk.getAnArgument() and
cbParm.flowsTo(invk.getCalleeNode()) and

View File

@@ -63,6 +63,7 @@ module PreCallGraphStep {
*/
cached
predicate loadStep(DataFlow::Node pred, DataFlow::Node succ, string prop) {
ExtendedStaging::TypeTracking::ref() and
any(PreCallGraphStep s).loadStep(pred, succ, prop)
}

View File

@@ -9,6 +9,7 @@ private import javascript
cached
private StmtContainer getStmtContainer(NodeInStmtContainer node) {
ExtendedStaging::Ast::ref() and
expr_containers(node, result)
or
stmt_containers(node, result)

View File

@@ -110,7 +110,9 @@ private class ExternalRemoteFlowSourceSpecEntryPoint extends API::EntryPoint {
private class ExternalRemoteFlowSource extends RemoteFlowSource {
RemoteFlowSourceAccessPath ap;
ExternalRemoteFlowSource() { this = ap.resolve().getAnImmediateUse() }
ExternalRemoteFlowSource() {
ExtendedStaging::Taint::ref() and this = ap.resolve().getAnImmediateUse()
}
override string getSourceType() { result = ap.getSourceType() }
}