Files
codeql/javascript/ql/lib/semmle/javascript/GlobalAccessPaths.qll

702 lines
23 KiB
Plaintext

/**
* Provides predicates for associating qualified names with data flow nodes.
*/
overlay[local]
module;
import javascript
private import semmle.javascript.dataflow.InferredTypes
private import semmle.javascript.dataflow.internal.FlowSteps as FlowSteps
private import semmle.javascript.internal.CachedStages
/**
* Provides predicates for associating access paths with data flow nodes.
*
* For example, `AccessPath::getAReferenceTo(x)` can be used to obtain the global access path
* that `x` refers to, as in the following sample:
* ```
* function f() {
* let v = foo.bar; // reference to 'foo.bar'
* v.baz; // reference to 'foo.bar.baz'
* }
*
* (function(ns) {
* ns.x; // reference to 'NS.x'
* })(NS = NS || {});
* ```
*
* A pseudo-property named `[number]` is sometimes used to represent array indices within an access path.
*/
module AccessPath {
/**
 * A data flow source node at which an access path may start.
 *
 * A source node qualifies unless it is a global variable access, a property read,
 * a property projection, a Closure `require` call, a parameter of an immediately
 * invoked function expression, or the target of an identity-function step.
 */
class Root extends DataFlow::SourceNode {
  Root() {
    not (
      this.accessesGlobal(_)
      or
      this instanceof DataFlow::PropRead
      or
      this instanceof PropertyProjection
      or
      this.asExpr() instanceof Closure::RequireCallExpr
      or
      exists(ImmediatelyInvokedFunctionExpr invoked |
        this = DataFlow::parameterNode(invoked.getAParameter())
      )
      or
      FlowSteps::identityFunctionStep(_, this)
    )
  }

  /** Holds if this is the pseudo-node in which all global access paths are rooted. */
  predicate isGlobal() { this = DataFlow::globalAccessPathRootPseudoNode() }
}
/**
 * A local variable that has exactly one explicit SSA definition
 * (implicit initialization is not counted).
 */
private class EffectivelyConstantVariable extends LocalVariable {
  EffectivelyConstantVariable() {
    1 = strictcount(SsaExplicitDefinition def | def.getSourceVariable() = this)
  }

  /** Gets the explicit SSA definition of this variable. */
  SsaExplicitDefinition getSsaDefinition() { this = result.getSourceVariable() }

  /** Gets the data flow node for the right-hand side of the definition, if there is one. */
  DataFlow::Node getValue() {
    exists(SsaExplicitDefinition def | def = this.getSsaDefinition() | result = def.getRhsNode())
  }
}
/**
 * Gets the access path obtained by extending `base` with the property name `prop`.
 *
 * The empty string denotes the empty access path: in that case the result is just
 * `prop`. Otherwise the two parts are joined with a `.`.
 */
bindingset[base, prop]
private string join(string base, string prop) {
  if base = "" then result = prop else result = base + "." + prop
}
/**
 * Holds if `variable` is likely to hold an array index, because one of its uses is
 * compared against the `length` property of something, or because it is a refinement
 * of a variable for which this holds.
 */
private predicate isLikelyArrayIndex(SsaVariable variable) {
  // Direct evidence: a use of the variable is relationally compared to a `.length` value.
  exists(DataFlow::PropRead lengthRead, Expr lengthExpr, RelationalComparison comparison |
    lengthRead.getPropertyName() = "length" and
    lengthRead.flowsToExpr(lengthExpr) and
    comparison.hasOperands(variable.getAUse(), lengthExpr)
  )
  or
  // Indirect evidence: the variable is defined by refining a likely index variable.
  exists(SsaRefinementNode refinement |
    refinement = variable.getDefinition() and
    isLikelyArrayIndex(refinement.getAnInput())
  )
}
/**
 * Holds if `prop` is likely a read of an array element at a non-constant index.
 */
private predicate isLikelyDynamicArrayAccess(DataFlow::PropRead prop) {
  // A for-of loop reads elements implicitly; that read is represented by the loop's lvalue node.
  exists(ForOfStmt loop | prop = DataFlow::lvalueNode(loop.getLValue()))
  or
  // An index access `x[i]` with no known property name, where `i` is a likely array index.
  not exists(prop.getPropertyName()) and
  exists(SsaVariable index |
    prop.getPropertyNameExpr() = index.getAUse() and
    isLikelyArrayIndex(index)
  )
}
/**
 * Gets the access path relative to `root` referred to by `node`.
 *
 * This holds for direct references as well as for aliases
 * established through local data flow.
 *
 * The empty string is the access path of a non-global `root` itself. Paths rooted
 * in the global pseudo-root start with a global variable name or a Closure namespace.
 * Array element reads that look dynamic contribute the pseudo-property `[number]`.
 *
 * Examples:
 * ```
 * function f(x) {
 *   let a = x.f.g; // access path relative to 'x' is 'f.g'
 *   let b = a.h; // access path relative to 'x' is 'f.g.h'
 * }
 * ```
 */
cached
private string fromReference(DataFlow::Node node, Root root) {
// Base case: a non-global root refers to itself via the empty access path.
root = node and
not root.isGlobal() and
result = ""
or
// A node has the same access path as its immediate predecessor.
result = fromReference(node.getImmediatePredecessor(), root)
or
// An access to a captured variable with a single explicit definition
// has the access path of the value assigned in that definition.
exists(EffectivelyConstantVariable var |
var.isCaptured() and
node.asExpr() = var.getAnAccess() and
result = fromReference(var.getValue(), root)
)
or
// A global variable access is named after the global, rooted in the global pseudo-root.
// The name `undefined` is excluded.
node.accessesGlobal(result) and
result != "undefined" and
root.isGlobal()
or
// A property read (that is not itself a global access) extends the path of its base,
// either with the property name or, for likely dynamic array accesses, with `[number]`.
not node.accessesGlobal(_) and
exists(DataFlow::PropRead prop | node = prop |
result = join(fromReference(prop.getBase(), root), prop.getPropertyName())
or
isLikelyDynamicArrayAccess(prop) and
result = join(fromReference(prop.getBase(), root), "[number]")
)
or
// A Closure `require` call is named by its Closure namespace, rooted in the global pseudo-root.
exists(Closure::RequireCallExpr req | node = req.flow() |
result = req.getClosureNamespace() and
root.isGlobal()
)
or
// A singleton property projection extends the path of the projected object
// with the string value of its selector.
exists(PropertyProjection proj | node = proj |
proj.isSingletonProjection() and
result = join(fromReference(proj.getObject(), root), proj.getASelector().getStringValue())
)
or
// Treat 'e || {}' as having the same name as 'e'
exists(LogOrExpr e | node.asExpr() = e |
e.getRightOperand().(ObjectExpr).getNumProperty() = 0 and
result = fromReference(e.getLeftOperand().flow(), root)
)
or
// Treat 'e && e.f' as having the same name as 'e.f'
exists(LogAndExpr e, Expr lhs, PropAccess rhs | node.asExpr() = e |
lhs = e.getLeftOperand() and
rhs = e.getRightOperand() and
(
// `lhs` and the base of `rhs` are accesses to the same variable ...
exists(Variable v |
lhs = v.getAnAccess() and
rhs.getBase() = v.getAnAccess()
)
or
// ... or property accesses with the same qualified name.
exists(string name |
lhs.(PropAccess).getQualifiedName() = name and
rhs.getBase().(PropAccess).getQualifiedName() = name
)
) and
result = fromReference(rhs.flow(), root)
)
}
/**
 * Holds if `rhs` is assigned to a global access path that it also refers to,
 * that is, `rhs` is the right-hand side of a self-assignment.
 *
 * This is typical of defensive initialization such as:
 * ```
 * foo = foo || {};
 * ```
 */
private predicate isSelfAssignment(DataFlow::Node rhs) {
  exists(Root globalRoot, string path |
    globalRoot = DataFlow::globalAccessPathRootPseudoNode() and
    path = fromRhs(rhs, globalRoot) and
    path = fromReference(rhs, globalRoot)
  )
}
/**
 * Holds if `file` contains an assignment to the global access path `accessPath`
 * that is not a self-assignment.
 */
private predicate isAssignedInFile(string accessPath, File file) {
  exists(DataFlow::Node assigned |
    accessPath = fromRhs(assigned, DataFlow::globalAccessPathRootPseudoNode()) and
    // Note: going through the AST node avoids unneeded materialization of DataFlow::Node.getFile()
    file = assigned.getAstNode().getFile() and
    not isSelfAssignment(assigned)
  )
}
/**
 * Holds if exactly one file contains an assignment to the global access path
 * `accessPath`, where self-assignments are not counted.
 */
overlay[global]
predicate isAssignedInUniqueFile(string accessPath) {
  1 = strictcount(File assigningFile | isAssignedInFile(accessPath, assigningFile))
}
/**
 * Gets the access path relative to `root`, which `node` is being assigned to, if any.
 *
 * Only holds for the immediate right-hand side of an assignment or property, not
 * for nodes that transitively flow there.
 *
 * Declarations of global functions, classes, enums and namespaces also count as
 * assignments to the declared name, rooted in the global pseudo-root.
 *
 * For example, the class nodes below all map to `foo.bar` relative to `x`:
 * ```
 * function f(x) {
 *   x.foo.bar = class {};
 *   x.foo = { bar: class {} };
 *   let alias = x;
 *   alias.foo.bar = class {};
 * }
 * ```
 */
cached
private string fromRhs(DataFlow::Node node, Root root) {
// `node` is the right-hand side of a property write; the path is the
// path of the written-to base extended with the property name.
exists(DataFlow::PropWrite write, string baseName |
node = write.getRhs() and
result = join(baseName, write.getPropertyName())
|
// The base refers to a known access path.
baseName = fromReference(write.getBase(), root)
or
// The base is itself being assigned to an access path (e.g. a nested object literal).
baseName = fromRhs(write.getBase(), root)
or
// The base variable is assigned to an access path later on; only
// base paths without a `.` are accepted in this case.
baseName = fromRhs(GetLaterAccess::getLaterBaseAccess(write), root) and
not baseName.matches("%.%")
)
or
// An expression assigned to a global variable.
exists(GlobalVariable var |
node = var.getAnAssignedExpr().flow() and
result = var.getName() and
root.isGlobal()
)
or
// The right-hand side of an assignment whose left-hand side refers to an access path.
exists(Assignment assign |
fromReference(assign.getLhs().flow(), root) = result and
node = assign.getRhs().flow()
)
or
// A function declaration whose identifier declares a global variable.
exists(FunctionDeclStmt fun |
node = DataFlow::valueNode(fun) and
result = fun.getIdentifier().(GlobalVarDecl).getName() and
root.isGlobal()
)
or
// A class declaration whose identifier declares a global variable.
exists(ClassDeclStmt cls |
node = DataFlow::valueNode(cls) and
result = cls.getIdentifier().(GlobalVarDecl).getName() and
root.isGlobal()
)
or
// An enum declaration whose identifier declares a global variable.
exists(EnumDeclaration decl |
node = DataFlow::valueNode(decl) and
result = decl.getIdentifier().(GlobalVarDecl).getName() and
root.isGlobal()
)
or
// A namespace declaration whose identifier declares a global variable.
exists(NamespaceDeclaration decl |
node = DataFlow::valueNode(decl) and
result = decl.getIdentifier().(GlobalVarDecl).getName() and
root.isGlobal()
)
}
/** A module for computing an access to a variable that happens after a property has been written onto it */
private module GetLaterAccess {
/**
 * Gets a reference to the SSA variable `variable`: either one of its uses, or the
 * target of its explicit definition.
 *
 * Only variables from `getARelevantVariableSimple` are considered.
 */
private VarRef getAVariableRef(SsaVariable variable) {
  variable = getARelevantVariableSimple() and
  (
    result = variable.getAUse()
    or
    exists(SsaExplicitDefinition def | def = variable.getDefinition() |
      result = def.getDef().getTarget()
    )
  )
}
/**
 * Gets an access to a variable that is written to in `write`, where the access is after the write.
 *
 * The result is a different reference to the same SSA variable within the same
 * container, which is not itself the base of a property write, and which either
 * follows the write node within the same basic block or lies in a basic block
 * given by `getASuccessorBBThatReadsVar(write)`.
 *
 * This allows `fromRhs` to compute an access path for e.g. the below example:
 * ```JavaScript
 * function foo(x) {
 *   var obj = {
 *     bar: x // `x` has the access path "foo.bar" starting from the root `this`.
 *   };
 *   this.foo = obj;
 * }
 * ```
 */
// NOTE: `pragma[noopt]` turns off optimizer reordering, so the order of the conjuncts below is deliberate.
pragma[noopt]
DataFlow::Node getLaterBaseAccess(DataFlow::PropWrite write) {
exists(
ControlFlowNode writeNode, BindingPattern access, VarRef otherAccess, SsaVariable variable,
StmtContainer container
|
// `access` is the variable reference that `write` writes a property onto.
access = getBaseVar(write) and
writeNode = write.getWriteNode() and
access = getAnAccessInContainer(variable, container, true) and
variable = getARelevantVariable() and // manual magic
// `otherAccess` is another reference to the same variable in the same container
// that is not the base of a property write.
otherAccess = getAnAccessInContainer(variable, container, false) and
access != otherAccess and
result.asExpr() = otherAccess
|
// `otherAccess` comes after the write within the same basic block ...
exists(BasicBlock bb, int i, int j |
bb.getNode(i) = writeNode and
bb.getNode(j) = otherAccess and
i < j
)
or
// ... or it is in one of the later basic blocks computed for `write`.
otherAccess.getBasicBlock() = getASuccessorBBThatReadsVar(write)
)
}
/**
 * Gets a variable reference that `write` writes a property to.
 *
 * This is the base expression of `write` itself, or the left-hand side of an
 * assignment (or the binding pattern of a declarator) whose right-hand side
 * (or initializer) is that base expression.
 */
VarRef getBaseVar(DataFlow::PropWrite write) {
  exists(Expr base | base = write.getBase().asExpr() |
    result = base
    or
    exists(Assignment assign | assign.getRhs() = base | result = assign.getLhs())
    or
    exists(VariableDeclarator decl | decl.getInit() = base | result = decl.getBindingPattern())
  )
}
/**
 * Gets a reference to `var` located in `container`.
 *
 * `usedInWrite` is `true` if the reference is the base variable of some property
 * write (see `getBaseVar`), and `false` otherwise.
 */
private VarRef getAnAccessInContainer(
  SsaVariable var, StmtContainer container, boolean usedInWrite
) {
  result = getAVariableRef(var) and
  container = result.getContainer() and
  (
    result = getBaseVar(_) and usedInWrite = true
    or
    not result = getBaseVar(_) and usedInWrite = false
  )
}
/**
 * Gets an SSA variable that may matter for the `GetLaterAccess` computations:
 * one with a use that `fromRhs` assigns an access path to.
 *
 * This is a coarse restriction that deliberately does not depend on `getAVariableRef`.
 */
overlay[caller]
pragma[inline]
private SsaVariable getARelevantVariableSimple() {
  // Such a use is a candidate result of `getLaterBaseAccess()`.
  exists(DataFlow::Node assigned |
    assigned.asExpr() = result.getAUse() and
    exists(fromRhs(assigned, _))
  )
}
/**
 * Gets an SSA variable that is relevant for the computations in the `GetLaterAccess` module.
 *
 * Unlike `getARelevantVariableSimple`, this builds on `getAVariableRef` (which itself
 * is restricted by `getARelevantVariableSimple`).
 */
private SsaVariable getARelevantVariable() {
  // Some property write uses a reference to the variable as its base.
  getAVariableRef(result) = getBaseVar(_) and
  // A single container holds both kinds of reference to the variable:
  exists(StmtContainer scope |
    // a "read": a reference that is not the base of a property write, and
    exists(getAnAccessInContainer(result, scope, false)) and
    // a "write": a reference that is the base of a property write.
    exists(getAnAccessInContainer(result, scope, true))
  )
}
/**
 * Gets a reachable basic block that lies below the basic block of `write` in the
 * dominator tree, where the base of `write` is a reference to a relevant variable.
 *
 * The result is either immediately dominated by the block containing the write node,
 * or immediately dominated by another result of this predicate for the same `write`.
 * Callers are expected to intersect this with the blocks that read the variable.
 */
private ReachableBasicBlock getASuccessorBBThatReadsVar(DataFlow::PropWrite write) {
  exists(BasicBlock dominator | dominator = result.getImmediateDominator() |
    // Base case: the immediate dominator is the block holding the write itself.
    exists(SsaVariable var, ControlFlowNode writeNode |
      var = getARelevantVariable() and
      getAVariableRef(var) = getBaseVar(write) and
      writeNode = write.getWriteNode() and
      dominator = writeNode.getBasicBlock()
    )
    or
    // Recursive case: the immediate dominator is an earlier result.
    dominator = getASuccessorBBThatReadsVar(write)
  )
}
}
/**
 * Gets a node referring to the access path `path` relative to the non-global `root`.
 * For the empty access path this includes `root` itself.
 *
 * Both direct references and aliases established through local data flow are found.
 *
 * For example:
 * ```
 * function f(x) {
 *   let a = x.f.g; // reference to (x, "f.g")
 *   let b = a.h; // reference to (x, "f.g.h")
 * }
 * ```
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAReferenceTo(Root root, string path) {
  not root.isGlobal() and
  fromReference(result, root) = path
}
/**
 * Gets a node referring to the global access path `path`.
 *
 * Both direct references and aliases established through local data flow are found.
 *
 * Examples:
 * ```
 * function f() {
 *   let v = foo.bar; // reference to 'foo.bar'
 *   v.baz; // reference to 'foo.bar.baz'
 * }
 *
 * (function(ns) {
 *   ns.x; // reference to 'NS.x'
 * })(NS = NS || {});
 * ```
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAReferenceTo(string path) {
  exists(Root globalRoot | globalRoot.isGlobal() | path = fromReference(result, globalRoot))
}
/**
 * Gets a node that is assigned to the access path `path` relative to the non-global `root`.
 *
 * Only the immediate right-hand side of an assignment or property is returned,
 * not nodes that transitively flow there.
 *
 * For example, the class nodes below are all assignments to `(x, "foo.bar")`.
 * ```
 * function f(x) {
 *   x.foo.bar = class {};
 *   x.foo = { bar: class {} };
 *   let alias = x;
 *   alias.foo.bar = class {};
 * }
 * ```
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAnAssignmentTo(Root root, string path) {
  not root.isGlobal() and
  fromRhs(result, root) = path
}
/**
 * Gets a node that is assigned to the global access path `path`.
 *
 * Only the immediate right-hand side of an assignment or property, or a global
 * declaration, is returned; nodes that transitively flow there are not.
 *
 * For example, the class nodes below are all assignments to `foo.bar`:
 * ```
 * foo.bar = class {};
 * foo = { bar: class {} };
 * (function(f) {
 *   f.bar = class {}
 * })(foo = foo || {});
 * ```
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAnAssignmentTo(string path) {
  exists(Root globalRoot | globalRoot.isGlobal() | path = fromRhs(result, globalRoot))
}
/**
 * Gets a node that either refers to, or is assigned to, the global access path `path`.
 *
 * This is the union of `getAReferenceTo(path)` and `getAnAssignmentTo(path)`.
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAReferenceOrAssignmentTo(string path) {
  result = [getAReferenceTo(path), getAnAssignmentTo(path)]
}
/**
 * Gets a node that either refers to, or is assigned to, the access path `path`
 * relative to `root`.
 *
 * This is the union of `getAReferenceTo(root, path)` and `getAnAssignmentTo(root, path)`.
 */
overlay[caller]
pragma[inline]
DataFlow::Node getAReferenceOrAssignmentTo(Root root, string path) {
  result = [getAReferenceTo(root, path), getAnAssignmentTo(root, path)]
}
/**
 * Holds if `pred` is assigned to some access path and `succ` refers to that same
 * access path, so that values can step from `pred` to `succ`.
 *
 * For global access paths, the step is only included when the path is assigned
 * in a single file (see `isAssignedInUniqueFile`).
 */
overlay[caller?]
pragma[inline]
predicate step(DataFlow::Node pred, DataFlow::Node succ) {
  // Access paths relative to a non-global root.
  exists(Root root, string path |
    succ = getAReferenceTo(root, path) and
    pred = getAnAssignmentTo(root, path)
  )
  or
  // Global access paths.
  exists(string globalPath |
    isAssignedInUniqueFile(globalPath) and
    pred = getAnAssignmentTo(globalPath) and
    succ = getAReferenceTo(globalPath)
  )
}
/**
 * Gets a `SourceNode` that refers to the same value or access path as the given node.
 *
 * The result is one of:
 * - a reference to the same (root, access path) pair that `node` refers to,
 * - a reference to the same access path under a root related to `node`'s root
 *   through an extend call, or
 * - a local source of `node`.
 */
overlay[caller]
pragma[inline]
DataFlow::SourceNode getAnAliasedSourceNode(DataFlow::Node node) {
// `node` and the result refer to the same access path relative to the same root.
exists(DataFlow::SourceNode root, string accessPath |
node = pragma[only_bind_into](AccessPath::getAReferenceTo(root, accessPath)) and
result = AccessPath::getAReferenceTo(root, accessPath)
)
or
// step over extend calls. Handle aliasing both ways through the extend call.
exists(
DataFlow::SourceNode rootOne, DataFlow::SourceNode rootTwo, string accessPath,
ExtendCall extendCall
|
// Each root is either the extend call itself or a local source of one of its operands.
rootOne = [extendCall, extendCall.getAnOperand().getALocalSource()] and
rootTwo = [extendCall, extendCall.getAnOperand().getALocalSource()] and
node = pragma[only_bind_into](AccessPath::getAReferenceTo(rootOne, accessPath)) and
result = AccessPath::getAReferenceTo(rootTwo, accessPath)
)
or
// Plain local data flow.
result = node.getALocalSource()
}
/**
* A module for reasoning about dominating reads and writes to access-paths.
*/
module DominatingPaths {
/**
 * A classification of access paths into reads and writes.
 *
 * Used as the `type` argument of `getAccessTo` and `rankedAccessPath`.
 */
private newtype AccessPathKind =
// The access reads the access path.
AccessPathRead() or
// The access writes the access path.
AccessPathWrite()
/**
 * Gets the `ranking`th access-path with `root` and `path` within `bb`.
 * And the access-path has type `type`.
 *
 * Accesses of both kinds are ranked together, ordered by their position in `bb`,
 * so that the ranks of reads and writes to the same access path can be compared.
 *
 * Only has a result if there exists both a read and write of the access-path within `bb`.
 */
pragma[nomagic]
private ControlFlowNode rankedAccessPath(
ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
) {
result =
rank[ranking](ControlFlowNode ref |
// Rank every access (read or write) to `(root, path)` inside `bb`.
ref = getAccessTo(root, path, _) and
ref.getBasicBlock() = bb and
// Prunes the accesses where there does not exists a read and write within the same basicblock.
// This could be more precise, but doing it like this avoids massive joins.
hasRead(bb) and
hasWrite(bb)
|
// Order by the index of the access within the basic block.
ref order by any(int i | ref = bb.getNode(i))
) and
// Report the kind of the access found at this rank.
result = getAccessTo(root, path, type)
}
/**
 * Holds if the basic block `bb` contains a read of some access path.
 *
 * INTERNAL: This predicate is only meant to be used inside `rankedAccessPath`.
 */
pragma[noinline]
private predicate hasRead(ReachableBasicBlock bb) {
  exists(ControlFlowNode readNode | readNode = getAccessTo(_, _, AccessPathRead()) |
    bb = readNode.getBasicBlock()
  )
}
/**
 * Holds if there exists an access-path write inside the basic-block `bb`.
 *
 * INTERNAL: This predicate is only meant to be used inside `rankedAccessPath`.
 */
pragma[noinline]
private predicate hasWrite(ReachableBasicBlock bb) {
  // Select writes here; the read side is covered separately by `hasRead`.
  bb = getAccessTo(_, _, AccessPathWrite()).getBasicBlock()
}
/**
 * Gets a `ControlFlowNode` that accesses `path` from `root`, where `type` tells
 * whether the access is a read or a write.
 *
 * The underlying read and write predicates combine the AccessPath API with the
 * SourceNode API, which gives basic support for access-paths with ambiguous roots.
 *
 * There is only a result if both a read and a write of the access-path exist.
 */
pragma[nomagic]
private ControlFlowNode getAccessTo(Root root, string path, AccessPathKind type) {
  // A read, provided some write of the same access path exists.
  type = AccessPathRead() and
  result = getAReadNode(root, path) and
  exists(getAWriteNode(root, path))
  or
  // A write, provided some read of the same access path exists.
  type = AccessPathWrite() and
  result = getAWriteNode(root, path) and
  exists(getAReadNode(root, path))
}
/**
 * Gets a `ControlFlowNode` for a property read of `path` from `root`, found either
 * as a direct property read on `root` or through `fromReference`.
 *
 * Only used within `getAccessTo`.
 */
private ControlFlowNode getAReadNode(Root root, string path) {
  exists(DataFlow::PropRead read | result = read.asExpr() |
    read = root.getAPropertyRead(path)
    or
    fromReference(read, root) = path
  )
}
/**
 * Gets the write node of a property write to `path` from `root`, found either as a
 * direct property write on `root` or through `fromRhs` on the written value.
 *
 * Only used within `getAccessTo`.
 */
private ControlFlowNode getAWriteNode(Root root, string path) {
  exists(DataFlow::PropWrite write | result = write.getWriteNode() |
    write = root.getAPropertyWrite(path)
    or
    fromRhs(write.getRhs(), root) = path
  )
}
/**
 * Gets a basic block containing a write to the access path given by `root` and `path`,
 * provided that a read of the same access path exists as well.
 */
private ReachableBasicBlock getAWriteBlock(Root root, string path) {
  exists(getAccessTo(root, path, AccessPathRead())) and // helps performance
  exists(ControlFlowNode writeNode | writeNode = getAccessTo(root, path, AccessPathWrite()) |
    result = writeNode.getBasicBlock()
  )
}
/**
 * EXPERIMENTAL. This API may change in the future.
 *
 * Holds for `read` if there exists a previous write to the same access-path that dominates this read.
 *
 * Three cases are covered: a write earlier in the same basic block, a write in a
 * basic block that strictly dominates the block of `read`, and a dynamic write
 * earlier in the same basic block that uses the same SSA variable as property name.
 */
cached
predicate hasDominatingWrite(DataFlow::PropRead read) {
// NOTE: `and` binds tighter than `or`, so this stage reference is only conjoined with the first case.
Stages::DataFlowStage::ref() and
// within the same basic block.
exists(ReachableBasicBlock bb, Root root, string path, int ranking |
read.asExpr() = rankedAccessPath(bb, root, path, ranking, AccessPathRead()) and
// Some write to the same access path has a lower rank, i.e. comes earlier in `bb`.
exists(rankedAccessPath(bb, root, path, any(int prev | prev < ranking), AccessPathWrite()))
)
or
// across basic blocks.
exists(Root root, string path |
read.asExpr() = getAccessTo(root, path, AccessPathRead()) and
getAWriteBlock(root, path).strictlyDominates(read.getBasicBlock())
)
or
// Dynamic write where the same variable is used to index the read and write (in the same basic block)
// For example, this is true for `dst[x]` on line 2 below:
// ```js
// dst[x] = {};
// dst[x][y] = src[y];
// ```
exists(DataFlow::PropWrite write, BasicBlock bb, int i, int j, SsaVariable ssaVar |
// The write is a property write on a local source of the base of `read`.
write = read.getBase().getALocalSource().getAPropertyWrite() and
bb.getNode(i) = write.getWriteNode() and
bb.getNode(j) = read.asExpr() and
i < j and
// Both property names are uses of the same SSA variable.
write.getPropertyNameExpr() = ssaVar.getAUse() and
read.getPropertyNameExpr() = ssaVar.getAUse()
)
}
}
}