mirror of
https://github.com/github/codeql.git
synced 2026-05-25 00:27:09 +02:00
PS: Add API graph files.
This commit is contained in:
681
powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll
Normal file
681
powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll
Normal file
@@ -0,0 +1,681 @@
|
||||
/**
|
||||
* Provides an implementation of _API graphs_, which allow efficient modelling of how a given
|
||||
* value is used by the code base or how values produced by the code base are consumed by a library.
|
||||
*
|
||||
* See `API::Node` for more details.
|
||||
*/
|
||||
|
||||
private import powershell
|
||||
private import semmle.code.powershell.dataflow.DataFlow
|
||||
private import semmle.code.powershell.typetracking.ApiGraphShared
|
||||
private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl
|
||||
private import semmle.code.powershell.controlflow.Cfg
|
||||
private import semmle.code.powershell.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch
|
||||
|
||||
/**
|
||||
* Provides classes and predicates for working with APIs used in a database.
|
||||
*/
|
||||
module API {
|
||||
/**
|
||||
* A node in the API graph, that is, a value that can be tracked interprocedurally.
|
||||
*
|
||||
* The API graph is a graph for tracking values of certain types in a way that accounts for inheritance
|
||||
* and interprocedural data flow.
|
||||
*
|
||||
* API graphs are typically used to identify "API calls", that is, calls to an external function
|
||||
* whose implementation is not necessarily part of the current codebase.
|
||||
*
|
||||
* ### Basic usage
|
||||
*
|
||||
* The most basic use of API graphs is typically as follows:
|
||||
* 1. Start with `API::mod`, `API::type`, or `API::getTopLevelCall` for the relevant library.
|
||||
* 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function.
|
||||
* 3. Map the resulting API graph nodes to data-flow nodes, using `asSource`, `asSink`, or `asCall`.
|
||||
*
|
||||
* ### Data flow
|
||||
*
|
||||
* The member predicates on this class generally take inheritance and data flow into account.
|
||||
*
|
||||
* ### Backward data flow
|
||||
*
|
||||
* When inspecting the arguments of a call, the data flow direction is backwards.
|
||||
*
|
||||
* ### Inheritance
|
||||
*
|
||||
* When a class or module object is tracked, inheritance is taken into account.
|
||||
*
|
||||
* ### Backward data flow and classes
|
||||
*
|
||||
* When inspecting the arguments of a call, and the value flowing into that argument is a user-defined class (or an instance thereof),
|
||||
* uses of `getMethod` will find method definitions in that class (including inherited ones) rather than finding method calls.
|
||||
*
|
||||
* When modeling an external library that is known to call a specific method on a parameter, this makes
|
||||
* it possible to find the corresponding method definition in user code.
|
||||
*
|
||||
* ### Strict left-to-right evaluation
|
||||
*
|
||||
* Most member predicates on this class are intended to be chained, and are always evaluated from left to right, which means
|
||||
* the caller should restrict the initial set of values.
|
||||
*
|
||||
* For example, in the following snippet, we always find the uses of `Foo` before finding calls to `bar`:
|
||||
* ```ql
|
||||
* API::mod("Foo").getMethod("bar")
|
||||
* ```
|
||||
* In particular, the implementation will never look for calls to `bar` and work backward from there.
|
||||
*
|
||||
* Beware of the footgun that is to use API graphs with an unrestricted receiver:
|
||||
* ```ql
|
||||
* API::Node barCall(API::Node base) {
|
||||
* result = base.getMethod("bar") // Do not do this!
|
||||
* }
|
||||
* ```
|
||||
* The above predicate does not restrict the receiver, and will thus perform an interprocedural data flow
|
||||
* search starting at every node in the graph, which is very expensive.
|
||||
*/
|
||||
class Node extends Impl::TApiNode {
|
||||
/**
 * Gets a data-flow node that this value may flow to interprocedurally.
 *
 * Like `asSource()`, except that nodes transitively reachable by data flow
 * are included as well. See `asSource()` for examples.
 */
bindingset[this]
pragma[inline_late]
DataFlow::Node getAValueReachableFromSource() {
  getAValueReachableFromSourceInline(this) = result
}
|
||||
|
||||
/** Gets a data-flow node at which this value enters the current codebase. */
bindingset[this]
pragma[inline_late]
DataFlow::LocalSourceNode asSource() { asSourceInline(this) = result }
|
||||
|
||||
/**
 * Gets a data-flow node at which this value potentially flows into an
 * external library.
 */
bindingset[this]
pragma[inline_late]
DataFlow::Node asSink() { asSinkInline(this) = result }
|
||||
|
||||
/** Gets a callable that can reach this sink. */
bindingset[this]
pragma[inline_late]
DataFlow::CallableNode asCallable() {
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::asCallable(reached, result)
  )
}
|
||||
|
||||
/**
 * Gets a data-flow node that transitively flows to this value, provided that
 * this value corresponds to a sink.
 *
 * Like `asSink()`, except that nodes which transitively reach a sink by data
 * flow are included as well. See `asSink()` for examples.
 */
bindingset[this]
pragma[inline_late]
DataFlow::Node getAValueReachingSink() { getAValueReachingSinkInline(this) = result }
|
||||
|
||||
/** Gets the call that this API node refers to, if it is a method access node. */
bindingset[this]
pragma[inline_late]
DataFlow::CallNode asCall() { Impl::MkMethodAccessNode(result) = this }
|
||||
|
||||
/**
 * Gets a node for an access to the member named `m` whose qualifier is this value.
 */
pragma[inline]
Node getMember(string m) {
  // Deliberately not 'inline_late': 'm' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::memberEdge(reached, m, result)
  )
}
|
||||
|
||||
/**
 * Gets a node that may refer to an instance of the module or class that this
 * API node represents.
 */
bindingset[this]
pragma[inline_late]
Node getInstance() {
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::instanceEdge(reached, result)
  )
}
|
||||
|
||||
/**
 * Gets a call to `method` that has this value as its receiver, or the
 * definition of `method` on an object that can reach this sink.
 */
pragma[inline]
Node getMethod(string method) {
  // TODO: Consider 'getMethodTarget(method)' for looking up method definitions?
  // Deliberately not 'inline_late': 'method' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::methodEdge(reached, method, result)
  )
}
|
||||
|
||||
/** Gets the result of this call, or the return value of this callable. */
bindingset[this]
pragma[inline_late]
Node getReturn() {
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::returnEdge(reached, result)
  )
}
|
||||
|
||||
/**
 * Gets the result of a call to `method` that has this value as its receiver,
 * or the return value of `method` defined on an object that can reach this sink.
 *
 * Shorthand for `getMethod(method).getReturn()`.
 */
pragma[inline]
Node getReturn(string method) {
  // Deliberately not 'inline_late': 'method' may be used as an input or as an output.
  exists(Node access | access = this.getMethod(method) | result = access.getReturn())
}
|
||||
|
||||
/** Gets the `n`th positional argument to this call. */
pragma[inline]
Node getArgument(int n) {
  // Deliberately not 'inline_late': 'n' may be used as an input or as an output.
  // No epsilon step is taken here; the edge starts directly at `this`.
  Impl::positionalArgumentEdge(this, n, result)
}
|
||||
|
||||
/** Gets the keyword argument called `name` that is passed to this call. */
pragma[inline]
Node getKeywordArgument(string name) {
  // Deliberately not 'inline_late': 'name' may be used as an input or as an output.
  // No epsilon step is taken here; the edge starts directly at `this`.
  Impl::keywordArgumentEdge(this, name, result)
}
|
||||
|
||||
/**
 * Gets the `n`th positional parameter of this callable, or the `n`th
 * positional argument to this call.
 *
 * Note: for historical reasons this predicate may refer to an argument of a
 * call, but that may change in the future. Prefer `getArgument(n)` when an
 * argument is meant.
 */
pragma[inline]
Node getParameter(int n) {
  // Deliberately not 'inline_late': 'n' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::positionalParameterOrArgumentEdge(reached, n, result)
  )
}
|
||||
|
||||
/**
 * Gets the keyword parameter called `name` of this callable, or the keyword
 * argument called `name` that is passed to this call.
 *
 * Note: for historical reasons this predicate may refer to an argument of a
 * call, but that may change in the future. Prefer `getKeywordArgument(name)`
 * when an argument is meant.
 */
pragma[inline]
Node getKeywordParameter(string name) {
  // Deliberately not 'inline_late': 'name' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::keywordParameterOrArgumentEdge(reached, name, result)
  )
}
|
||||
|
||||
/** Gets the argument that is passed at argument position `pos` of this call. */
pragma[inline]
Node getArgumentAtPosition(DataFlowDispatch::ArgumentPosition pos) {
  // Deliberately not 'inline_late': 'pos' may be used as an input or as an output.
  // No epsilon step is needed because 'this' must be a call.
  Impl::argumentEdge(pragma[only_bind_out](this), pos, result)
}
|
||||
|
||||
/** Gets the parameter that sits at position `pos` of this callable. */
pragma[inline]
Node getParameterAtPosition(DataFlowDispatch::ParameterPosition pos) {
  // Deliberately not 'inline_late': 'pos' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::parameterEdge(reached, pos, result)
  )
}
|
||||
|
||||
/**
 * Gets a representative for the `content` of this value.
 *
 * When possible, it is preferable to use one of the specialized variants of
 * this predicate, such as `getAnElement`.
 *
 * Concretely, this gets sources where `content` is read from this value, as
 * well as sinks where `content` is stored onto this value or onto an object
 * that can reach this sink.
 */
pragma[inline]
Node getContent(DataFlow::Content content) {
  // Deliberately not 'inline_late': 'content' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::contentEdge(reached, content, result)
  )
}
|
||||
|
||||
/**
 * Gets a representative for the `contents` of this value.
 *
 * See `getContent()` for more details.
 */
bindingset[this, contents]
pragma[inline_late]
Node getContents(DataFlow::ContentSet contents) {
  // Content edges are always generated with getAStoreContent, and the graph
  // is always queried with getAReadContent.
  exists(DataFlow::Content readContent | readContent = contents.getAReadContent() |
    result = this.getContent(readContent)
  )
}
|
||||
|
||||
/** Gets a representative for the instance field called `name`. */
pragma[inline]
Node getField(string name) {
  // Deliberately not 'inline_late': 'name' may be used as an input or as an output.
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::fieldEdge(reached, name, result)
  )
}
|
||||
|
||||
/** Gets a representative for an arbitrary element of this collection. */
bindingset[this]
pragma[inline_late]
Node getAnElement() {
  exists(Node reached | reached = this.getAnEpsilonSuccessor() |
    Impl::elementEdge(reached, result)
  )
}
|
||||
|
||||
/**
 * Gets the data-flow node that gives rise to this node, if any.
 *
 * Only sink, forward, backward and method-access nodes have an inducing node.
 */
DataFlow::Node getInducingNode() {
  this = Impl::MkSinkNode(result)
  or
  this = Impl::MkForwardNode(result, _)
  or
  this = Impl::MkBackwardNode(result, _)
  or
  this = Impl::MkMethodAccessNode(result)
}
|
||||
|
||||
/**
 * Gets the location of this node: an `EmptyLocation` for the root node,
 * otherwise the location of the inducing data-flow node.
 */
Location getLocation() {
  result instanceof EmptyLocation and
  this instanceof RootNode
  or
  exists(DataFlow::Node inducing | inducing = this.getInducingNode() |
    result = inducing.getLocation()
  )
}
|
||||
|
||||
/**
|
||||
* Gets a textual representation of this element.
|
||||
*/
|
||||
string toString() { none() }
|
||||
|
||||
/** Gets an epsilon successor of this node, as given by `getAnEpsilonSuccessorInline`. */
pragma[inline]
private Node getAnEpsilonSuccessor() { getAnEpsilonSuccessorInline(this) = result }
|
||||
}
|
||||
|
||||
/** The root node of the API graph. */
private class RootNode extends Node, Impl::MkRoot {
  override string toString() { "Root()" = result }
}
|
||||
|
||||
/** A node that represents a type-tracking state reached while tracking forwards. */
private class ForwardNode extends Node, Impl::MkForwardNode {
  private DataFlow::LocalSourceNode origin;
  private TypeTracker state;

  ForwardNode() { Impl::MkForwardNode(origin, state) = this }

  /** Gets a textual representation; the tracker is omitted when it is the start state. */
  override string toString() {
    state.start() and
    result = "ForwardNode(" + origin + ")"
    or
    not state.start() and
    result = "ForwardNode(" + origin + ", " + state + ")"
  }
}
|
||||
|
||||
/** A node that represents a type-tracking state reached while tracking backwards. */
private class BackwardNode extends Node, Impl::MkBackwardNode {
  private DataFlow::LocalSourceNode origin;
  private TypeTracker state;

  BackwardNode() { Impl::MkBackwardNode(origin, state) = this }

  /** Gets a textual representation; the tracker is omitted when it is the start state. */
  override string toString() {
    state.start() and
    result = "BackwardNode(" + origin + ")"
    or
    not state.start() and
    result = "BackwardNode(" + origin + ", " + state + ")"
  }
}
|
||||
|
||||
/** A node representing a module/class object with epsilon edges to its descendants. */
private class ModuleNode extends Node, Impl::MkModule {
  /** Gets the module represented by this API node. */
  string getModule() { this = Impl::MkModule(result) }

  override string toString() { result = "Module(" + this.getModule() + ")" }

  /**
   * Gets the type node whose qualified name is this module's name, followed
   * by `.`, followed by `name`.
   *
   * `name` is computed from the type's qualified name with `suffix`, so that
   * it is bound by this predicate. Using `name` only as an operand of a
   * string concatenation would leave it unbound.
   */
  TypeNode getType(string name) {
    // TODO: Check that name exists in module
    exists(string moduleName, string qualifiedType |
      moduleName = this.getModule() and
      qualifiedType = result.getType()
    |
      // Candidate for `name`: everything after "<moduleName>" plus one separator character.
      name = qualifiedType.suffix(moduleName.length() + 1) and
      // Keep the original relation, which also checks that the separator is ".".
      qualifiedType = moduleName + "." + name
    )
  }
}
|
||||
|
||||
/** A node representing a type, identified by its qualified name. */
private class TypeNode extends Node, Impl::MkType {
  /** Gets the qualified name of the type represented by this API node. */
  string getType() { Impl::MkType(result) = this }

  override string toString() { result = "Type(" + this.getType() + ")" }
}
|
||||
|
||||
/** A node representing instances of a module/class with epsilon edges to its ancestors. */
private class InstanceUp extends Node, Impl::MkInstanceUp {
  /** Gets the qualified name of the type whose instances this API node represents. */
  string getType() { Impl::MkInstanceUp(result) = this }

  override string toString() { result = "ModuleInstanceUp(" + this.getType() + ")" }
}
|
||||
|
||||
/** A node representing instances of a module/class with epsilon edges to its descendants. */
private class InstanceDownNode extends Node, Impl::MkInstanceDown {
  /** Gets the qualified name of the type whose instances this API node represents. */
  string getType() { Impl::MkInstanceDown(result) = this }

  override string toString() { result = "ModuleInstanceDown(" + this.getType() + ")" }
}
|
||||
|
||||
/** A node that stands for the method being invoked at a method call. */
class MethodAccessNode extends Node, Impl::MkMethodAccessNode {
  override string toString() {
    exists(DataFlow::CallNode invocation | invocation = this.asCall() |
      result = "MethodAccessNode(" + invocation + ")"
    )
  }
}
|
||||
|
||||
/**
 * A node for an argument, the right-hand side of a store, or a value returned
 * from a callable.
 *
 * Such a node may be the starting point of backtracking, and has epsilon
 * edges to the backward nodes corresponding to `getALocalSource`.
 */
private class SinkNode extends Node, Impl::MkSinkNode {
  override string toString() {
    exists(DataFlow::Node inducing | inducing = this.getInducingNode() |
      result = "SinkNode(" + inducing + ")"
    )
  }
}
|
||||
|
||||
/**
 * An API entry point.
 *
 * By default, API graph nodes are only created for nodes that come from an
 * external library or escape into an external library. The points where
 * values cross the boundary between codebases are called "entry points".
 *
 * Anything in the global scope is considered to be an entry point, but
 * additional entry points may be added by extending this class.
 */
abstract class EntryPoint extends string {
  // Note: this class can be deprecated in Ruby, but is still referenced by shared code in ApiGraphModels.qll,
  // where it can't be removed since other languages are still dependent on the EntryPoint class.
  bindingset[this]
  EntryPoint() { any() }

  /** Gets a data-flow node that is a use-node for this entry point. None by default. */
  DataFlow::LocalSourceNode getASource() { none() }

  /** Gets a data-flow node that is a def-node for this entry point. None by default. */
  DataFlow::Node getASink() { none() }

  /** Gets a call that is a method access node for this entry point. None by default. */
  DataFlow::CallNode getACall() { none() }

  /** Gets an API node for this entry point. */
  API::Node getANode() { Impl::entryPointEdge(this, result) }
}
|
||||
|
||||
// Ensure all entry points are imported from ApiGraphs.qll
|
||||
private module ImportEntryPoints {
|
||||
private import semmle.code.powershell.frameworks.data.ModelsAsData
|
||||
}
|
||||
|
||||
/** Gets the root node of the API graph. */
Node root() { result = any(RootNode r) }
|
||||
|
||||
/** Gets the node representing the module whose qualified name is `qualifiedModule`. */
ModuleNode mod(string qualifiedModule) { Impl::MkModule(qualifiedModule) = result }
|
||||
|
||||
/** Gets the node representing the type whose qualified name is `qualifiedType`. */
TypeNode type(string qualifiedType) { Impl::MkType(qualifiedType) = result }
|
||||
|
||||
/**
|
||||
* Gets an unqualified call at the top-level with the given method name.
|
||||
*/
|
||||
pragma[inline]
|
||||
MethodAccessNode getTopLevelCall(string name) { Impl::toplevelCall(name, result) }
|
||||
|
||||
/**
 * Holds if `node` is paired with the type-tracker state `t`.
 *
 * Every local source node is paired with the start state; further pairs are
 * obtained by `track`ing from a pair that already holds.
 */
pragma[nomagic]
private predicate isReachable(DataFlow::LocalSourceNode node, TypeTracker t) {
  exists(node) and
  t.start()
  or
  exists(DataFlow::LocalSourceNode earlier, TypeTracker before |
    node = earlier.track(before, t) and
    isReachable(earlier, before)
  )
}
|
||||
|
||||
/** The language-specific input passed to the shared API graph library. */
private module SharedArg implements ApiGraphSharedSig {
  class ApiNode = Node;

  /** Gets the forward node for `node` in type-tracker state `t`. */
  ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) {
    Impl::MkForwardNode(node, t) = result
  }

  /** Gets the backward node for `node` in type-tracker state `t`. */
  ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) {
    Impl::MkBackwardNode(node, t) = result
  }

  /** Gets the sink node for `node`. */
  ApiNode getSinkNode(DataFlow::Node node) { Impl::MkSinkNode(node) = result }

  /** Holds for no pair of nodes: no extra epsilon edges are contributed here. */
  pragma[nomagic]
  predicate specificEpsilonEdge(ApiNode pred, ApiNode succ) { none() }
}
|
||||
|
||||
/** INTERNAL USE ONLY. */
|
||||
module Internal {
|
||||
private module MkShared = ApiGraphShared<SharedArg>;
|
||||
|
||||
import MkShared
|
||||
}
|
||||
|
||||
private import Internal
|
||||
import Internal::Public
|
||||
|
||||
cached
|
||||
private module Impl {
|
||||
cached
newtype TApiNode =
  /** The root of the API graph. */
  MkRoot() or
  /** The method accessed at `call`, synthetically treated as a separate object. */
  MkMethodAccessNode(DataFlow::CallNode call) or
  /**
   * A module whose name is the name of some `UsingStmt`, the qualified command
   * name of some `Cmd`, or the module name of some `ModuleManifest`.
   */
  MkModule(string qualifiedModule) {
    qualifiedModule = any(ModuleManifest manifest).getModuleName()
    or
    qualifiedModule = any(Cmd cmd).getQualifiedCommandName()
    or
    qualifiedModule = any(UsingStmt using).getName()
  } or
  /** A type whose qualified name is the string value of some `ConstantValue`. */
  MkType(string qualifiedType) { qualifiedType = any(ConstantValue cv).asString() } or // TODO
  /** Instances of the type `qualifiedType`, with epsilon edges to its ancestors. */
  MkInstanceUp(string qualifiedType) { exists(MkType(qualifiedType)) } or
  /** Instances of the type `qualifiedType`, with epsilon edges to its descendants, and to its upward node. */
  MkInstanceDown(string qualifiedType) { exists(MkType(qualifiedType)) } or
  /** Intermediate node for following forward data flow. */
  MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
  /** Intermediate node for following backward data flow. */
  MkBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
  /** A data-flow node for which `needsSinkNode` holds. */
  MkSinkNode(DataFlow::Node node) { needsSinkNode(node) }
|
||||
|
||||
/**
 * Holds if a sink node should exist for `node`: it is an argument, the source
 * of a type-tracking store step, a return node of some callable, or a sink of
 * some entry point.
 */
private predicate needsSinkNode(DataFlow::Node node) {
  exists(EntryPoint entry | node = entry.getASink())
  or
  exists(DataFlow::CallableNode callable | node = callable.getAReturnNode())
  or
  TypeTrackingInput::storeStep(node, _, _)
  or
  node instanceof DataFlowPrivate::ArgumentNode
}
|
||||
|
||||
bindingset[e]
|
||||
pragma[inline_late]
|
||||
private DataFlow::Node getNodeFromExpr(Expr e) { result.asExpr().getExpr() = e }
|
||||
|
||||
/**
 * Holds if `node` is the method access node of a call named `name` whose
 * expression has a `TopLevel` as its enclosing scope.
 */
cached
predicate toplevelCall(string name, Node node) {
  exists(DataFlow::CallNode invocation |
    node = MkMethodAccessNode(invocation) and
    name = invocation.getName()
  |
    invocation.asExpr().getExpr().getEnclosingScope() instanceof TopLevel
  )
}
|
||||
|
||||
/**
 * Holds if there is an edge labelled `name` from the receiver `pred` to the
 * method access node `succ` of a call to `name` on that receiver.
 */
cached
predicate callEdge(Node pred, string name, Node succ) {
  exists(DataFlow::CallNode invocation |
    name = invocation.getName() and
    succ = MkMethodAccessNode(invocation)
  |
    // edge from the receiver to the method call node
    pred = getForwardEndNode(getALocalSourceStrict(invocation.getQualifier()))
  )
}
|
||||
|
||||
/**
 * Holds if `pred` is a module node and `succ` is the type node that
 * `ModuleNode.getType` yields for `name` on that module.
 */
cached
predicate typeEdge(Node pred, string name, Node succ) {
  succ = pred.(ModuleNode).getType(name)
}
|
||||
|
||||
/**
 * Holds if `succ` is the forward start node of a member access named `name`,
 * and `pred` is the forward end node of a local source of its qualifier.
 */
cached
predicate memberEdge(Node pred, string name, Node succ) {
  exists(MemberExpr access |
    name = access.getMemberName() and
    succ = getForwardStartNode(getNodeFromExpr(access))
  |
    pred = getForwardEndNode(getALocalSourceStrict(getNodeFromExpr(access.getQualifier())))
  )
}
|
||||
|
||||
/**
 * Holds if `succ` is the method access node of a call named `name`, and
 * `pred` is either
 * - the forward end node of a local source of the call's qualifier, or
 * - the node of a module whose manifest exports a cmdlet or function called `name`.
 */
cached
predicate methodEdge(Node pred, string name, Node succ) {
  exists(DataFlow::CallNode invocation |
    name = invocation.getName() and
    succ = MkMethodAccessNode(invocation)
  |
    // receiver -> call
    pred = getForwardEndNode(getALocalSourceStrict(invocation.getQualifier()))
    or
    // module -> call to something the module's manifest exports
    exists(ModuleManifest manifest |
      pred = mod(manifest.getModuleName()) and
      (
        name = manifest.getACmdLetToExport()
        or
        name = manifest.getAFunctionToExport()
      )
    )
  )
}
|
||||
|
||||
/** Holds if `apiNode` is the backward start node of `callable`. */
cached
predicate asCallable(Node apiNode, DataFlow::CallableNode callable) {
  getBackwardStartNode(callable) = apiNode
}
|
||||
|
||||
/**
 * Holds if there is an edge labelled `content` from `pred` to `succ`, induced
 * by a type-tracking load step or store step.
 *
 * In both cases `content` is a store content of the step's content set, and
 * `pred` is a forward or backward end node of a local source of the object.
 */
cached
predicate contentEdge(Node pred, DataFlow::Content content, Node succ) {
  exists(DataFlow::Node base, DataFlow::Node val, DataFlow::ContentSet contents |
    content = contents.getAStoreContent() and
    pred = getForwardOrBackwardEndNode(getALocalSourceStrict(base))
  |
    // read: `x -> x.foo` with content "foo"
    TypeTrackingInput::loadStep(base, val, contents) and
    succ = getForwardStartNode(val)
    or
    // write: from `object.c = value` generate `object -> value` with content `c`
    TypeTrackingInput::storeStep(val, base, contents) and
    succ = MkSinkNode(val)
  )
}
|
||||
|
||||
/** Holds if there is a content edge from `pred` to `succ` for the field content `name`. */
cached
predicate fieldEdge(Node pred, string name, Node succ) {
  contentEdge(pred, DataFlowPrivate::TFieldContent(name), succ)
}
|
||||
|
||||
/**
 * Holds if there is a content edge from `pred` to `succ` whose content is a
 * read content of an "any element" content set.
 */
cached
predicate elementEdge(Node pred, Node succ) {
  exists(DataFlow::ContentSet elements | elements.isAnyElement() |
    contentEdge(pred, elements.getAReadContent(), succ)
  )
}
|
||||
|
||||
/**
 * Holds if `pred` is the backward end node of a callable and `succ` is the
 * forward start node of that callable's source parameter at position `paramPos`.
 */
cached
predicate parameterEdge(Node pred, DataFlowDispatch::ParameterPosition paramPos, Node succ) {
  exists(DataFlow::CallableNode fn, DataFlowPrivate::ParameterNodeImpl param |
    pred = getBackwardEndNode(fn) and
    succ = getForwardStartNode(param)
  |
    param.isSourceParameterOf(fn.asCallableAstNode(), paramPos)
  )
}
|
||||
|
||||
/**
 * Holds if `pred` is the method access node of a call and `succ` is the sink
 * node of that call's source argument at position `argPos`.
 */
cached
predicate argumentEdge(Node pred, DataFlowDispatch::ArgumentPosition argPos, Node succ) {
  exists(DataFlow::CallNode invocation, DataFlowPrivate::ArgumentNode arg |
    pred = MkMethodAccessNode(invocation) and
    succ = MkSinkNode(arg)
  |
    arg.sourceArgumentOf(invocation.asExpr(), argPos)
  )
}
|
||||
|
||||
/**
 * Holds if there is an argument edge from `pred` to `succ` for the positional
 * argument position `n` (with the empty named set).
 */
cached
predicate positionalArgumentEdge(Node pred, int n, Node succ) {
  exists(DataFlowDispatch::ArgumentPosition pos |
    pos.isPositional(n, DataFlowPrivate::emptyNamedSet())
  |
    argumentEdge(pred, pos, succ)
  )
}
|
||||
|
||||
/** Holds if there is an argument edge from `pred` to `succ` for the keyword position `name`. */
cached
predicate keywordArgumentEdge(Node pred, string name, Node succ) {
  exists(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name) |
    argumentEdge(pred, pos, succ)
  )
}
|
||||
|
||||
/**
 * Holds if there is a parameter edge from `pred` to `succ` for the positional
 * parameter position `n` (with the empty named set).
 */
private predicate positionalParameterEdge(Node pred, int n, Node succ) {
  exists(DataFlowDispatch::ParameterPosition pos |
    pos.isPositional(n, DataFlowPrivate::emptyNamedSet())
  |
    parameterEdge(pred, pos, succ)
  )
}
|
||||
|
||||
/** Holds if there is a parameter edge from `pred` to `succ` for the keyword position `name`. */
private predicate keywordParameterEdge(Node pred, string name, Node succ) {
  exists(DataFlowDispatch::ParameterPosition pos | pos.isKeyword(name) |
    parameterEdge(pred, pos, succ)
  )
}
|
||||
|
||||
/**
 * Holds if `pred -> succ` is either a positional parameter edge or a
 * positional argument edge for index `n`.
 */
cached
predicate positionalParameterOrArgumentEdge(Node pred, int n, Node succ) {
  positionalParameterEdge(pred, n, succ)
  or
  positionalArgumentEdge(pred, n, succ)
}
|
||||
|
||||
/**
 * Holds if `pred -> succ` is either a keyword parameter edge or a keyword
 * argument edge for `name`.
 */
cached
predicate keywordParameterOrArgumentEdge(Node pred, string name, Node succ) {
  keywordParameterEdge(pred, name, succ)
  or
  keywordArgumentEdge(pred, name, succ)
}
|
||||
|
||||
/**
 * Holds if `pred` is a type node and `succ` is the forward start node of
 * either an object creation whose constructed type name is that type, or a
 * parameter whose static type is that type.
 */
cached
predicate instanceEdge(Node pred, Node succ) {
  exists(string typeName | pred = MkType(typeName) |
    exists(DataFlow::ParameterNode param |
      succ = getForwardStartNode(param) and
      param.getParameter().getStaticType() = typeName
    )
    or
    exists(DataFlow::ObjectCreationNode creation |
      succ = getForwardStartNode(creation) and
      creation.getConstructedTypeName() = typeName
    )
  )
}
|
||||
|
||||
/**
 * Holds if `succ` is the return of `pred`: either `pred` is the method access
 * node of a call and `succ` is the forward start node of that call, or `pred`
 * is the backward end node of a callable and `succ` is the sink node of one of
 * its return nodes.
 */
cached
predicate returnEdge(Node pred, Node succ) {
  exists(DataFlow::CallableNode fn |
    succ = MkSinkNode(fn.getAReturnNode()) and
    pred = getBackwardEndNode(fn)
  )
  or
  exists(DataFlow::CallNode invocation |
    succ = getForwardStartNode(invocation) and
    pred = MkMethodAccessNode(invocation)
  )
}
|
||||
|
||||
/**
 * Holds if `node` is the API node of a sink, source or call that the entry
 * point `entry` declares.
 */
cached
predicate entryPointEdge(EntryPoint entry, Node node) {
  node = MkMethodAccessNode(entry.getACall())
  or
  node = getForwardStartNode(entry.getASource())
  or
  node = MkSinkNode(entry.getASink())
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
/**
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
|
||||
@@ -0,0 +1,634 @@
|
||||
/**
|
||||
* INTERNAL use only. This is an experimental API subject to change without notice.
|
||||
*
|
||||
* Provides classes and predicates for dealing with flow models specified in extensible predicates.
|
||||
*
|
||||
* The extensible predicates have the following columns:
|
||||
* - Sources:
|
||||
* `type, path, kind`
|
||||
* - Sinks:
|
||||
* `type, path, kind`
|
||||
* - Summaries:
|
||||
* `type, path, input, output, kind`
|
||||
* - Types:
|
||||
* `type1, type2, path`
|
||||
*
|
||||
* The interpretation of a row is similar to API-graphs with a left-to-right
|
||||
* reading.
|
||||
* 1. The `type` column selects all instances of a named type. The syntax of this column is language-specific.
|
||||
* The language defines some type names that the analysis knows how to identify without models.
|
||||
* It can also be a synthetic type name defined by a type definition (see type definitions below).
|
||||
* 2. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `type`.
|
||||
*
|
||||
* Every language supports the following tokens:
|
||||
* - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
|
||||
* Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on.
|
||||
* - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
|
||||
* - ReturnValue: the value returned by a function call
|
||||
* - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
|
||||
*
|
||||
* The following tokens are common and should be implemented for languages where it makes sense:
|
||||
* - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names.
|
||||
* - Instance: an instance of a class
|
||||
* - Subclass: a subclass of a class
|
||||
* - ArrayElement: an element of an array
|
||||
* - Element: an element of a collection-like object
|
||||
* - MapKey: a key in a map-like object
|
||||
* - MapValue: a value in a map-like object
|
||||
* - Awaited: the value from a resolved promise/future-like object
|
||||
*
|
||||
* For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
|
||||
*
|
||||
* 3. The `input` and `output` columns specify how data enters and leaves the element selected by the
|
||||
* first `(type, path)` tuple. Both strings are `.`-separated access paths
|
||||
* of the same syntax as the `path` column.
|
||||
* 4. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources `"remote"` indicates a default remote flow source, and for summaries
|
||||
* `"taint"` indicates a default additional taint step and `"value"` indicates a
|
||||
* globally applicable value-preserving step.
|
||||
*
|
||||
* ### Types
|
||||
*
|
||||
* A type row of form `type1; type2; path` indicates that `type2; path`
|
||||
* should be seen as an instance of the type `type1`.
|
||||
*
|
||||
* A type may refer to a static type or a synthetic type name used internally in the model.
|
||||
* Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same
|
||||
* element.
|
||||
* See `ModelsAsData.qll` for the language-specific interpretation of type names.
|
||||
*
|
||||
* By convention, if one wants to avoid clashes with static types, the type name
|
||||
* should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that
|
||||
* the type is not intended to match a static type.
|
||||
*/
|
||||
|
||||
private import codeql.util.Unit
|
||||
private import ApiGraphModelsSpecific as Specific
|
||||
|
||||
private module API = Specific::API;
|
||||
|
||||
private module DataFlow = Specific::DataFlow;
|
||||
|
||||
private import semmle.code.powershell.controlflow.CfgNodes
|
||||
private import ApiGraphModelsExtensions as Extensions
|
||||
private import codeql.dataflow.internal.AccessPathSyntax
|
||||
|
||||
/** Module containing hooks for providing input data to be interpreted as a model. */
|
||||
module ModelInput {
|
||||
/**
|
||||
* A unit class for adding additional type model rows from CodeQL models.
|
||||
*/
|
||||
class TypeModel extends Unit {
|
||||
/**
|
||||
* Holds if any of the other predicates in this class might have a result
|
||||
* for the given `type`.
|
||||
*
|
||||
* The implementation of this predicate should not depend on `DataFlow::Node`.
|
||||
*/
|
||||
bindingset[type]
|
||||
predicate isTypeUsed(string type) { none() }
|
||||
|
||||
/**
|
||||
* Gets a data-flow node that is a source of the given `type`.
|
||||
*
|
||||
* Note that `type` should also be included in `isTypeUsed`.
|
||||
*
|
||||
* This must not depend on API graphs, but ensures that an API node is generated for
|
||||
* the source.
|
||||
*/
|
||||
DataFlow::Node getASource(string type) { none() }
|
||||
|
||||
/**
|
||||
* Gets a data-flow node that is a sink of the given `type`,
|
||||
* usually because it is an argument passed to a parameter of that type.
|
||||
*
|
||||
* Note that `type` should also be included in `isTypeUsed`.
|
||||
*
|
||||
* This must not depend on API graphs, but ensures that an API node is generated for
|
||||
* the sink.
|
||||
*/
|
||||
DataFlow::Node getASink(string type) { none() }
|
||||
|
||||
/**
|
||||
* Gets an API node that is a source or sink of the given `type`.
|
||||
*
|
||||
* Note that `type` should also be included in `isTypeUsed`.
|
||||
*
|
||||
* Unlike `getASource` and `getASink`, this may depend on API graphs.
|
||||
*/
|
||||
API::Node getAnApiNode(string type) { none() }
|
||||
}
|
||||
}
|
||||
|
||||
private import ModelInput
|
||||
|
||||
/**
|
||||
* An empty class, except in specific tests.
|
||||
*
|
||||
* If this is non-empty, all models are parsed even if the type name is not
|
||||
* considered relevant for the current database.
|
||||
*/
|
||||
abstract class TestAllModels extends Unit { }
|
||||
|
||||
/**
 * Holds if the model extensions contain a source row `(type, path, kind)`.
 *
 * `model` is the provenance string of that row: `"MaD:"` followed by the string form of
 * the row's extension id.
 */
predicate sourceModel(string type, string path, string kind, string model) {
  model =
    "MaD:" +
      any(QlBuiltins::ExtensionId id | Extensions::sourceModel(type, path, kind, id)).toString()
}
|
||||
|
||||
/**
 * Holds if the model extensions contain a sink row `(type, path, kind)`.
 *
 * `model` is the provenance string of that row: `"MaD:"` followed by the string form of
 * the row's extension id.
 */
private predicate sinkModel(string type, string path, string kind, string model) {
  model =
    "MaD:" +
      any(QlBuiltins::ExtensionId id | Extensions::sinkModel(type, path, kind, id)).toString()
}
|
||||
|
||||
/**
 * Holds if the model extensions contain a summary row `(type, path, input, output, kind)`.
 *
 * `model` is the provenance string of that row: `"MaD:"` followed by the string form of
 * the row's extension id.
 */
private predicate summaryModel(
  string type, string path, string input, string output, string kind, string model
) {
  model =
    "MaD:" +
      any(QlBuiltins::ExtensionId id |
        Extensions::summaryModel(type, path, input, output, kind, id)
      ).toString()
}
|
||||
|
||||
/** Holds if `(type2, path)` should be seen as an instance of `type1`. */
|
||||
predicate typeModel(string type1, string type2, string path) {
|
||||
Extensions::typeModel(type1, type2, path)
|
||||
}
|
||||
|
||||
/** Holds if a type variable model exists for the given parameters. */
|
||||
private predicate typeVariableModel(string name, string path) {
|
||||
Extensions::typeVariableModel(name, path)
|
||||
}
|
||||
|
||||
/**
 * Holds if the extension tuple `madId` should pretty-print as `model`.
 *
 * The rendered string is `Source: `, `Sink: ` or `Summary: ` followed by the `; `-separated
 * columns of the row identified by `madId`.
 *
 * This predicate should only be used in tests.
 */
predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) {
  exists(string type, string path, string kind |
    // Source row
    Extensions::sourceModel(type, path, kind, madId) and
    model = "Source: " + type + "; " + path + "; " + kind
    or
    // Sink row
    Extensions::sinkModel(type, path, kind, madId) and
    model = "Sink: " + type + "; " + path + "; " + kind
    or
    // Summary row, which carries two additional columns
    exists(string input, string output |
      Extensions::summaryModel(type, path, input, output, kind, madId) and
      model = "Summary: " + type + "; " + path + "; " + input + "; " + output + "; " + kind
    )
  )
}
|
||||
|
||||
/**
|
||||
* Holds if rows involving `type` might be relevant for the analysis of this database.
|
||||
*/
|
||||
predicate isRelevantType(string type) {
|
||||
(
|
||||
sourceModel(type, _, _, _) or
|
||||
sinkModel(type, _, _, _) or
|
||||
summaryModel(type, _, _, _, _, _) or
|
||||
typeModel(_, type, _)
|
||||
) and
|
||||
(
|
||||
Specific::isTypeUsed(type)
|
||||
or
|
||||
any(TypeModel model).isTypeUsed(type)
|
||||
or
|
||||
exists(TestAllModels t)
|
||||
)
|
||||
or
|
||||
exists(string other | isRelevantType(other) |
|
||||
typeModel(type, other, _)
|
||||
or
|
||||
Specific::hasImplicitTypeModel(type, other)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `type,path` is used in some row.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate isRelevantFullPath(string type, string path) {
|
||||
isRelevantType(type) and
|
||||
(
|
||||
sourceModel(type, path, _, _) or
|
||||
sinkModel(type, path, _, _) or
|
||||
summaryModel(type, path, _, _, _, _) or
|
||||
typeModel(_, type, path)
|
||||
)
|
||||
}
|
||||
|
||||
/** A string from a row that should be parsed as an access path. */
|
||||
private predicate accessPathRange(string s) {
|
||||
isRelevantFullPath(_, s)
|
||||
or
|
||||
exists(string type | isRelevantType(type) |
|
||||
summaryModel(type, _, s, _, _, _) or
|
||||
summaryModel(type, _, _, s, _, _)
|
||||
)
|
||||
or
|
||||
typeVariableModel(_, s)
|
||||
}
|
||||
|
||||
import AccessPath<accessPathRange/1>
|
||||
|
||||
/**
|
||||
* Gets a successor of `node` in the API graph.
|
||||
*/
|
||||
bindingset[token]
|
||||
API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
|
||||
// API graphs use the same label for arguments and parameters. An edge originating from a
|
||||
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
|
||||
// We just map both to the same thing.
|
||||
token.getName() = ["Argument", "Parameter"] and
|
||||
result = node.getParameter(parseIntUnbounded(token.getAnArgument()))
|
||||
or
|
||||
token.getName() = "ReturnValue" and
|
||||
result = node.getReturn()
|
||||
or
|
||||
// Language-specific tokens
|
||||
result = Specific::getExtraSuccessorFromNode(node, token)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an API-graph successor for the given invocation.
|
||||
*/
|
||||
bindingset[token]
|
||||
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) {
|
||||
token.getName() = "Argument" and
|
||||
result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
|
||||
or
|
||||
token.getName() = "ReturnValue" and
|
||||
result = invoke.getReturn()
|
||||
or
|
||||
// Language-specific tokens
|
||||
result = Specific::getExtraSuccessorFromInvoke(invoke, token)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `invoke` invokes a call-site filter given by `token`.
|
||||
*/
|
||||
bindingset[token]
|
||||
private predicate invocationMatchesCallSiteFilter(
|
||||
Specific::InvokeNode invoke, AccessPathTokenBase token
|
||||
) {
|
||||
token.getName() = "WithArity" and
|
||||
invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument())
|
||||
or
|
||||
Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
|
||||
}
|
||||
|
||||
/**
 * An API graph entry point for a type name that some `TypeModel` provides a source for
 * through `getASource`.
 */
private class TypeModelUseEntry extends API::EntryPoint {
  private string typeName;

  TypeModelUseEntry() {
    this = "TypeModelUseEntry;" + typeName and
    exists(any(TypeModel tm).getASource(typeName))
  }

  /** Gets a source that some `TypeModel` contributes for the type of this entry point. */
  override DataFlow::LocalSourceNode getASource() {
    exists(TypeModel tm | result = tm.getASource(typeName))
  }

  /** Gets a node of this entry point, where `type_` is the type this entry point stands for. */
  API::Node getNodeForType(string type_) { result = this.getANode() and type_ = typeName }
}
|
||||
|
||||
/**
 * An API graph entry point for a type name that some `TypeModel` provides a sink for
 * through `getASink`.
 */
private class TypeModelDefEntry extends API::EntryPoint {
  private string typeName;

  TypeModelDefEntry() {
    this = "TypeModelDefEntry;" + typeName and
    exists(any(TypeModel tm).getASink(typeName))
  }

  /** Gets a sink that some `TypeModel` contributes for the type of this entry point. */
  override DataFlow::Node getASink() { exists(TypeModel tm | result = tm.getASink(typeName)) }

  /** Gets a node of this entry point, where `type_` is the type this entry point stands for. */
  API::Node getNodeForType(string type_) { result = this.getANode() and type_ = typeName }
}
|
||||
|
||||
/**
|
||||
* Gets an API node identified by the given `type`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private API::Node getNodeFromType(string type) {
|
||||
exists(string type2, AccessPath path2 |
|
||||
typeModel(type, type2, path2) and
|
||||
result = getNodeFromPath(type2, path2)
|
||||
)
|
||||
or
|
||||
result = any(TypeModelUseEntry e).getNodeForType(type)
|
||||
or
|
||||
result = any(TypeModelDefEntry e).getNodeForType(type)
|
||||
or
|
||||
result = any(TypeModel t).getAnApiNode(type)
|
||||
or
|
||||
result = Specific::getExtraNodeFromType(type)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
API::Node getNodeFromPath(string type, AccessPath path, int n) {
|
||||
isRelevantFullPath(type, path) and
|
||||
(
|
||||
n = 0 and
|
||||
result = getNodeFromType(type)
|
||||
or
|
||||
result = Specific::getExtraNodeFromPath(type, path, n)
|
||||
)
|
||||
or
|
||||
result = getSuccessorFromNode(getNodeFromPath(type, path, n - 1), path.getToken(n - 1))
|
||||
or
|
||||
// Similar to the other recursive case, but where the path may have stepped through one or more call-site filters
|
||||
result = getSuccessorFromInvoke(getInvocationFromPath(type, path, n - 1), path.getToken(n - 1))
|
||||
or
|
||||
// Apply a subpath
|
||||
result = getNodeFromSubPath(getNodeFromPath(type, path, n - 1), getSubPathAt(path, n - 1))
|
||||
or
|
||||
// Apply a type step
|
||||
typeStep(getNodeFromPath(type, path, n), result)
|
||||
or
|
||||
// Apply a fuzzy step (without advancing 'n')
|
||||
path.getToken(n).getName() = "Fuzzy" and
|
||||
result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n))
|
||||
or
|
||||
// Skip a fuzzy step (advance 'n' without changing the current node)
|
||||
path.getToken(n - 1).getName() = "Fuzzy" and
|
||||
result = getNodeFromPath(type, path, n - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a subpath for the `TypeVar` token found at the `n`th token of `path`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private AccessPath getSubPathAt(AccessPath path, int n) {
|
||||
exists(string typeVarName |
|
||||
path.getToken(n).getAnArgument("TypeVar") = typeVarName and
|
||||
typeVariableModel(typeVarName, result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that is found by evaluating the first `n` tokens of `subPath` starting at `base`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) {
|
||||
exists(AccessPath path, int k |
|
||||
base = [getNodeFromPath(_, path, k), getNodeFromSubPath(_, path, k)] and
|
||||
subPath = getSubPathAt(path, k) and
|
||||
result = base and
|
||||
n = 0
|
||||
)
|
||||
or
|
||||
exists(string type, AccessPath basePath |
|
||||
typeStepModel(type, basePath, subPath) and
|
||||
base = getNodeFromPath(type, basePath) and
|
||||
result = base and
|
||||
n = 0
|
||||
)
|
||||
or
|
||||
result = getSuccessorFromNode(getNodeFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1))
|
||||
or
|
||||
result =
|
||||
getSuccessorFromInvoke(getInvocationFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1))
|
||||
or
|
||||
result =
|
||||
getNodeFromSubPath(getNodeFromSubPath(base, subPath, n - 1), getSubPathAt(subPath, n - 1))
|
||||
or
|
||||
typeStep(getNodeFromSubPath(base, subPath, n), result) and
|
||||
// Only apply type-steps strictly between the steps on the sub path, not before and after.
|
||||
// Steps before/after lead to unnecessary transitive edges, which the user of the sub-path
|
||||
// will themselves find by following type-steps.
|
||||
n > 0 and
|
||||
n < subPath.getNumToken()
|
||||
or
|
||||
// Apply a fuzzy step (without advancing 'n')
|
||||
subPath.getToken(n).getName() = "Fuzzy" and
|
||||
result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n))
|
||||
or
|
||||
// Skip a fuzzy step (advance 'n' without changing the current node)
|
||||
subPath.getToken(n - 1).getName() = "Fuzzy" and
|
||||
result = getNodeFromSubPath(base, subPath, n - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a call site that is found by evaluating the first `n` tokens of `subPath` starting at `base`.
|
||||
*/
|
||||
private Specific::InvokeNode getInvocationFromSubPath(API::Node base, AccessPath subPath, int n) {
|
||||
result = Specific::getAnInvocationOf(getNodeFromSubPath(base, subPath, n))
|
||||
or
|
||||
result = getInvocationFromSubPath(base, subPath, n - 1) and
|
||||
invocationMatchesCallSiteFilter(result, subPath.getToken(n - 1))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that is found by evaluating `subPath` starting at `base`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath) {
|
||||
result = getNodeFromSubPath(base, subPath, subPath.getNumToken())
|
||||
}
|
||||
|
||||
/** Gets the node identified by the given `(type, path)` tuple. */
|
||||
private API::Node getNodeFromPath(string type, AccessPath path) {
|
||||
result = getNodeFromPath(type, path, path.getNumToken())
|
||||
}
|
||||
|
||||
/**
 * Holds if there is a summary row for `(type, basePath)` with an empty input, the output
 * access path `output`, and kind `"type"`.
 *
 * Such rows are used as type steps by `typeStep` and `getNodeFromSubPath`.
 */
pragma[nomagic]
|
||||
private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) {
|
||||
summaryModel(type, basePath, "", output, "type", _)
|
||||
}
|
||||
|
||||
/**
 * Holds if `succ` is reachable from `pred` by a type step, that is, `pred` is identified by
 * the `(type, basePath)` part of a `typeStepModel` row and `succ` is found by evaluating
 * that row's `output` sub-path starting at `pred`.
 */
pragma[nomagic]
|
||||
private predicate typeStep(API::Node pred, API::Node succ) {
|
||||
exists(string type, AccessPath basePath, AccessPath output |
|
||||
typeStepModel(type, basePath, output) and
|
||||
pred = getNodeFromPath(type, basePath) and
|
||||
succ = getNodeFromSubPath(pred, output)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an invocation identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
|
||||
*
|
||||
* Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters.
|
||||
*/
|
||||
private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path, int n) {
|
||||
result = Specific::getAnInvocationOf(getNodeFromPath(type, path, n))
|
||||
or
|
||||
result = getInvocationFromPath(type, path, n - 1) and
|
||||
invocationMatchesCallSiteFilter(result, path.getToken(n - 1))
|
||||
}
|
||||
|
||||
/** Gets an invocation identified by the given `(type, path)` tuple. */
|
||||
private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) {
|
||||
result = getInvocationFromPath(type, path, path.getNumToken())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` is a valid name for an access path token in the identifying access path.
|
||||
*/
|
||||
bindingset[name]
|
||||
private predicate isValidTokenNameInIdentifyingAccessPath(string name) {
|
||||
name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"]
|
||||
or
|
||||
Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` is a valid name for an access path token with no arguments, occurring
|
||||
* in an identifying access path.
|
||||
*/
|
||||
bindingset[name]
|
||||
private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
|
||||
name = ["ReturnValue", "Fuzzy"]
|
||||
or
|
||||
Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
|
||||
* in an identifying access path.
|
||||
*/
|
||||
bindingset[name, argument]
|
||||
private predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
|
||||
name = ["Argument", "Parameter"] and
|
||||
argument.regexpMatch("(N-|-)?\\d+(\\.\\.((N-|-)?\\d+)?)?")
|
||||
or
|
||||
name = "WithArity" and
|
||||
argument.regexpMatch("\\d+(\\.\\.(\\d+)?)?")
|
||||
or
|
||||
name = "TypeVar" and
|
||||
exists(argument)
|
||||
or
|
||||
Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument)
|
||||
}
|
||||
|
||||
/**
|
||||
* Module providing access to the imported models in terms of API graph nodes.
|
||||
*/
|
||||
module ModelOutput {
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* Gets an API node contributed by a source model with the given `kind`, where `model` identifies the contributing row.
|
||||
*/
|
||||
cached
|
||||
API::Node getASourceNode(string kind, string model) {
|
||||
exists(string type, string path |
|
||||
sourceModel(type, path, kind, model) and
|
||||
result = getNodeFromPath(type, path)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an API node contributed by a sink model with the given `kind`, where `model` identifies the contributing row.
|
||||
*/
|
||||
cached
|
||||
API::Node getASinkNode(string kind, string model) {
|
||||
exists(string type, string path |
|
||||
sinkModel(type, path, kind, model) and
|
||||
result = getNodeFromPath(type, path)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a relevant summary exists for these parameters.
|
||||
*/
|
||||
cached
|
||||
predicate relevantSummaryModel(
|
||||
string type, string path, string input, string output, string kind, string model
|
||||
) {
|
||||
isRelevantType(type) and
|
||||
summaryModel(type, path, input, output, kind, model)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `baseNode` is an invocation identified by the `type,path` part of a summary row.
|
||||
*/
|
||||
cached
|
||||
predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) {
|
||||
summaryModel(type, path, _, _, _, _) and
|
||||
baseNode = getInvocationFromPath(type, path)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `baseNode` is a callable identified by the `type,path` part of a summary row.
|
||||
*/
|
||||
cached
|
||||
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
|
||||
summaryModel(type, path, _, _, _, _) and
|
||||
baseNode = getNodeFromPath(type, path)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` is seen as an instance of `type` due to a type definition
|
||||
* contributed by a model.
|
||||
*/
|
||||
cached
|
||||
API::Node getATypeNode(string type) { result = getNodeFromType(type) }
|
||||
}
|
||||
|
||||
import Cached
|
||||
import Specific::ModelOutputSpecific
|
||||
private import codeql.mad.ModelValidation as SharedModelVal
|
||||
|
||||
/**
|
||||
* Gets an API node contributed by a source model with the given `kind`.
|
||||
*/
|
||||
API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) }
|
||||
|
||||
/**
|
||||
* Gets an API node contributed by a sink model with the given `kind`.
|
||||
*/
|
||||
API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) }
|
||||
|
||||
/**
 * Configuration for the shared model-kind validation: exposes every `kind` that occurs in a
 * summary, sink, or source model row.
 */
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
|
||||
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) }
|
||||
|
||||
predicate sinkKind(string kind) { sinkModel(_, _, kind, _) }
|
||||
|
||||
predicate sourceKind(string kind) { sourceModel(_, _, kind, _) }
|
||||
}
|
||||
|
||||
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
|
||||
|
||||
/**
|
||||
* Gets a warning message relating to an invalid row in a model.
|
||||
*/
|
||||
string getAWarning() {
|
||||
// Check names and arguments of access path tokens
|
||||
exists(AccessPath path, AccessPathToken token |
|
||||
(isRelevantFullPath(_, path) or typeVariableModel(_, path)) and
|
||||
token = path.getToken(_)
|
||||
|
|
||||
not isValidTokenNameInIdentifyingAccessPath(token.getName()) and
|
||||
result = "Invalid token name '" + token.getName() + "' in access path: " + path
|
||||
or
|
||||
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
|
||||
exists(string argument |
|
||||
argument = token.getAnArgument() and
|
||||
not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and
|
||||
result =
|
||||
"Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path
|
||||
)
|
||||
or
|
||||
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
|
||||
token.getNumArgument() = 0 and
|
||||
not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
|
||||
result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
|
||||
)
|
||||
or
|
||||
// Check for invalid model kinds
|
||||
result = KindVal::getInvalidModelKind()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
/**
|
||||
* Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
|
||||
*
|
||||
* It must export the following members:
|
||||
* ```ql
|
||||
* class Unit // a unit type
|
||||
* class InvokeNode // a type representing an invocation connected to the API graph
|
||||
* module API // the API graph module
|
||||
* predicate isTypeUsed(string rawType)
|
||||
* API::Node getExtraNodeFromPath(string type, AccessPath path, int n)
|
||||
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token)
|
||||
* API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token)
|
||||
* predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token)
|
||||
* InvokeNode getAnInvocationOf(API::Node node)
|
||||
* predicate isExtraValidTokenNameInIdentifyingAccessPath(string name)
|
||||
* predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name)
|
||||
* predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument)
|
||||
* ```
|
||||
*/
|
||||
|
||||
private import powershell
|
||||
private import ApiGraphModels
|
||||
private import semmle.code.powershell.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
|
||||
private import codeql.dataflow.internal.AccessPathSyntax
|
||||
// Re-export libraries needed by ApiGraphModels.qll
|
||||
import semmle.code.powershell.ApiGraphs
|
||||
import semmle.code.powershell.dataflow.DataFlow::DataFlow as DataFlow
|
||||
private import FlowSummaryImpl::Public
|
||||
private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch
|
||||
|
||||
/**
 * Holds if model rows for `rawType` should be considered for the current database.
 *
 * This implementation holds for every type name, so it never filters out a row.
 */
bindingset[rawType]
|
||||
predicate isTypeUsed(string rawType) { any() }
|
||||
|
||||
/**
 * Holds if `rawType` has the form `<mod>.<type>`, where `type` is the non-empty part after
 * the last `.` (and hence contains no `.`) and `mod` is the non-empty part before it.
 *
 * Does not hold for type names without a `.`.
 */
bindingset[rawType]
private predicate parseType(string rawType, string mod, string type) {
  mod = rawType.regexpCapture("(.+)\\.([^\\.]+)", 1) and
  type = rawType.regexpCapture("(.+)\\.([^\\.]+)", 2)
}
|
||||
|
||||
/**
 * Holds if `rawType` is a relevant type name (see `isRelevantType`) that `parseType` splits
 * into the leading part `consts` and the final component `suffix`.
 */
private predicate parseRelevantType(string rawType, string consts, string suffix) {
|
||||
isRelevantType(rawType) and
|
||||
parseType(rawType, consts, suffix)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `type` can be obtained from an instance of `otherType` due to
|
||||
* language semantics modeled by `getExtraNodeFromType`.
|
||||
*/
|
||||
bindingset[otherType]
|
||||
predicate hasImplicitTypeModel(string type, string otherType) { none() }
|
||||
|
||||
/** Gets a Powershell-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */
|
||||
bindingset[type, path]
|
||||
API::Node getExtraNodeFromPath(string type, AccessPath path, int n) {
|
||||
// A row of form `any;Method[foo]` should match any method named `foo`.
|
||||
type = "any" and
|
||||
n = 1 and
|
||||
exists(string methodName, DataFlow::CallNode call |
|
||||
methodMatchedByName(path, methodName) and
|
||||
call.getName() = methodName and
|
||||
result.(API::MethodAccessNode).asCall() = call
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a Powershell-specific interpretation of the given `qualifiedType`. */
|
||||
API::Node getExtraNodeFromType(string qualifiedType) {
|
||||
qualifiedType = "" and
|
||||
result = API::root()
|
||||
or
|
||||
// TODO: How to distinguish between these two cases? And do we need to?
|
||||
exists(string mod, string type | parseRelevantType(qualifiedType, mod, type) |
|
||||
result = API::mod(qualifiedType)
|
||||
or
|
||||
result = API::mod(mod).getType(type)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `path` occurs in a model row with type `any`, meaning it can start
|
||||
* matching anywhere, and the path begins with `Method[methodName]`.
|
||||
*/
|
||||
private predicate methodMatchedByName(AccessPath path, string methodName) {
|
||||
isRelevantFullPath("any", path) and
|
||||
exists(AccessPathToken token |
|
||||
token = path.getToken(0) and
|
||||
token.getName() = "Method" and
|
||||
methodName = token.getAnArgument()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a Powershell-specific API graph successor of `node` reachable by resolving `token`.
|
||||
*/
|
||||
bindingset[token]
|
||||
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
|
||||
token.getName() = "Member" and
|
||||
result = node.getMember(token.getAnArgument())
|
||||
or
|
||||
token.getName() = "Method" and
|
||||
result = node.getMethod(token.getAnArgument())
|
||||
or
|
||||
token.getName() = "Instance" and
|
||||
result = node.getInstance()
|
||||
or
|
||||
token.getName() = "Parameter" and
|
||||
exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos |
|
||||
token.getAnArgument() = FlowSummaryImpl::Input::encodeArgumentPosition(argPos) and
|
||||
DataFlowDispatch::parameterMatch(paramPos, argPos) and
|
||||
result = node.getParameterAtPosition(paramPos)
|
||||
)
|
||||
or
|
||||
exists(DataFlow::ContentSet contents |
|
||||
token.getName() = FlowSummaryImpl::Input::encodeContent(contents, token.getAnArgument()) and
|
||||
result = node.getContents(contents)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a Powershell-specific API graph successor of the invocation `node` reachable by resolving `token`.
|
||||
*/
|
||||
bindingset[token]
|
||||
API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) {
|
||||
token.getName() = "Argument" and
|
||||
exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos |
|
||||
token.getAnArgument() = FlowSummaryImpl::Input::encodeParameterPosition(paramPos) and
|
||||
DataFlowDispatch::parameterMatch(paramPos, argPos) and
|
||||
result = node.getArgumentAtPosition(argPos)
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Gets a node that a `Fuzzy` access path token may step to from `node`: a method of `node`
 * with any name, an argument or parameter at any position other than `this`, the return
 * value, an element, or an instance.
 */
pragma[inline]
|
||||
API::Node getAFuzzySuccessor(API::Node node) {
|
||||
result = node.getMethod(_)
|
||||
or
|
||||
result =
|
||||
node.getArgumentAtPosition(any(DataFlowDispatch::ArgumentPosition apos | not apos.isThis()))
|
||||
or
|
||||
result =
|
||||
node.getParameterAtPosition(any(DataFlowDispatch::ParameterPosition ppos | not ppos.isThis()))
|
||||
or
|
||||
result = node.getReturn()
|
||||
or
|
||||
result = node.getAnElement()
|
||||
or
|
||||
result = node.getInstance()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `invoke` matches the Powershell-specific call site filter in `token`.
|
||||
*/
|
||||
bindingset[token]
|
||||
predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) {
|
||||
none()
|
||||
}
|
||||
|
||||
/** An API graph node representing a method call. */
|
||||
class InvokeNode extends API::MethodAccessNode {
|
||||
/** Gets the number of arguments to the call. */
|
||||
int getNumArgument() { result = this.asCall().getNumberOfArguments() }
|
||||
}
|
||||
|
||||
/** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */
|
||||
InvokeNode getAnInvocationOf(API::Node node) { result = node }
|
||||
|
||||
/**
 * Holds if `name` is a valid Powershell-specific name for an access path token in the
 * identifying access path.
 *
 * `Member`, `Method` and `Instance` are resolved by `getExtraSuccessorFromNode`.
 * `Element` and `Field` are presumably content tokens resolved through
 * `FlowSummaryImpl::Input::encodeContent` (not visible in this file; to be confirmed).
 *
 * `WithBlock` and `WithoutBlock` are deliberately not accepted: no call-site filter
 * implements them (`invocationMatchesExtraCallSiteFilter` never holds), so a row using
 * them could never match anything. Rejecting them lets `ModelOutput::getAWarning` report
 * such rows instead of letting them pass validation silently.
 */
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
  name = ["Member", "Method", "Instance", "Element", "Field"]
}

/**
 * Holds if `name` is a valid name for an access path token with no arguments, occurring
 * in an identifying access path.
 *
 * `Instance` is the only Powershell-specific token that is used without arguments.
 */
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { name = "Instance" }
|
||||
|
||||
/**
|
||||
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
|
||||
* in an identifying access path.
|
||||
*/
|
||||
bindingset[name, argument]
|
||||
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
|
||||
name = ["Member", "Method", "Element", "Field"] and
|
||||
exists(argument)
|
||||
or
|
||||
name = ["Argument", "Parameter"] and
|
||||
(
|
||||
argument = ["self", "lambda-self", "block", "any", "any-named"]
|
||||
or
|
||||
argument.regexpMatch("\\w+:") // keyword argument
|
||||
)
|
||||
}
|
||||
|
||||
/** Powershell-specific members imported into `ModelOutput`; currently empty. */
module ModelOutputSpecific { }
|
||||
Reference in New Issue
Block a user