diff --git a/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll b/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll new file mode 100644 index 00000000000..607c5248ffa --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll @@ -0,0 +1,681 @@ +/** + * Provides an implementation of _API graphs_, which allow efficient modelling of how a given + * value is used by the code base or how values produced by the code base are consumed by a library. + * + * See `API::Node` for more details. + */ + +private import powershell +private import semmle.code.powershell.dataflow.DataFlow +private import semmle.code.powershell.typetracking.ApiGraphShared +private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl +private import semmle.code.powershell.controlflow.Cfg +private import semmle.code.powershell.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch + +/** + * Provides classes and predicates for working with APIs used in a database. + */ +module API { + /** + * A node in the API graph, that is, a value that can be tracked interprocedurally. + * + * The API graph is a graph for tracking values of certain types in a way that accounts for inheritance + * and interprocedural data flow. + * + * API graphs are typically used to identify "API calls", that is, calls to an external function + * whose implementation is not necessarily part of the current codebase. + * + * ### Basic usage + * + * The most basic use of API graphs is typically as follows: + * 1. Start with `API::getTopLevelMember` for the relevant library. + * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function. + * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource`, `asSink`, or `asCall`. 
+ * + * ### Data flow + * + * The member predicates on this class generally take inheritance and data flow into account. + * + * ### Backward data flow + * + * When inspecting the arguments of a call, the data flow direction is backwards. + * + * ### Inheritance + * + * When a class or module object is tracked, inheritance is taken into account. + * + * ### Backward data flow and classes + * + * When inspecting the arguments of a call, and the value flowing into that argument is a user-defined class (or an instance thereof), + * uses of `getMethod` will find method definitions in that class (including inherited ones) rather than finding method calls. + * + * When modeling an external library that is known to call a specific method on a parameter, this makes + * it possible to find the corresponding method definition in user code. + * + * ### Strict left-to-right evaluation + * + * Most member predicates on this class are intended to be chained, and are always evaluated from left to right, which means + * the caller should restrict the initial set of values. + * + * For example, in the following snippet, we always find the uses of `Foo` before finding calls to `bar`: + * ```ql + * API::getTopLevelMember("Foo").getMethod("bar") + * ``` + * In particular, the implementation will never look for calls to `bar` and work backward from there. + * + * Beware of the footgun that is to use API graphs with an unrestricted receiver: + * ```ql + * API::Node barCall(API::Node base) { + * result = base.getMethod("bar") // Do not do this! + * } + * ``` + * The above predicate does not restrict the receiver, and will thus perform an interprocedural data flow + * search starting at every node in the graph, which is very expensive. + */ + class Node extends Impl::TApiNode { + /** + * Gets a data-flow node where this value may flow interprocedurally. + * + * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow. 
+ * See `asSource()` for examples. + */ + bindingset[this] + pragma[inline_late] + DataFlow::Node getAValueReachableFromSource() { + result = getAValueReachableFromSourceInline(this) + } + + /** + * Gets a data-flow node where this value enters the current codebase. + */ + bindingset[this] + pragma[inline_late] + DataFlow::LocalSourceNode asSource() { result = asSourceInline(this) } + + /** Gets a data-flow node where this value potentially flows into an external library. */ + bindingset[this] + pragma[inline_late] + DataFlow::Node asSink() { result = asSinkInline(this) } + + /** Gets a callable that can reach this sink. */ + bindingset[this] + pragma[inline_late] + DataFlow::CallableNode asCallable() { Impl::asCallable(this.getAnEpsilonSuccessor(), result) } + + /** + * Get a data-flow node that transitively flows to this value, provided that this value corresponds + * to a sink. + * + * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow. + * See `asSink()` for examples. + */ + bindingset[this] + pragma[inline_late] + DataFlow::Node getAValueReachingSink() { result = getAValueReachingSinkInline(this) } + + /** Gets the call referred to by this API node. */ + bindingset[this] + pragma[inline_late] + DataFlow::CallNode asCall() { this = Impl::MkMethodAccessNode(result) } + + pragma[inline] + Node getMember(string m) { + // This predicate is currently not 'inline_late' because 'm' can be an input or output + Impl::memberEdge(this.getAnEpsilonSuccessor(), m, result) + } + + /** + * Gets a node that may refer to an instance of the module or class represented by this API node. + */ + bindingset[this] + pragma[inline_late] + Node getInstance() { Impl::instanceEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets a call to `method` with this value as the receiver, or the definition of `method` on + * an object that can reach this sink. 
+ */ + pragma[inline] + Node getMethod(string method) { + // TODO: Consider 'getMethodTarget(method)' for looking up method definitions? + // This predicate is currently not 'inline_late' because 'method' can be an input or output + Impl::methodEdge(this.getAnEpsilonSuccessor(), method, result) + } + + /** + * Gets the result of this call, or the return value of this callable. + */ + bindingset[this] + pragma[inline_late] + Node getReturn() { Impl::returnEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the result of a call to `method` with this value as the receiver, or the return value of `method` defined on + * an object that can reach this sink. + * + * This is a shorthand for `getMethod(method).getReturn()`. + */ + pragma[inline] + Node getReturn(string method) { + // This predicate is currently not 'inline_late' because 'method' can be an input or output + result = this.getMethod(method).getReturn() + } + + /** + * Gets the `n`th positional argument to this call. + */ + pragma[inline] + Node getArgument(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalArgumentEdge(this, n, result) + } + + /** + * Gets the given keyword argument to this call. + */ + pragma[inline] + Node getKeywordArgument(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordArgumentEdge(this, name, result) + } + + /** + * Gets the `n`th positional parameter of this callable, or the `n`th positional argument to this call. + * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getArgument(n)` instead. 
+ */ + pragma[inline] + Node getParameter(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), n, result) + } + + /** + * Gets the given keyword parameter of this callable, or keyword argument to this call. + * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getKeywordArgument(name)` instead. + */ + pragma[inline] + Node getKeywordParameter(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets the argument passed in argument position `pos` at this call. + */ + pragma[inline] + Node getArgumentAtPosition(DataFlowDispatch::ArgumentPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::argumentEdge(pragma[only_bind_out](this), pos, result) // note: no need for epsilon step since 'this' must be a call + } + + /** + * Gets the parameter at position `pos` of this callable. + */ + pragma[inline] + Node getParameterAtPosition(DataFlowDispatch::ParameterPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::parameterEdge(this.getAnEpsilonSuccessor(), pos, result) + } + + /** + * Gets a representative for the `content` of this value. + * + * When possible, it is preferable to use one of the specialized variants of this predicate, such as `getAnElement`. + * + * Concretely, this gets sources where `content` is read from this value, as well as sinks where + * `content` is stored onto this value or onto an object that can reach this sink. 
+ */ + pragma[inline] + Node getContent(DataFlow::Content content) { + // This predicate is currently not 'inline_late' because 'content' can be an input or output + Impl::contentEdge(this.getAnEpsilonSuccessor(), content, result) + } + + /** + * Gets a representative for the `contents` of this value. + * + * See `getContent()` for more details. + */ + bindingset[this, contents] + pragma[inline_late] + Node getContents(DataFlow::ContentSet contents) { + // We always use getAStoreContent when generating content edges, and we always use getAReadContent when querying the graph. + result = this.getContent(contents.getAReadContent()) + } + + /** + * Gets a representative for the instance field of the given `name`. + */ + pragma[inline] + Node getField(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::fieldEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets a representative for an arbitrary element of this collection. + */ + bindingset[this] + pragma[inline_late] + Node getAnElement() { Impl::elementEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the data-flow node that gives rise to this node, if any. + */ + DataFlow::Node getInducingNode() { + this = Impl::MkMethodAccessNode(result) or + this = Impl::MkBackwardNode(result, _) or + this = Impl::MkForwardNode(result, _) or + this = Impl::MkSinkNode(result) + } + + /** Gets the location of this node. */ + Location getLocation() { + result = this.getInducingNode().getLocation() + or + this instanceof RootNode and + result instanceof EmptyLocation + } + + /** + * Gets a textual representation of this element. + */ + string toString() { none() } + + pragma[inline] + private Node getAnEpsilonSuccessor() { result = getAnEpsilonSuccessorInline(this) } + } + + /** The root node of an API graph. 
*/ + private class RootNode extends Node, Impl::MkRoot { + override string toString() { result = "Root()" } + } + + /** A node representing a given type-tracking state when tracking forwards. */ + private class ForwardNode extends Node, Impl::MkForwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + ForwardNode() { this = Impl::MkForwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "ForwardNode(" + node + ")" + else result = "ForwardNode(" + node + ", " + tracker + ")" + } + } + + /** A node representing a given type-tracking state when tracking backwards. */ + private class BackwardNode extends Node, Impl::MkBackwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + BackwardNode() { this = Impl::MkBackwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "BackwardNode(" + node + ")" + else result = "BackwardNode(" + node + ", " + tracker + ")" + } + } + + /** A node representing a module/class object with epsilon edges to its descendents. */ + private class ModuleNode extends Node, Impl::MkModule { + /** Gets the module represented by this API node. */ + string getModule() { this = Impl::MkModule(result) } + + override string toString() { result = "Module(" + this.getModule() + ")" } + + TypeNode getType(string name) { result.getType() = this.getModule() + "." + name } // TODO: Check that name exists in module + } + + private class TypeNode extends Node, Impl::MkType { + /** Gets the type represented by this API node. */ + string getType() { this = Impl::MkType(result) } + + override string toString() { result = "Type(" + this.getType() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its ancestors. */ + private class InstanceUp extends Node, Impl::MkInstanceUp { + /** Gets the module whose instances are represented by this API node. 
+ */ + string getType() { this = Impl::MkInstanceUp(result) } + + override string toString() { result = "ModuleInstanceUp(" + this.getType() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its descendents. */ + private class InstanceDownNode extends Node, Impl::MkInstanceDown { + /** Gets the module whose instances are represented by this API node. */ + string getType() { this = Impl::MkInstanceDown(result) } + + override string toString() { result = "ModuleInstanceDown(" + this.getType() + ")" } + } + + /** A node corresponding to the method being invoked at a method call. */ + class MethodAccessNode extends Node, Impl::MkMethodAccessNode { + override string toString() { result = "MethodAccessNode(" + this.asCall() + ")" } + } + + /** + * A node corresponding to an argument, right-hand side of a store, or return value from a callable. + * + * Such a node may serve as the starting-point of backtracking, and has epsilon edges going to + * the backward nodes corresponding to `getALocalSource`. + */ + private class SinkNode extends Node, Impl::MkSinkNode { + override string toString() { result = "SinkNode(" + this.getInducingNode() + ")" } + } + + /** + * An API entry point. + * + * By default, API graph nodes are only created for nodes that come from an external + * library or escape into an external library. The points where values cross the boundary + * between codebases are called "entry points". + * + * Anything in the global scope is considered to be an entry point, but + * additional entry points may be added by extending this class. + */ + abstract class EntryPoint extends string { + // Note: this class can be deprecated in Ruby, but is still referenced by shared code in ApiGraphModels.qll, + // where it can't be removed since other languages are still dependent on the EntryPoint class. + bindingset[this] + EntryPoint() { any() } + + /** Gets a data-flow node corresponding to a use-node for this entry point. 
*/ + DataFlow::LocalSourceNode getASource() { none() } + + /** Gets a data-flow node corresponding to a def-node for this entry point. */ + DataFlow::Node getASink() { none() } + + /** Gets a call corresponding to a method access node for this entry point. */ + DataFlow::CallNode getACall() { none() } + + /** Gets an API-node for this entry point. */ + API::Node getANode() { Impl::entryPointEdge(this, result) } + } + + // Ensure all entry points are imported from ApiGraphs.qll + private module ImportEntryPoints { + private import semmle.code.powershell.frameworks.data.ModelsAsData + } + + /** Gets the root node. */ + Node root() { result instanceof RootNode } + + /** + * Gets the node that represents the module with qualified + * name `qualifiedModule`. + */ + ModuleNode mod(string qualifiedModule) { result = Impl::MkModule(qualifiedModule) } + + /** + * Gets the node that represents the type with qualified + * name `qualifiedType`. + */ + TypeNode type(string qualifiedType) { result = Impl::MkType(qualifiedType) } + + /** + * Gets an unqualified call at the top-level with the given method name. 
+ */ + pragma[inline] + MethodAccessNode getTopLevelCall(string name) { Impl::toplevelCall(name, result) } + + pragma[nomagic] + private predicate isReachable(DataFlow::LocalSourceNode node, TypeTracker t) { + t.start() and exists(node) + or + exists(DataFlow::LocalSourceNode prev, TypeTracker t2 | + isReachable(prev, t2) and + node = prev.track(t2, t) + ) + } + + private module SharedArg implements ApiGraphSharedSig { + class ApiNode = Node; + + ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkForwardNode(node, t) + } + + ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkBackwardNode(node, t) + } + + ApiNode getSinkNode(DataFlow::Node node) { result = Impl::MkSinkNode(node) } + + pragma[nomagic] + predicate specificEpsilonEdge(ApiNode pred, ApiNode succ) { none() } + } + + /** INTERNAL USE ONLY. */ + module Internal { + private module MkShared = ApiGraphShared; + + import MkShared + } + + private import Internal + import Internal::Public + + cached + private module Impl { + cached + newtype TApiNode = + /** The root of the API graph. */ + MkRoot() or + /** The method accessed at `call`, synthetically treated as a separate object. */ + MkMethodAccessNode(DataFlow::CallNode call) or + MkModule(string qualifiedModule) { + any(UsingStmt using).getName() = qualifiedModule + or + any(Cmd cmd).getQualifiedCommandName() = qualifiedModule + or + any(ModuleManifest manifest).getModuleName() = qualifiedModule + } or + MkType(string qualifiedType) { any(ConstantValue cv).asString() = qualifiedType } or // TODO + /** Instances of `mod` with epsilon edges to its ancestors. */ + MkInstanceUp(string qualifiedType) { exists(MkType(qualifiedType)) } or + /** Instances of `mod` with epsilon edges to its descendents, and to its upward node. */ + MkInstanceDown(string qualifiedType) { exists(MkType(qualifiedType)) } or + /** Intermediate node for following forward data flow. 
*/ + MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + /** Intermediate node for following backward data flow. */ + MkBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + MkSinkNode(DataFlow::Node node) { needsSinkNode(node) } + + private predicate needsSinkNode(DataFlow::Node node) { + node instanceof DataFlowPrivate::ArgumentNode + or + TypeTrackingInput::storeStep(node, _, _) + or + node = any(DataFlow::CallableNode callable).getAReturnNode() + or + node = any(EntryPoint e).getASink() + } + + bindingset[e] + pragma[inline_late] + private DataFlow::Node getNodeFromExpr(Expr e) { result.asExpr().getExpr() = e } + + cached + predicate toplevelCall(string name, Node node) { + exists(DataFlow::CallNode call | + call.asExpr().getExpr().getEnclosingScope() instanceof TopLevel and + call.getName() = name and + node = MkMethodAccessNode(call) + ) + } + + cached + predicate callEdge(Node pred, string name, Node succ) { + exists(DataFlow::CallNode call | + // from receiver to method call node + pred = getForwardEndNode(getALocalSourceStrict(call.getQualifier())) and + succ = MkMethodAccessNode(call) and + name = call.getName() + ) + } + + cached + predicate typeEdge(Node pred, string name, Node succ) { + exists(ModuleNode mod | + pred = mod and + succ = mod.getType(name) + ) + } + + cached + predicate memberEdge(Node pred, string name, Node succ) { + exists(MemberExpr member | succ = getForwardStartNode(getNodeFromExpr(member)) | + pred = getForwardEndNode(getALocalSourceStrict(getNodeFromExpr(member.getQualifier()))) and + name = member.getMemberName() + ) + } + + cached + predicate methodEdge(Node pred, string name, Node succ) { + exists(DataFlow::CallNode call | succ = MkMethodAccessNode(call) and name = call.getName() | + pred = getForwardEndNode(getALocalSourceStrict(call.getQualifier())) + or + exists(string qualifiedModule, ModuleManifest manifest | + pred = mod(qualifiedModule) and + 
manifest.getModuleName() = qualifiedModule + | + manifest.getACmdLetToExport() = name + or + manifest.getAFunctionToExport() = name + ) + ) + } + + cached + predicate asCallable(Node apiNode, DataFlow::CallableNode callable) { + apiNode = getBackwardStartNode(callable) + } + + cached + predicate contentEdge(Node pred, DataFlow::Content content, Node succ) { + exists(DataFlow::Node object, DataFlow::Node value, DataFlow::ContentSet c | + TypeTrackingInput::loadStep(object, value, c) and + content = c.getAStoreContent() and + // `x -> x.foo` with content "foo" + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = getForwardStartNode(value) + or + // Based on `object.c = value` generate `object -> value` with content `c` + TypeTrackingInput::storeStep(value, object, c) and + content = c.getAStoreContent() and + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = MkSinkNode(value) + ) + } + + cached + predicate fieldEdge(Node pred, string name, Node succ) { + Impl::contentEdge(pred, DataFlowPrivate::TFieldContent(name), succ) + } + + cached + predicate elementEdge(Node pred, Node succ) { + contentEdge(pred, any(DataFlow::ContentSet set | set.isAnyElement()).getAReadContent(), succ) + } + + cached + predicate parameterEdge(Node pred, DataFlowDispatch::ParameterPosition paramPos, Node succ) { + exists(DataFlowPrivate::ParameterNodeImpl parameter, DataFlow::CallableNode callable | + parameter.isSourceParameterOf(callable.asCallableAstNode(), paramPos) and + pred = getBackwardEndNode(callable) and + succ = getForwardStartNode(parameter) + ) + } + + cached + predicate argumentEdge(Node pred, DataFlowDispatch::ArgumentPosition argPos, Node succ) { + exists(DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument | + argument.sourceArgumentOf(call.asExpr(), argPos) and + pred = MkMethodAccessNode(call) and + succ = MkSinkNode(argument) + ) + } + + cached + predicate positionalArgumentEdge(Node pred, int n, Node 
succ) { + argumentEdge(pred, + any(DataFlowDispatch::ArgumentPosition pos | + pos.isPositional(n, DataFlowPrivate::emptyNamedSet()) + ), succ) + } + + cached + predicate keywordArgumentEdge(Node pred, string name, Node succ) { + argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name)), succ) + } + + private predicate positionalParameterEdge(Node pred, int n, Node succ) { + parameterEdge(pred, + any(DataFlowDispatch::ParameterPosition pos | + pos.isPositional(n, DataFlowPrivate::emptyNamedSet()) + ), succ) + } + + private predicate keywordParameterEdge(Node pred, string name, Node succ) { + parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isKeyword(name)), succ) + } + + cached + predicate positionalParameterOrArgumentEdge(Node pred, int n, Node succ) { + positionalArgumentEdge(pred, n, succ) + or + positionalParameterEdge(pred, n, succ) + } + + cached + predicate keywordParameterOrArgumentEdge(Node pred, string name, Node succ) { + keywordArgumentEdge(pred, name, succ) + or + keywordParameterEdge(pred, name, succ) + } + + cached + predicate instanceEdge(Node pred, Node succ) { + exists(string qualifiedType | pred = MkType(qualifiedType) | + exists(DataFlow::ObjectCreationNode objCreation | + objCreation.getConstructedTypeName() = qualifiedType and + succ = getForwardStartNode(objCreation) + ) + or + exists(DataFlow::ParameterNode p | + p.getParameter().getStaticType() = qualifiedType and + succ = getForwardStartNode(p) + ) + ) + } + + cached + predicate returnEdge(Node pred, Node succ) { + exists(DataFlow::CallNode call | + pred = MkMethodAccessNode(call) and + succ = getForwardStartNode(call) + ) + or + exists(DataFlow::CallableNode callable | + pred = getBackwardEndNode(callable) and + succ = MkSinkNode(callable.getAReturnNode()) + ) + } + + cached + predicate entryPointEdge(EntryPoint entry, Node node) { + node = MkSinkNode(entry.getASink()) or + node = getForwardStartNode(entry.getASource()) or + node = 
MkMethodAccessNode(entry.getACall()) + } + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/Frameworks.qll b/powershell/ql/lib/semmle/code/powershell/Frameworks.qll index e69de29bb2d..19c46aa64da 100644 --- a/powershell/ql/lib/semmle/code/powershell/Frameworks.qll +++ b/powershell/ql/lib/semmle/code/powershell/Frameworks.qll @@ -0,0 +1,4 @@ +/** + * Helper file that imports all framework modeling. + */ + diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll new file mode 100644 index 00000000000..aec65d1819c --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll @@ -0,0 +1,634 @@ +/** + * INTERNAL use only. This is an experimental API subject to change without notice. + * + * Provides classes and predicates for dealing with flow models specified in extensible predicates. + * + * The extensible predicates have the following columns: + * - Sources: + * `type, path, kind` + * - Sinks: + * `type, path, kind` + * - Summaries: + * `type, path, input, output, kind` + * - Types: + * `type1, type2, path` + * + * The interpretation of a row is similar to API-graphs with a left-to-right + * reading. + * 1. The `type` column selects all instances of a named type. The syntax of this column is language-specific. + * The language defines some type names that the analysis knows how to identify without models. + * It can also be a synthetic type name defined by a type definition (see type definitions below). + * 2. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `type`. + * + * Every language supports the following tokens: + * - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list. 
+ * Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on. + * - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * - ReturnValue: the value returned by a function call + * - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * + * The following tokens are common and should be implemented for languages where it makes sense: + * - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names. + * - Instance: an instance of a class + * - Subclass: a subclass of a class + * - ArrayElement: an element of an array + * - Element: an element of a collection-like object + * - MapKey: a key in a map-like object + * - MapValue: a value in a map-like object + * - Awaited: the value from a resolved promise/future-like object + * + * For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported. + * + * 3. The `input` and `output` columns specify how data enters and leaves the element selected by the + * first `(type, path)` tuple. Both strings are `.`-separated access paths + * of the same syntax as the `path` column. + * 4. The `kind` column is a tag that can be referenced from QL to determine to + * which classes the interpreted elements should be added. For example, for + * sources `"remote"` indicates a default remote flow source, and for summaries + * `"taint"` indicates a default additional taint step and `"value"` indicates a + * globally applicable value-preserving step. + * + * ### Types + * + * A type row of form `type1; type2; path` indicates that `type2; path` + * should be seen as an instance of the type `type1`. + * + * A type may refer to a static type or a synthetic type name used internally in the model. 
+ * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same + * element. + * See `ModelsAsData.qll` for the language-specific interpretation of type names. + * + * By convention, if one wants to avoid clashes with static types, the type name + * should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that + * the type is not intended to match a static type. + */ + +private import codeql.util.Unit +private import ApiGraphModelsSpecific as Specific + +private module API = Specific::API; + +private module DataFlow = Specific::DataFlow; + +private import semmle.code.powershell.controlflow.CfgNodes +private import ApiGraphModelsExtensions as Extensions +private import codeql.dataflow.internal.AccessPathSyntax + +/** Module containing hooks for providing input data to be interpreted as a model. */ +module ModelInput { + /** + * A unit class for adding additional type model rows from CodeQL models. + */ + class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + + /** + * Gets a data-flow node that is a source of the given `type`. + * + * Note that `type` should also be included in `isTypeUsed`. + * + * This must not depend on API graphs, but ensures that an API node is generated for + * the source. + */ + DataFlow::Node getASource(string type) { none() } + + /** + * Gets a data-flow node that is a sink of the given `type`, + * usually because it is an argument passed to a parameter of that type. + * + * Note that `type` should also be included in `isTypeUsed`. + * + * This must not depend on API graphs, but ensures that an API node is generated for + * the sink. 
+ */ + DataFlow::Node getASink(string type) { none() } + + /** + * Gets an API node that is a source or sink of the given `type`. + * + * Note that `type` should also be included in `isTypeUsed`. + * + * Unlike `getASource` and `getASink`, this may depend on API graphs. + */ + API::Node getAnApiNode(string type) { none() } + } +} + +private import ModelInput + +/** + * An empty class, except in specific tests. + * + * If this is non-empty, all models are parsed even if the type name is not + * considered relevant for the current database. + */ +abstract class TestAllModels extends Unit { } + +/** Holds if a source model exists for the given parameters. */ +predicate sourceModel(string type, string path, string kind, string model) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sourceModel(type, path, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if a sink model exists for the given parameters. */ +private predicate sinkModel(string type, string path, string kind, string model) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sinkModel(type, path, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if a summary model exists for the given parameters. */ +private predicate summaryModel( + string type, string path, string input, string output, string kind, string model +) { + exists(QlBuiltins::ExtensionId madId | + Extensions::summaryModel(type, path, input, output, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if `(type2, path)` should be seen as an instance of `type1`. */ +predicate typeModel(string type1, string type2, string path) { + Extensions::typeModel(type1, type2, path) +} + +/** Holds if a type variable model exists for the given parameters. */ +private predicate typeVariableModel(string name, string path) { + Extensions::typeVariableModel(name, path) +} + +/** + * Holds if the given extension tuple `madId` should pretty-print as `model`. 
+ * + * This predicate should only be used in tests. + */ +predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { + exists(string type, string path, string kind | + Extensions::sourceModel(type, path, kind, madId) and + model = "Source: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string kind | + Extensions::sinkModel(type, path, kind, madId) and + model = "Sink: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string input, string output, string kind | + Extensions::summaryModel(type, path, input, output, kind, madId) and + model = "Summary: " + type + "; " + path + "; " + input + "; " + output + "; " + kind + ) +} + +/** + * Holds if rows involving `type` might be relevant for the analysis of this database. + * + * This is the case if `type` occurs in some row and is reported as used (or `TestAllModels` + * is non-empty), or if a type model row or an implicit type model relates `type` to a type + * that is already relevant. + */ +predicate isRelevantType(string type) { + ( + sourceModel(type, _, _, _) or + sinkModel(type, _, _, _) or + summaryModel(type, _, _, _, _, _) or + typeModel(_, type, _) + ) and + ( + Specific::isTypeUsed(type) + or + any(TypeModel model).isTypeUsed(type) + or + exists(TestAllModels t) + ) + or + exists(string other | isRelevantType(other) | + typeModel(type, other, _) + or + Specific::hasImplicitTypeModel(type, other) + ) +} + +/** + * Holds if `type,path` is used in some row, and `type` is relevant. + */ +pragma[nomagic] +predicate isRelevantFullPath(string type, string path) { + isRelevantType(type) and + ( + sourceModel(type, path, _, _) or + sinkModel(type, path, _, _) or + summaryModel(type, path, _, _, _, _) or + typeModel(_, type, path) + ) +} + +/** + * Holds if `s` is a string from a row that should be parsed as an access path. + * + * This is the range with which the `AccessPath` module is instantiated below, so only + * these strings are available as `AccessPath` values. + */ +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _, _) or + summaryModel(type, _, _, s, _, _) + ) + or + typeVariableModel(_, s) +} + +import AccessPath<accessPathRange/1> + +/** + * Gets a successor of `node` in the API graph.
+ */ +bindingset[token] +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { + // API graphs use the same label for arguments and parameters. An edge originating from a + // use-node represents an argument, and an edge originating from a def-node represents a parameter. + // We just map both to the same thing. + token.getName() = ["Argument", "Parameter"] and + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) + or + token.getName() = "ReturnValue" and + result = node.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromNode(node, token) +} + +/** + * Gets an API-graph successor for the given invocation, reached by resolving `token`. + * + * The argument of an `Argument` token is parsed relative to the number of arguments of `invoke`. + */ +bindingset[token] +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { + token.getName() = "Argument" and + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + or + token.getName() = "ReturnValue" and + result = invoke.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromInvoke(invoke, token) +} + +/** + * Holds if `invoke` matches the call-site filter given by `token`.
+ */ +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { + token.getName() = "WithArity" and + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) + or + Specific::invocationMatchesExtraCallSiteFilter(invoke, token) +} + +/** + * An API graph entry point for the data-flow sources that some `TypeModel` + * contributes through `getASource` for a single type name. + */ +private class TypeModelUseEntry extends API::EntryPoint { + private string type; + + TypeModelUseEntry() { + exists(any(TypeModel tm).getASource(type)) and + this = "TypeModelUseEntry;" + type + } + + override DataFlow::LocalSourceNode getASource() { result = any(TypeModel tm).getASource(type) } + + /** Gets an API node of this entry point, provided the entry point was created for `type_`. */ + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } +} + +/** + * An API graph entry point for the data-flow sinks that some `TypeModel` + * contributes through `getASink` for a single type name. + */ +private class TypeModelDefEntry extends API::EntryPoint { + private string type; + + TypeModelDefEntry() { + exists(any(TypeModel tm).getASink(type)) and + this = "TypeModelDefEntry;" + type + } + + override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(type) } + + /** Gets an API node of this entry point, provided the entry point was created for `type_`. */ + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } +} + +/** + * Gets an API node identified by the given `type`. + * + * Such a node comes from a type model row, from a `TypeModel` subclass (via the entry + * points above or `getAnApiNode`), or from the language-specific `getExtraNodeFromType`. + */ +pragma[nomagic] +private API::Node getNodeFromType(string type) { + exists(string type2, AccessPath path2 | + typeModel(type, type2, path2) and + result = getNodeFromPath(type2, path2) + ) + or + result = any(TypeModelUseEntry e).getNodeForType(type) + or + result = any(TypeModelDefEntry e).getNodeForType(type) + or + result = any(TypeModel t).getAnApiNode(type) + or + result = Specific::getExtraNodeFromType(type) +} + +/** + * Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
+ * + * For `n = 0` this starts from the nodes of `type`. Each resolved token advances `n` by one, + * whereas type steps and `Fuzzy` steps may contribute further nodes without advancing `n`. + */ +pragma[nomagic] +API::Node getNodeFromPath(string type, AccessPath path, int n) { + isRelevantFullPath(type, path) and + ( + n = 0 and + result = getNodeFromType(type) + or + result = Specific::getExtraNodeFromPath(type, path, n) + ) + or + result = getSuccessorFromNode(getNodeFromPath(type, path, n - 1), path.getToken(n - 1)) + or + // Similar to the other recursive case, but where the path may have stepped through one or more call-site filters + result = getSuccessorFromInvoke(getInvocationFromPath(type, path, n - 1), path.getToken(n - 1)) + or + // Apply a subpath + result = getNodeFromSubPath(getNodeFromPath(type, path, n - 1), getSubPathAt(path, n - 1)) + or + // Apply a type step + typeStep(getNodeFromPath(type, path, n), result) + or + // Apply a fuzzy step (without advancing 'n') + path.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + path.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromPath(type, path, n - 1) +} + +/** + * Gets a subpath for the `TypeVar` token found at the `n`th token of `path`, that is, + * an access path that a type variable model associates with the variable named by that token. + */ +pragma[nomagic] +private AccessPath getSubPathAt(AccessPath path, int n) { + exists(string typeVarName | + path.getToken(n).getAnArgument("TypeVar") = typeVarName and + typeVariableModel(typeVarName, result) + ) +} + +/** + * Gets a node that is found by evaluating the first `n` tokens of `subPath` starting at `base`.
+ * + * Only relevant `(base, subPath)` pairs are considered: `subPath` is either the sub path of a + * `TypeVar` token reached at `base`, or the output of a type-step model whose base path + * identifies `base`. + */ +pragma[nomagic] +private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) { + exists(AccessPath path, int k | + base = [getNodeFromPath(_, path, k), getNodeFromSubPath(_, path, k)] and + subPath = getSubPathAt(path, k) and + result = base and + n = 0 + ) + or + exists(string type, AccessPath basePath | + typeStepModel(type, basePath, subPath) and + base = getNodeFromPath(type, basePath) and + result = base and + n = 0 + ) + or + result = getSuccessorFromNode(getNodeFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1)) + or + result = + getSuccessorFromInvoke(getInvocationFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1)) + or + result = + getNodeFromSubPath(getNodeFromSubPath(base, subPath, n - 1), getSubPathAt(subPath, n - 1)) + or + typeStep(getNodeFromSubPath(base, subPath, n), result) and + // Only apply type-steps strictly between the steps on the sub path, not before and after. + // Steps before/after lead to unnecessary transitive edges, which the user of the sub-path + // will themselves find by following type-steps. + n > 0 and + n < subPath.getNumToken() + or + // Apply a fuzzy step (without advancing 'n') + subPath.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + subPath.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromSubPath(base, subPath, n - 1) +} + +/** + * Gets a call site that is found by evaluating the first `n` tokens of `subPath` starting at `base`. + * + * Unlike `getNodeFromSubPath`, the evaluated tokens may end with one or more call-site filters. + */ +private Specific::InvokeNode getInvocationFromSubPath(API::Node base, AccessPath subPath, int n) { + result = Specific::getAnInvocationOf(getNodeFromSubPath(base, subPath, n)) + or + result = getInvocationFromSubPath(base, subPath, n - 1) and + invocationMatchesCallSiteFilter(result, subPath.getToken(n - 1)) +} + +/** + * Gets a node that is found by evaluating `subPath` starting at `base`.
+ */ +pragma[nomagic] +private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath) { + result = getNodeFromSubPath(base, subPath, subPath.getNumToken()) +} + +/** Gets the node identified by the given `(type, path)` tuple, after resolving all tokens of `path`. */ +private API::Node getNodeFromPath(string type, AccessPath path) { + result = getNodeFromPath(type, path, path.getNumToken()) +} + +/** + * Holds if there is a summary model of kind `type` with an empty input, whose identifying + * part is `(type, basePath)` and whose output is the access path `output`. + */ +pragma[nomagic] +private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) { + summaryModel(type, basePath, "", output, "type", _) +} + +/** + * Holds if a type-step model leads from `pred` to `succ`: `pred` is identified by the + * `(type, basePath)` part of the model, and `succ` is found by evaluating the model's + * output as a sub path starting at `pred`. + */ +pragma[nomagic] +private predicate typeStep(API::Node pred, API::Node succ) { + exists(string type, AccessPath basePath, AccessPath output | + typeStepModel(type, basePath, output) and + pred = getNodeFromPath(type, basePath) and + succ = getNodeFromSubPath(pred, output) + ) +} + +/** + * Gets an invocation identified by the first `n` tokens of `path` in the given `(type, path)` tuple. + * + * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters. + */ +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path, int n) { + result = Specific::getAnInvocationOf(getNodeFromPath(type, path, n)) + or + result = getInvocationFromPath(type, path, n - 1) and + invocationMatchesCallSiteFilter(result, path.getToken(n - 1)) +} + +/** Gets an invocation identified by the given `(type, path)` tuple. */ +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) { + result = getInvocationFromPath(type, path, path.getNumToken()) +} + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + */ +bindingset[name] +private predicate isValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"] + or + Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occurring + * in an identifying access path.
+ */ +bindingset[name] +private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = ["ReturnValue", "Fuzzy"] + or + Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. + */ +bindingset[name, argument] +private predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Argument", "Parameter"] and + argument.regexpMatch("(N-|-)?\\d+(\\.\\.((N-|-)?\\d+)?)?") + or + name = "WithArity" and + argument.regexpMatch("\\d+(\\.\\.(\\d+)?)?") + or + name = "TypeVar" and + exists(argument) + or + Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument) +} + +/** + * Module providing access to the imported models in terms of API graph nodes. + */ +module ModelOutput { + cached + private module Cached { + /** + * Gets an API node that the source model `model` identifies as a source of the given `kind`. + */ + cached + API::Node getASourceNode(string kind, string model) { + exists(string type, string path | + sourceModel(type, path, kind, model) and + result = getNodeFromPath(type, path) + ) + } + + /** + * Gets an API node that the sink model `model` identifies as a sink of the given `kind`. + */ + cached + API::Node getASinkNode(string kind, string model) { + exists(string type, string path | + sinkModel(type, path, kind, model) and + result = getNodeFromPath(type, path) + ) + } + + /** + * Holds if a summary model exists for these parameters and its `type` is relevant. + */ + cached + predicate relevantSummaryModel( + string type, string path, string input, string output, string kind, string model + ) { + isRelevantType(type) and + summaryModel(type, path, input, output, kind, model) + } + + /** + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row.
+ */ + cached + predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) { + summaryModel(type, path, _, _, _, _) and + baseNode = getInvocationFromPath(type, path) + } + + /** + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + + /** + * Gets an API node that is seen as an instance of `type` due to a type definition + * contributed by a model. + */ + cached + API::Node getATypeNode(string type) { result = getNodeFromType(type) } + } + + import Cached + import Specific::ModelOutputSpecific + private import codeql.mad.ModelValidation as SharedModelVal + + /** + * Gets an API node that some source model identifies as a source of the given `kind`. + */ + API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) } + + /** + * Gets an API node that some sink model identifies as a sink of the given `kind`. + */ + API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) } + + /** Exposes the kinds occurring in the imported model rows to the shared kind validation. */ + private module KindValConfig implements SharedModelVal::KindValidationConfigSig { + predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) } + + predicate sinkKind(string kind) { sinkModel(_, _, kind, _) } + + predicate sourceKind(string kind) { sourceModel(_, _, kind, _) } + } + + private module KindVal = SharedModelVal::KindValidation<KindValConfig>; + + /** + * Gets an error message relating to an invalid row in a model.
+ * + * A message is produced for an access path token whose name is not recognised, whose + * argument is not valid for its name, or which is missing a required argument, and for + * each invalid model kind reported by the shared kind validation. + */ + string getAWarning() { + // Check names and arguments of access path tokens + exists(AccessPath path, AccessPathToken token | + (isRelevantFullPath(_, path) or typeVariableModel(_, path)) and + token = path.getToken(_) + | + not isValidTokenNameInIdentifyingAccessPath(token.getName()) and + result = "Invalid token name '" + token.getName() + "' in access path: " + path + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + exists(string argument | + argument = token.getAnArgument() and + not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and + result = + "Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path + ) + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + token.getNumArgument() = 0 and + not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and + result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path + ) + or + // Check for invalid model kinds + result = KindVal::getInvalidModelKind() + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll new file mode 100644 index 00000000000..d4f61ab4d4a --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -0,0 +1,201 @@ +/** + * Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
+ * + * It must export the following members: + * ```ql + * class Unit // a unit type + * class InvokeNode // a type representing an invocation connected to the API graph + * module API // the API graph module + * module DataFlow // the data-flow module + * module ModelOutputSpecific // language-specific members imported into `ModelOutput` + * predicate isTypeUsed(string rawType) + * predicate hasImplicitTypeModel(string type, string otherType) + * API::Node getExtraNodeFromPath(string type, AccessPath path, int n) + * API::Node getExtraNodeFromType(string qualifiedType) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) + * API::Node getAFuzzySuccessor(API::Node node) + * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) + * InvokeNode getAnInvocationOf(API::Node node) + * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) + * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) + * predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) + * ``` + */ + +private import powershell +private import ApiGraphModels +private import semmle.code.powershell.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax +// Re-export libraries needed by ApiGraphModels.qll +import semmle.code.powershell.ApiGraphs +import semmle.code.powershell.dataflow.DataFlow::DataFlow as DataFlow +private import FlowSummaryImpl::Public +private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch + +/** + * Holds if models for `rawType` might be relevant for this database. + * + * This holds for every type name, so no model row is filtered out by this predicate. + */ +bindingset[rawType] +predicate isTypeUsed(string rawType) { any() } + +/** + * Holds if `rawType` has the form `mod.type`, where `type` is the non-empty part after + * the last `.` and `mod` is the non-empty part before it. + */ +bindingset[rawType] +private predicate parseType(string rawType, string mod, string type) { + exists(string regexp | + regexp = "(.+)\\.([^\\.]+)" and + mod = rawType.regexpCapture(regexp, 1) and + type = rawType.regexpCapture(regexp, 2) + ) +} + +/** + * Holds if `rawType` is a relevant type name that `parseType` splits into `consts` + * (the part before the last `.`) and `suffix` (the part after it). + */ +private predicate parseRelevantType(string rawType, string consts, string suffix) { + isRelevantType(rawType) and + parseType(rawType, consts, suffix) +} + +/** + * Holds if `type` can be obtained from an instance of `otherType` due to + * language semantics
modeled by `getExtraNodeFromType`. + * + * No implicit type models are currently defined, so this holds for no pair of types. + */ +bindingset[otherType] +predicate hasImplicitTypeModel(string type, string otherType) { none() } + +/** Gets a Powershell-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */ +bindingset[type, path] +API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { + // A row of form `any;Method[foo]` should match any method named `foo`. + type = "any" and + n = 1 and + exists(string methodName, DataFlow::CallNode call | + methodMatchedByName(path, methodName) and + call.getName() = methodName and + result.(API::MethodAccessNode).asCall() = call + ) +} + +/** + * Gets a Powershell-specific interpretation of the given `qualifiedType`. + * + * The empty string denotes the root of the API graph. A relevant name of the form + * `mod.type` denotes both the module named by the whole string and the type `type` + * in the module `mod`. + */ +API::Node getExtraNodeFromType(string qualifiedType) { + qualifiedType = "" and + result = API::root() + or + // TODO: How to distinguish between these two cases? And do we need to? + exists(string mod, string type | parseRelevantType(qualifiedType, mod, type) | + result = API::mod(qualifiedType) + or + result = API::mod(mod).getType(type) + ) +} + +/** + * Holds if `path` occurs in a model row with type `any`, meaning it can start + * matching anywhere, and the path begins with `Method[methodName]`. + */ +private predicate methodMatchedByName(AccessPath path, string methodName) { + isRelevantFullPath("any", path) and + exists(AccessPathToken token | + token = path.getToken(0) and + token.getName() = "Method" and + methodName = token.getAnArgument() + ) +} + +/** + * Gets a Powershell-specific API graph successor of `node` reachable by resolving `token`.
+ * + * This handles `Member`, `Method`, `Instance` and `Parameter` tokens, as well as tokens + * that encode a content set. + */ +bindingset[token] +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { + token.getName() = "Member" and + result = node.getMember(token.getAnArgument()) + or + token.getName() = "Method" and + result = node.getMethod(token.getAnArgument()) + or + token.getName() = "Instance" and + result = node.getInstance() + or + token.getName() = "Parameter" and + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + token.getAnArgument() = FlowSummaryImpl::Input::encodeArgumentPosition(argPos) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getParameterAtPosition(paramPos) + ) + or + exists(DataFlow::ContentSet contents | + token.getName() = FlowSummaryImpl::Input::encodeContent(contents, token.getAnArgument()) and + result = node.getContents(contents) + ) +} + +/** + * Gets a Powershell-specific API graph successor of the invocation `node` reachable by resolving `token`. + * + * Only `Argument` tokens are handled here; the token's argument is matched against an encoded + * parameter position, and the result is the argument of `node` at a matching argument position. + */ +bindingset[token] +API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) { + token.getName() = "Argument" and + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + token.getAnArgument() = FlowSummaryImpl::Input::encodeParameterPosition(paramPos) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getArgumentAtPosition(argPos) + ) +} + +/** + * Gets a node reachable from `node` by a single step of any of the following kinds: + * a method of any name, an argument or parameter at any position other than `this`, + * the return value, an element, or an instance. + */ +pragma[inline] +API::Node getAFuzzySuccessor(API::Node node) { + result = node.getMethod(_) + or + result = + node.getArgumentAtPosition(any(DataFlowDispatch::ArgumentPosition apos | not apos.isThis())) + or + result = + node.getParameterAtPosition(any(DataFlowDispatch::ParameterPosition ppos | not ppos.isThis())) + or + result = node.getReturn() + or + result = node.getAnElement() + or + result = node.getInstance() +} + +/** + * Holds if `invoke` matches the Powershell-specific call site filter in `token`.
+ * + * No Powershell-specific call site filters are currently defined, so this never holds. + */ +bindingset[token] +predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) { + none() +} + +/** An API graph node representing a method call. */ +class InvokeNode extends API::MethodAccessNode { + /** Gets the number of arguments to the call. */ + int getNumArgument() { result = this.asCall().getNumberOfArguments() } +} + +/** + * Gets the `InvokeNode` corresponding to a specific invocation of `node`. + * + * This is `node` itself, provided that `node` is an `InvokeNode`. + */ +InvokeNode getAnInvocationOf(API::Node node) { result = node } + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + * + * NOTE(review): `WithBlock` and `WithoutBlock` are accepted here, but + * `invocationMatchesExtraCallSiteFilter` never holds, so such tokens appear to pass + * validation without ever matching a call. Confirm whether they should remain valid. + */ +bindingset[name] +predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Member", "Method", "Instance", "WithBlock", "WithoutBlock", "Element", "Field"] +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occurring + * in an identifying access path. + */ +predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = ["Instance", "WithBlock", "WithoutBlock"] +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. + * + * Any argument is accepted for `Member`, `Method`, `Element` and `Field`. For `Argument` and + * `Parameter`, the named positions listed below and keyword arguments of the form `name:` are + * accepted. + */ +bindingset[name, argument] +predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Member", "Method", "Element", "Field"] and + exists(argument) + or + name = ["Argument", "Parameter"] and + ( + argument = ["self", "lambda-self", "block", "any", "any-named"] + or + argument.regexpMatch("\\w+:") // keyword argument + ) +} + +/** Language-specific members that are imported into `ModelOutput`; currently empty. */ +module ModelOutputSpecific { }