Merge pull request #14560 from alexrford/rb/modgen

Ruby: add a query and script for autogenerating typeModel and summaryModel data extensions entries
This commit is contained in:
Alex Ford
2023-10-27 16:43:42 +01:00
committed by GitHub
6 changed files with 436 additions and 1 deletions

View File

@@ -185,7 +185,7 @@ module ApiGraphShared<ApiGraphSharedSig S> {
bindingset[sink]
pragma[inline_late]
Node getAValueReachingSinkInline(ApiNode sink) {
// NOTE(review): the two lines below look like the removed and the added side of a
// diff hunk whose +/- markers were lost (the hunk header says 7 lines before and 7 after).
// Presumably only one of them is the real body — confirm against the repository.
result = asSinkInline(getAnEpsilonSuccessorInline(sink))
backwardStartNode(result) = getAnEpsilonSuccessorInline(sink)
}
/**

View File

@@ -0,0 +1,35 @@
private import internal.Types
private import internal.Summaries
/**
* Holds if `(type2, path)` should be seen as an instance of `type1`.
*
* This query predicate is an alias for the three-column predicate
* `Types::typeModel`, so its columns are `type1`, `type2`, `path`.
*/
query predicate typeModel = Types::typeModel/3;
/**
* Holds if the value at `(type, path)` should be seen as a flow
* source of the given `kind`.
*
* The kind `remote` represents a general remote flow source.
*
* The body is `none()`, so this query predicate never produces any rows.
*/
query predicate sourceModel(string type, string path, string kind) { none() }
/**
* Holds if the value at `(type, path)` should be seen as a sink
* of the given `kind`.
*
* The body is `none()`, so this query predicate never produces any rows.
*/
query predicate sinkModel(string type, string path, string kind) { none() }
/**
* Holds if, in calls to `(type, path)`, the value referred to by `input`
* can flow to the value referred to by `output`.
*
* `kind` is either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively.
*
* This query predicate is an alias for the five-column predicate
* `Summaries::summaryModel`.
*/
query predicate summaryModel = Summaries::summaryModel/5;
/**
* Holds if `path` can be substituted for a token `TypeVar[name]`.
*
* The body is `none()`, so this query predicate never produces any rows.
*/
query predicate typeVariableModel(string name, string path) { none() }

View File

@@ -0,0 +1,71 @@
/**
* Contains predicates for generating `summaryModel`s to summarize flow through methods.
*/
private import ruby
private import codeql.ruby.ApiGraphs
private import codeql.ruby.TaintTracking
private import Util as Util
/**
 * Contains predicates for generating `summaryModel`s to summarize flow through methods.
 */
module Summaries {
  /**
   * The flow configuration shared by the value-flow and taint-flow analyses.
   *
   * Sources are the parameters (as given by `Util::getAnyParameter`) of public
   * methods; sinks are the return nodes of methods. The configuration requests
   * `FeatureEqualSourceSinkCallContext`.
   */
  private module Config implements DataFlow::ConfigSig {
    predicate isSource(DataFlow::Node source) {
      exists(DataFlow::MethodNode methodNode | methodNode.isPublic() |
        Util::getAnyParameter(methodNode) = source
      )
    }

    predicate isSink(DataFlow::Node sink) { sink = any(DataFlow::MethodNode m).getAReturnNode() }

    DataFlow::FlowFeature getAFeature() {
      result instanceof DataFlow::FeatureEqualSourceSinkCallContext
    }
  }

  /**
   * Holds if `paramNode` is a parameter of `methodNode`, a method defined in a
   * relevant file, and `(type, path, input, output)` is the summary row that
   * describes flow from that parameter to the return value of the method.
   *
   * This predicate does not itself check that any flow exists.
   *
   * The row's `(type, path)` is derived from `methodNode` while `input` is derived
   * from `paramNode`, so the two are explicitly required to belong together here
   * instead of relying only on the flow feature of `Config` to keep the source
   * and the sink inside the same method.
   */
  private predicate parameterToReturnRow(
    DataFlow::MethodNode methodNode, DataFlow::ParameterNode paramNode, string type, string path,
    string input, string output
  ) {
    methodNode.getLocation().getFile() instanceof Util::RelevantFile and
    paramNode = Util::getAnyParameter(methodNode) and
    Util::pathToMethod(methodNode, type, path) and
    input = Util::getArgumentPath(paramNode) and
    output = "ReturnValue"
  }

  /** Value-preserving flow from a parameter to a return node of the same method. */
  private module ValueFlow {
    import DataFlow::Global<Config>

    predicate summaryModel(string type, string path, string input, string output) {
      exists(DataFlow::MethodNode methodNode, DataFlow::ParameterNode paramNode |
        parameterToReturnRow(methodNode, paramNode, type, path, input, output) and
        flow(paramNode, methodNode.getAReturnNode())
      )
    }
  }

  /**
   * Taint flow from a parameter to a return node of the same method, excluding
   * rows that are already reported by `ValueFlow`.
   */
  private module TaintFlow {
    import TaintTracking::Global<Config>

    predicate summaryModel(string type, string path, string input, string output) {
      not ValueFlow::summaryModel(type, path, input, output) and
      exists(DataFlow::MethodNode methodNode, DataFlow::ParameterNode paramNode |
        parameterToReturnRow(methodNode, paramNode, type, path, input, output) and
        flow(paramNode, methodNode.getAReturnNode())
      )
    }
  }

  /**
   * Holds if in calls to `(type, path)`, the value referred to by `input`
   * can flow to the value referred to by `output`.
   *
   * `kind` is either `value` or `taint`, for value-preserving or taint-preserving steps,
   * respectively. A row is reported with kind `taint` only if the same row is not
   * reported with kind `value`.
   */
  predicate summaryModel(string type, string path, string input, string output, string kind) {
    ValueFlow::summaryModel(type, path, input, output) and kind = "value"
    or
    TaintFlow::summaryModel(type, path, input, output) and kind = "taint"
  }
}

View File

@@ -0,0 +1,46 @@
/**
* Contains predicates for generating `typeModel`s that contain typing
* information for API nodes.
*/
private import ruby
private import codeql.ruby.ApiGraphs
private import Util as Util
/**
 * Contains predicates for generating `typeModel`s that contain typing
 * information for API nodes.
 */
module Types {
  /**
   * Holds if `node` is either the result of calling `new` on an immediate
   * reference to `mod`, or an own-instance `self` of `mod`.
   */
  private predicate isInstanceValue(DataFlow::LocalSourceNode node, DataFlow::ModuleNode mod) {
    node = mod.getAnImmediateReference().getAMethodCall("new")
    or
    node = mod.getAnOwnInstanceSelf()
  }

  /**
   * Holds if `node` is either an immediate reference to `mod`, or an
   * own-module `self` of `mod`.
   */
  private predicate isModuleValue(DataFlow::LocalSourceNode node, DataFlow::ModuleNode mod) {
    node = mod.getAnImmediateReference()
    or
    node = mod.getAnOwnModuleSelf()
  }

  /**
   * Holds if `node` should be seen as having the given `type`.
   *
   * Instance values get the qualified name of their module as type; values that
   * denote the module itself get the qualified name followed by `!`. Only nodes
   * located in a `Util::RelevantFile` are considered.
   */
  private predicate valueHasTypeName(DataFlow::LocalSourceNode node, string type) {
    node.getLocation().getFile() instanceof Util::RelevantFile and
    exists(DataFlow::ModuleNode owner, string qualifiedName |
      qualifiedName = owner.getQualifiedName()
    |
      isModuleValue(node, owner) and type = qualifiedName + "!"
      or
      isInstanceValue(node, owner) and type = qualifiedName
    )
  }

  /**
   * Holds if `(type2, path)` should be seen as an instance of `type1`.
   */
  predicate typeModel(string type1, string type2, string path) {
    exists(API::Node apiNode |
      Util::pathToNode(apiNode, type2, path, true) and
      valueHasTypeName(apiNode.getAValueReachingSink(), type1)
    )
  }
}

View File

@@ -0,0 +1,137 @@
/**
* Contains utility methods and classes to assist with generating data extensions models.
*/
private import ruby
private import codeql.ruby.ApiGraphs
/**
 * A file that is relevant in the context of library modeling.
 *
 * In practice, this means a file that is not part of test code: a file is
 * excluded when its relative path contains a directory named `test`, `tests`,
 * `testcase` or `testcases`.
 *
 * The directory name must be a whole path segment, so directories whose name
 * merely ends in `test` (for example `latest/` or `minitest/`) are not treated
 * as test code.
 */
class RelevantFile extends File {
  RelevantFile() {
    // `regexpMatch` has to match the whole path; `(.*/)?` anchors the directory
    // name either at the start of the path or directly after a `/`.
    not this.getRelativePath().regexpMatch("(.*/)?test(case)?s?/.*")
  }
}
/**
 * Gets an access path of an argument corresponding to the given `paramNode`.
 *
 * The result has the form `Argument[<specifier>]`, where the specifier is
 * - `<name>:` for a keyword parameter,
 * - `block` for a block parameter,
 * - `self` for a self parameter, and
 * - the position of the parameter otherwise.
 *
 * Only parameters located in a `RelevantFile` have a result.
 */
string getArgumentPath(DataFlow::ParameterNode paramNode) {
  paramNode.getLocation().getFile() instanceof RelevantFile and
  exists(string paramSpecifier |
    exists(Ast::Parameter param | param = paramNode.asParameter() |
      // NOTE(review): `getPosition()` appears to be the index in the full parameter
      // list and is therefore also defined for keyword and block parameters.
      // Those are addressed by name or as `block` below, so they must not get an
      // additional, misleading positional path.
      not param instanceof Ast::KeywordParameter and
      not param instanceof Ast::BlockParameter and
      paramSpecifier = param.getPosition().toString()
      or
      paramSpecifier = param.(Ast::KeywordParameter).getName() + ":"
      or
      param instanceof Ast::BlockParameter and
      paramSpecifier = "block"
    )
    or
    paramNode instanceof DataFlow::SelfParameterNode and paramSpecifier = "self"
  |
    result = "Argument[" + paramSpecifier + "]"
  )
}
/**
 * Holds if `(type,path)` evaluates to the given method, when evaluated from a client of the current library.
 *
 * - An own singleton method `m` of module `M` is reached via type `M!` and path `Method[m]`.
 * - An own instance method `initialize` of `M` is reached via type `M!` and path `Method[new]`.
 * - Any other own instance method `m` of `M` is reached via type `M` and path `Method[m]`.
 *
 * Only methods located in a `RelevantFile` are considered.
 */
predicate pathToMethod(DataFlow::MethodNode method, string type, string path) {
  method.getLocation().getFile() instanceof RelevantFile and
  exists(DataFlow::ModuleNode owner, string name, string qualifiedName |
    qualifiedName = owner.getQualifiedName()
  |
    method = owner.getOwnSingletonMethod(name) and
    type = qualifiedName + "!" and
    path = "Method[" + name + "]"
    or
    method = owner.getOwnInstanceMethod(name) and
    (
      name = "initialize" and
      type = qualifiedName + "!" and
      path = "Method[new]"
      or
      not name = "initialize" and
      type = qualifiedName and
      path = "Method[" + name + "]"
    )
  )
}
/**
 * Gets any parameter to `methodNode`. This may be a positional, keyword,
 * block, or self parameter.
 */
DataFlow::ParameterNode getAnyParameter(DataFlow::MethodNode methodNode) {
  result = methodNode.getParameter(_)
  or
  result = methodNode.getKeywordParameter(_)
  or
  result = methodNode.getBlockParameter()
  or
  result = methodNode.getSelfParameter()
}
/**
 * Holds if `node` is reached by a single step from a method that `pathToMethod`
 * maps to `(type, methodPath)`, where `path` extends `methodPath` by that step:
 *
 * - `isOutput = false`: `node` is obtained by `track()` from one of the method's
 *   parameters, and the step is the parameter's argument path.
 * - `isOutput = true`: `node` is obtained by `backtrack()` from a return node of
 *   the method, and the step is `ReturnValue`. Methods named `initialize` are
 *   skipped here, as their return value is ignored.
 */
private predicate pathToNodeBase(API::Node node, string type, string path, boolean isOutput) {
  exists(DataFlow::MethodNode method, string methodPath |
    pathToMethod(method, type, methodPath)
  |
    exists(DataFlow::ParameterNode param |
      param = getAnyParameter(method) and
      node = param.track() and
      path = methodPath + "." + getArgumentPath(param) and
      isOutput = false
    )
    or
    not method.getMethodName() = "initialize" and
    node = method.getAReturnNode().backtrack() and
    path = methodPath + ".ReturnValue" and
    isOutput = true
  )
}
/**
 * Holds if `(type, path)` leads to `node` using `pathLength` steps after the
 * method itself, where `pathLength` is at most 7.
 *
 * The first step comes from `pathToNodeBase`. Every further step appends one
 * token to the path of a previously reached node:
 * - `.Element` or `.ReturnValue`, which keep the `isOutput` flag of the previous node;
 * - `.Parameter[n]`, `.Parameter[name:]` or `.Parameter[block]`, which are only
 *   taken from a node with `isOutput = false` and yield a node with `isOutput = true`.
 */
private predicate pathToNodeRec(
  API::Node node, string type, string path, boolean isOutput, int pathLength
) {
  pathLength = 1 and
  pathToNodeBase(node, type, path, isOutput)
  or
  // The bound on the length keeps the recursion finite.
  pathLength < 8 and
  exists(API::Node pred, string predPath, boolean predIsOutput, int predLength, string step |
    pathToNodeRec(pred, type, predPath, predIsOutput, predLength) and
    pathLength = predLength + 1 and
    path = predPath + step
  |
    step = ".Element" and
    node = pred.getAnElement() and
    isOutput = predIsOutput
    or
    step = ".ReturnValue" and
    node = pred.getReturn() and
    isOutput = predIsOutput
    or
    predIsOutput = false and
    isOutput = true and
    (
      exists(int n |
        node = pred.getParameter(n) and
        step = ".Parameter[" + n + "]"
      )
      or
      exists(string name |
        node = pred.getKeywordParameter(name) and
        step = ".Parameter[" + name + ":]"
      )
      or
      node = pred.getBlock() and
      step = ".Parameter[block]"
    )
  )
}
/**
 * Holds if `(type,path)` evaluates to a value corresponding to `node`, when evaluated from a client of the current library.
 *
 * `isOutput` is the flag computed by `pathToNodeRec` for this path; the length
 * of the path is not exposed.
 */
predicate pathToNode(API::Node node, string type, string path, boolean isOutput) {
  exists(int pathLength | pathToNodeRec(node, type, path, isOutput, pathLength))
}