mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Ruby: add a query and script for autogenerating typeModel and summaryModel data extensions entries
This commit is contained in:
@@ -185,7 +185,7 @@ module ApiGraphShared<ApiGraphSharedSig S> {
|
||||
bindingset[sink]
|
||||
pragma[inline_late]
|
||||
Node getAValueReachingSinkInline(ApiNode sink) {
|
||||
result = asSinkInline(getAnEpsilonSuccessorInline(sink))
|
||||
backwardStartNode(result) = getAnEpsilonSuccessorInline(sink)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
35
ruby/ql/src/queries/modeling/GenerateModel.ql
Normal file
35
ruby/ql/src/queries/modeling/GenerateModel.ql
Normal file
@@ -0,0 +1,35 @@
|
||||
private import internal.Types
|
||||
private import internal.Summaries
|
||||
|
||||
/**
|
||||
* Holds if `(type2, path)` should be seen as an instance of `type1`.
|
||||
*/
|
||||
query predicate typeModel = Types::typeModel/3;
|
||||
|
||||
/**
|
||||
* Holds if the value at `(type, path)` should be seen as a flow
|
||||
* source of the given `kind`.
|
||||
*
|
||||
* The kind `remote` represents a general remote flow source.
|
||||
*/
|
||||
query predicate sourceModel(string type, string path, string kind) { none() }
|
||||
|
||||
/**
|
||||
* Holds if the value at `(type, path)` should be seen as a sink
|
||||
* of the given `kind`.
|
||||
*/
|
||||
query predicate sinkModel(string type, string path, string kind) { none() }
|
||||
|
||||
/**
|
||||
* Holds if in calls to `(type, path)`, the value referred to by `input`
|
||||
* can flow to the value referred to by `output`.
|
||||
*
|
||||
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
|
||||
* respectively.
|
||||
*/
|
||||
query predicate summaryModel = Summaries::summaryModel/5;
|
||||
|
||||
/**
|
||||
* Holds if `path` can be substituted for a token `TypeVar[name]`.
|
||||
*/
|
||||
query predicate typeVariableModel(string name, string path) { none() }
|
||||
73
ruby/ql/src/queries/modeling/internal/Summaries.qll
Normal file
73
ruby/ql/src/queries/modeling/internal/Summaries.qll
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
|
||||
* Contains predicates for generating `summaryModel`s to summarize flow through methods.
|
||||
*/
|
||||
|
||||
private import ruby
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import Util as Util
|
||||
|
||||
/**
|
||||
* Contains predicates for generating `summaryModel`s to summarize flow through methods.
|
||||
*/
|
||||
module Summaries {
  /**
   * Flow configuration shared by the value-flow and taint-flow instantiations below:
   * every parameter node is a source and every return node of any method is a sink.
   */
  private module Config implements DataFlow::ConfigSig {
    predicate isSource(DataFlow::Node source) { source instanceof DataFlow::ParameterNode }

    predicate isSink(DataFlow::Node sink) { sink = any(DataFlow::MethodNode m).getAReturnNode() }
  }

  /**
   * Gets an API node whose source is a positional, keyword, block, or self
   * parameter of `methodNode`.
   *
   * NOTE(review): `Util::getArgumentPath` only has cases for positional, keyword and
   * block parameters, so presumably no `input` path is produced for a self
   * parameter - confirm.
   */
  API::Node getAnyParameterNode(DataFlow::MethodNode methodNode) {
    result.asSource() =
      [
        methodNode.getParameter(_), methodNode.getKeywordParameter(_),
        methodNode.getBlockParameter(), methodNode.getSelfParameter()
      ]
  }

  private module ValueFlow {
    // NOTE(review): the predicates brought in by this import are not referenced in
    // `summaryModel` below, which relies on `getAValueReachableFromSource()` instead.
    import DataFlow::Global<Config>

    /**
     * Holds if a value reachable from a parameter of the method at `(type, path)`
     * is one of that method's return nodes. `input` is the argument path of the
     * parameter and `output` is always `ReturnValue`.
     *
     * Only methods located in a `Util::RelevantFile` are considered.
     */
    predicate summaryModel(string type, string path, string input, string output) {
      exists(DataFlow::MethodNode methodNode, API::Node paramNode |
        methodNode.getLocation().getFile() instanceof Util::RelevantFile and
        paramNode.getAValueReachableFromSource() = methodNode.getAReturnNode() and
        paramNode = getAnyParameterNode(methodNode)
      |
        Util::pathToMethod(methodNode, type, path) and
        input = Util::getArgumentPath(paramNode.asSource()) and
        output = "ReturnValue"
      )
    }
  }

  private module TaintFlow {
    import TaintTracking::Global<Config>

    /**
     * Holds if taint tracking finds flow from a parameter of the method at
     * `(type, path)` to one of that method's return nodes, and the same tuple is
     * not already reported by `ValueFlow::summaryModel`. `input` is the argument
     * path of the parameter and `output` is always `ReturnValue`.
     *
     * Only methods located in a `Util::RelevantFile` are considered.
     */
    predicate summaryModel(string type, string path, string input, string output) {
      // Avoid emitting a `taint` row for a tuple that already has a `value` row.
      not ValueFlow::summaryModel(type, path, input, output) and
      exists(DataFlow::MethodNode methodNode, API::Node paramNode |
        methodNode.getLocation().getFile() instanceof Util::RelevantFile and
        flow(paramNode.asSource(), methodNode.getAReturnNode()) and
        paramNode = getAnyParameterNode(methodNode)
      |
        Util::pathToMethod(methodNode, type, path) and
        input = Util::getArgumentPath(paramNode.asSource()) and
        output = "ReturnValue"
      )
    }
  }

  /**
   * Holds if in calls to `(type, path)`, the value referred to by `input`
   * can flow to the value referred to by `output`.
   *
   * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
   * respectively.
   */
  predicate summaryModel(string type, string path, string input, string output, string kind) {
    ValueFlow::summaryModel(type, path, input, output) and kind = "value"
    or
    TaintFlow::summaryModel(type, path, input, output) and kind = "taint"
  }
}
|
||||
46
ruby/ql/src/queries/modeling/internal/Types.qll
Normal file
46
ruby/ql/src/queries/modeling/internal/Types.qll
Normal file
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* Contains predicates for generating `typeModel`s that contain typing
|
||||
* information for API nodes.
|
||||
*/
|
||||
|
||||
private import ruby
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import Util as Util
|
||||
|
||||
/**
|
||||
* Contains predicates for generating `typeModel`s that contain typing
|
||||
* information for API nodes.
|
||||
*/
|
||||
module Types {
  /**
   * Holds if `node` should be seen as having the given `type`.
   *
   * Only nodes located in a `Util::RelevantFile` are considered. Two cases are covered:
   * - `node` is a call to `new` on an immediate reference to a module, or an
   *   instance-level `self` of that module: `type` is the module's qualified name.
   * - `node` is an immediate reference to a module, or a module-level `self`:
   *   `type` is the module's qualified name followed by `!`.
   */
  private predicate valueHasTypeName(DataFlow::LocalSourceNode node, string type) {
    node.getLocation().getFile() instanceof Util::RelevantFile and
    exists(DataFlow::ModuleNode mod |
      (
        node = mod.getAnImmediateReference().getAMethodCall("new")
        or
        node = mod.getAnOwnInstanceSelf()
      ) and
      type = mod.getQualifiedName()
      or
      (
        node = mod.getAnImmediateReference()
        or
        node = mod.getAnOwnModuleSelf()
      ) and
      type = mod.getQualifiedName() + "!"
    )
  }

  /**
   * Holds if `(type2, path)` should be seen as an instance of `type1`.
   *
   * A row is produced when a value reaching the sink of an API node has type name
   * `type1` and `(type2, path)` is an output path (`isOutput = true`) to that node.
   */
  predicate typeModel(string type1, string type2, string path) {
    exists(API::Node node |
      valueHasTypeName(node.getAValueReachingSink(), type1) and
      Util::pathToNode(node, type2, path, true)
    )
  }
}
|
||||
126
ruby/ql/src/queries/modeling/internal/Util.qll
Normal file
126
ruby/ql/src/queries/modeling/internal/Util.qll
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
|
||||
* Contains utility methods and classes to assist with generating data extensions models.
|
||||
*/
|
||||
|
||||
private import ruby
|
||||
private import codeql.ruby.ApiGraphs
|
||||
|
||||
/**
|
||||
* A file that is relevant in the context of library modeling.
|
||||
*
|
||||
* In practice, this means a file that is not part of test code.
|
||||
*/
|
||||
class RelevantFile extends File {
  // Excludes files whose relative path contains a segment ending in `test/`,
  // `tests/`, `testcase/` or `testcases/`.
  // NOTE(review): because the leading `/` is optional and preceded by `.*`, the
  // pattern also matches directories that merely end in `test`, e.g. `latest/` or
  // `contest/` - confirm whether that is intended.
  RelevantFile() { not this.getRelativePath().regexpMatch(".*/?test(case)?s?/.*") }
}
|
||||
|
||||
/**
 * Gets an `Argument[...]` access path describing the argument that is passed
 * into `paramNode`.
 *
 * The specifier inside the brackets is the parameter's position, `name:` for a
 * keyword parameter, or `block` for a block parameter. Only parameters located
 * in a `RelevantFile` have a result.
 */
string getArgumentPath(DataFlow::ParameterNode paramNode) {
  exists(Ast::Parameter p, string spec |
    paramNode.getLocation().getFile() instanceof RelevantFile and
    p = paramNode.asParameter() and
    result = "Argument[" + spec + "]"
  |
    spec = p.getPosition().toString()
    or
    spec = p.(Ast::KeywordParameter).getName() + ":"
    or
    spec = "block" and
    p instanceof Ast::BlockParameter
  )
}
|
||||
|
||||
/**
 * Holds if `(type, path)` evaluates to the given method, when evaluated from a client of the current library.
 *
 * Singleton methods are reached through the `!`-suffixed type of their module.
 * An instance method is reached through the plain module type, except for
 * `initialize`, which is exposed as `Method[new]` on the `!`-suffixed type.
 * Only methods located in a `RelevantFile` are considered.
 */
predicate pathToMethod(DataFlow::MethodNode method, string type, string path) {
  exists(DataFlow::ModuleNode owner, string name |
    method.getLocation().getFile() instanceof RelevantFile
  |
    method = owner.getOwnSingletonMethod(name) and
    type = owner.getQualifiedName() + "!" and
    path = "Method[" + name + "]"
    or
    method = owner.getOwnInstanceMethod(name) and
    (
      if name = "initialize"
      then (
        type = owner.getQualifiedName() + "!" and
        path = "Method[new]"
      ) else (
        type = owner.getQualifiedName() and
        path = "Method[" + name + "]"
      )
    )
  )
}
|
||||
|
||||
/** Gets a positional, keyword, or block parameter of `method`. */
private DataFlow::ParameterNode getAnyParameter(DataFlow::MethodNode method) {
  result = method.getParameter(_)
  or
  result = method.getKeywordParameter(_)
  or
  result = method.getBlockParameter()
}
|
||||
|
||||
/**
 * Holds if `node` is reached by a single step from a method identified by
 * `pathToMethod`, where `(type, path)` extends that method's path with either
 * `.ReturnValue` or `.Argument[...]`.
 *
 * `isOutput` is `true` for the return-value case and `false` for the
 * parameter case.
 */
private predicate pathToNodeBase(API::Node node, string type, string path, boolean isOutput) {
  exists(DataFlow::MethodNode method, string prevPath | pathToMethod(method, type, prevPath) |
    // Values returned by the method.
    isOutput = true and
    node = method.getAReturnNode().backtrack() and
    path = prevPath + ".ReturnValue" and
    not method.getMethodName() = "initialize" // ignore return value of initialize method
    or
    // Values passed into the method through one of its parameters.
    isOutput = false and
    exists(DataFlow::ParameterNode paramNode |
      paramNode = getAnyParameter(method) and
      node = paramNode.track()
    |
      path = prevPath + "." + getArgumentPath(paramNode)
    )
  )
}
|
||||
|
||||
/**
 * Holds if `(type, path)` leads to `node` in `pathLength` steps, starting from a
 * `pathToNodeBase` tuple (length 1) and extending the path one component at a time.
 *
 * Each recursive step appends `.Element`, `.ReturnValue` or `.Parameter[...]` to
 * the previous path. The `pathLength < 8` bound limits paths to at most 7 steps.
 */
private predicate pathToNodeRec(
  API::Node node, string type, string path, boolean isOutput, int pathLength
) {
  pathLength < 8 and
  (
    pathToNodeBase(node, type, path, isOutput) and
    pathLength = 1
    or
    exists(API::Node prevNode, string prevPath, boolean prevIsOutput, int prevPathLength |
      pathToNodeRec(prevNode, type, prevPath, prevIsOutput, prevPathLength) and
      pathLength = prevPathLength + 1
    |
      // Element and return-value steps keep the direction of the previous node.
      node = prevNode.getAnElement() and
      path = prevPath + ".Element" and
      isOutput = prevIsOutput
      or
      node = prevNode.getReturn() and
      path = prevPath + ".ReturnValue" and
      isOutput = prevIsOutput
      or
      // Parameter steps are only taken from an input node (`prevIsOutput = false`),
      // and the resulting node is an output.
      prevIsOutput = false and
      isOutput = true and
      (
        exists(int n |
          node = prevNode.getParameter(n) and
          path = prevPath + ".Parameter[" + n + "]"
        )
        or
        exists(string name |
          node = prevNode.getKeywordParameter(name) and
          path = prevPath + ".Parameter[" + name + ":]"
        )
        or
        node = prevNode.getBlock() and
        path = prevPath + ".Parameter[block]"
      )
    )
  )
}
|
||||
|
||||
/**
 * Holds if a client of the current library obtains a value corresponding to `node`
 * by evaluating `(type, path)`.
 *
 * `isOutput` tells whether the value is an output (`true`) or an input (`false`),
 * as computed by `pathToNodeRec`; the number of steps taken is not exposed.
 */
predicate pathToNode(API::Node node, string type, string path, boolean isOutput) {
  exists(int steps | pathToNodeRec(node, type, path, isOutput, steps))
}
|
||||
146
ruby/scripts/generate_model.py
Executable file
146
ruby/scripts/generate_model.py
Executable file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# This script generates a data extensions model for a given library in codeql database form
|
||||
# Currently only typeModels and summaryModels are generated
|
||||
# Requires `pyyaml`
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import json
|
||||
import yaml
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line arguments of the model generator.

    Returns:
        argparse.Namespace with the attributes:
        - database_path: path to a Ruby CodeQL database (required positional).
        - output: file to write the model to, or None when not given.
        - codeql: command used to invoke codeql (defaults to "codeql").
        - overwrite: True when -w/--overwrite was passed.
    """
    parser = argparse.ArgumentParser(
        description="Generates a data extensions model from a Ruby CodeQL database"
    )
    parser.add_argument("database_path", help="filepath to a Ruby CodeQL database")
    parser.add_argument(
        "-o",
        "--output",
        required=False,
        metavar="output_file",
        help="if provided, the model will be written to this file",
    )
    parser.add_argument(
        "-c",
        "--codeql",
        required=False,
        metavar="codeql_command",
        default="codeql",
        help="if provided, use this command to invoke codeql",
    )
    parser.add_argument(
        "-w",
        "--overwrite",
        action="store_true",
        # The original help text was a copy of the --codeql description.
        help="if provided, overwrite the output file if it already exists",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def die(msg):
    """Write "Error: <msg>" to standard error and exit with status 1."""
    print("Error: " + msg, file=sys.stderr)
    raise SystemExit(1)
|
||||
|
||||
|
||||
def main():
    """Entry point: validate the arguments, run the query and emit the model.

    The output path and the database are checked first, so that obvious mistakes
    are reported before the (slow) codeql invocation starts. The decoded query
    results are staged in a temporary file that is removed when done.
    """
    args = parse_args()

    output_path = args.output
    check_output_path(output_path, args.overwrite)

    database_path = Path(args.database_path).absolute()
    check_database_exists(database_path)

    # Split on any run of whitespace: split(" ") would turn repeated, leading or
    # trailing spaces into empty argv entries.
    codeql_command = args.codeql.split()
    with tempfile.NamedTemporaryFile() as query_output_json_file:
        run_codeql_query(codeql_command, database_path, query_output_json_file)
        generate_output(query_output_json_file, output_path)
|
||||
|
||||
|
||||
def check_output_path(output_path, overwrite):
    """Abort via die() when the model cannot be written to `output_path`.

    `output_path` may be None, meaning the model goes to standard output and
    nothing needs checking. An existing file is only accepted when `overwrite`
    is true; a directory is never accepted.
    """
    if output_path is None:
        return  # STDOUT

    target = Path(output_path).absolute()
    if target.is_file():
        if not overwrite:
            die("file already exists at: " + str(target))
    elif target.is_dir():
        die("specified output path is a directory: " + str(target))
|
||||
|
||||
|
||||
def check_database_exists(database_path):
    """Abort via die() unless `database_path` looks like a Ruby CodeQL database.

    `database_path` is a pathlib.Path. It must exist, be a directory, and
    contain an entry named "db-ruby".
    """
    where = str(database_path)
    if not database_path.exists():
        problem = "database not found at: " + where
    elif not database_path.is_dir():
        problem = "database not found at: " + where + " - not a directory"
    elif not (database_path / "db-ruby").exists():
        problem = "directory: " + where + " doesn't look like a Ruby database"
    else:
        return
    die(problem)
|
||||
|
||||
|
||||
def run_codeql_query(codeql_command, database_path, query_output_json_file):
    """Run GenerateModel.ql against a database and decode the results to JSON.

    Args:
        codeql_command: list of strings that invokes codeql (e.g. ["codeql"]).
        database_path: path of the CodeQL database to query.
        query_output_json_file: named temporary file; the decoded JSON is
            written to the path given by its `.name`.

    Raises:
        subprocess.CalledProcessError: if either codeql invocation exits with a
            non-zero status. Previously such failures were ignored and only
            surfaced later as a confusing JSON parsing error.
    """
    # resolve() first: on interpreters where __file__ is relative, taking
    # .parent twice of a bare file name would stay at "." instead of moving up
    # to the directory that contains both scripts/ and ql/.
    query_path = (
        Path(__file__)
        .resolve()
        .parent.parent.joinpath("ql/src/queries/modeling/GenerateModel.ql")
    )
    with tempfile.NamedTemporaryFile() as bqrs_file:
        subprocess.run(
            codeql_command
            + [
                "query",
                "run",
                "-d",
                str(database_path),
                "-o",
                bqrs_file.name,
                str(query_path),
            ],
            check=True,
        )
        subprocess.run(
            codeql_command
            + [
                "bqrs",
                "decode",
                "--format",
                "json",
                "--output",
                query_output_json_file.name,
                bqrs_file.name,
            ],
            check=True,
        )
|
||||
|
||||
|
||||
def generate_output(query_output_json_file, output_path):
    """Serialize the query results and emit them.

    The model is printed to standard output when `output_path` is None, and
    written to that path otherwise.
    """
    rendered = serialize_output(query_output_json_file)
    if output_path is None:
        print(rendered)
        return
    Path(output_path).write_text(rendered)
|
||||
|
||||
|
||||
def model_kinds():
    """Return the names of the extensible predicates, in output order."""
    kinds = (
        "typeModel",
        "sourceModel",
        "sinkModel",
        "summaryModel",
        "typeVariableModel",
    )
    return list(kinds)
|
||||
|
||||
|
||||
def serialize_output(query_output_json_file):
    """Convert the decoded query results into a data extensions YAML string.

    `query_output_json_file` is a readable file object holding the JSON written
    by `codeql bqrs decode`. One extension entry is produced per model kind
    that is present in the JSON and has at least one tuple, in the order given
    by model_kinds().
    """
    results = json.load(query_output_json_file)
    extensions = [
        serialize_type(results[kind]["tuples"], kind)
        for kind in model_kinds()
        if kind in results and results[kind]["tuples"]
    ]
    return yaml.dump({"extensions": extensions}, default_style='"')
|
||||
|
||||
|
||||
def serialize_type(tuples, extensible_type):
    """Build one data extension entry that adds `tuples` to `extensible_type`.

    The entry targets the "codeql/ruby-all" pack and carries `tuples`
    unchanged under the "data" key.
    """
    target = dict(pack="codeql/ruby-all", extensible=extensible_type)
    return dict(addsTo=target, data=tuples)
|
||||
|
||||
|
||||
# Only run when executed as a script, so importing this module (e.g. from a
# test or another tool) does not parse sys.argv or invoke codeql.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user