Files
codeql/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll
2023-12-10 11:25:43 +01:00

567 lines
21 KiB
Plaintext

/**
* INTERNAL use only. This is an experimental API subject to change without notice.
*
* Provides classes and predicates for dealing with MaD flow models specified
* in data extensions and CSV format.
*
* The CSV specification has the following columns:
* - Sources:
* `namespace; type; subtypes; name; signature; ext; output; kind; provenance`
* - Sinks:
* `namespace; type; subtypes; name; signature; ext; input; kind; provenance`
* - Summaries:
* `namespace; type; subtypes; name; signature; ext; input; output; kind; provenance`
* - Neutrals:
* `namespace; type; name; signature; kind; provenance`
* A neutral is used to indicate that a callable is neutral with respect to flow (no summary), source (is not a source) or sink (is not a sink).
*
* The interpretation of a row is similar to API-graphs with a left-to-right
* reading.
* 1. The `namespace` column selects a namespace.
* 2. The `type` column selects a type within that namespace.
* 3. The `subtypes` is a boolean that indicates whether to jump to an
* arbitrary subtype of that type.
* 4. The `name` column optionally selects a specific named member of the type.
* 5. The `signature` column optionally restricts the named member. If
* `signature` is blank then no such filtering is done. The format of the
* signature is a comma-separated list of types enclosed in parentheses. The
* types can be short names or fully qualified names (mixing these two options
* is not allowed within a single signature).
* 6. The `ext` column specifies additional API-graph-like edges. Currently
* there are only two valid values: "" and "Attribute". The empty string has no
* effect. "Attribute" applies if `name` and `signature` were left blank and
* acts by selecting an element that is attributed with the attribute type
* selected by the first 4 columns. This can be another member such as a field,
* property, method, or parameter.
* 7. The `input` column specifies how data enters the element selected by the
* first 6 columns, and the `output` column specifies how data leaves the
* element selected by the first 6 columns. For sinks, an `input` can be either "",
* "Argument[n]", "Argument[n1..n2]", or "ReturnValue":
* - "": Selects a write to the selected element in case this is a field or property.
* - "Argument[n]": Selects an argument in a call to the selected element.
* The arguments are zero-indexed, and `this` specifies the qualifier.
* - "Argument[n1..n2]": Similar to "Argument[n]" but select any argument in
* the given range. The range is inclusive at both ends.
* - "ReturnValue": Selects a value being returned by the selected element.
* This requires that the selected element is a method with a body.
*
* For sources, an `output` can be either "", "Argument[n]", "Argument[n1..n2]",
* "Parameter", "Parameter[n]", "Parameter[n1..n2]", or "ReturnValue":
* - "": Selects a read of a selected field, property, or parameter.
* - "Argument[n]": Selects the post-update value of an argument in a call to the
* selected element. That is, the value of the argument after the call returns.
* The arguments are zero-indexed, and `this` specifies the qualifier.
* - "Argument[n1..n2]": Similar to "Argument[n]" but select any argument in
* the given range. The range is inclusive at both ends.
* - "Parameter": Selects the value of a parameter of the selected element.
* "Parameter" is also allowed in case the selected element is already a
* parameter itself.
* - "Parameter[n]": Similar to "Parameter" but restricted to a specific
* numbered parameter (zero-indexed, and `this` specifies the value of `this`).
* - "Parameter[n1..n2]": Similar to "Parameter[n]" but selects any parameter
* in the given range. The range is inclusive at both ends.
* - "ReturnValue": Selects the return value of a call to the selected element.
*
* For summaries, `input` and `output` may be suffixed by any number of the
* following, separated by ".":
* - "Element": Selects an element in a collection.
* - "Field[f]": Selects the contents of field `f`.
* - "Property[p]": Selects the contents of property `p`.
*
* 8. The `kind` column is a tag that can be referenced from QL to determine to
* which classes the interpreted elements should be added. For example, for
* sources "remote" indicates a default remote flow source, and for summaries
* "taint" indicates a default additional taint step and "value" indicates a
* globally applicable value-preserving step. For neutrals the kind can be `summary`,
* `source` or `sink` to indicate that the neutral is neutral with respect to
* flow (no summary), source (is not a source) or sink (is not a sink).
* 9. The `provenance` column is a tag to indicate the origin and verification of a model.
* The format is {origin}-{verification} or just "manual" where the origin describes
* the origin of the model and verification describes how the model has been verified.
* Some examples are:
* - "df-generated": The model has been generated by the model generator tool.
* - "df-manual": The model has been generated by the model generator and verified by a human.
* - "manual": The model has been written by hand.
* This information is used in a heuristic for dataflow analysis to determine, if a
* model or source code should be used for determining flow.
*/
import csharp
import ExternalFlowExtensions
private import DataFlowDispatch
private import DataFlowPrivate
private import DataFlowPublic
private import FlowSummaryImpl
private import FlowSummaryImpl::Public
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Private::External
private import semmle.code.csharp.commons.QualifiedName
private import codeql.mad.ModelValidation as SharedModelVal
private predicate relevantNamespace(string namespace) {
sourceModel(namespace, _, _, _, _, _, _, _, _) or
sinkModel(namespace, _, _, _, _, _, _, _, _) or
summaryModel(namespace, _, _, _, _, _, _, _, _, _)
}
private predicate namespaceLink(string shortns, string longns) {
relevantNamespace(shortns) and
relevantNamespace(longns) and
longns.prefix(longns.indexOf(".")) = shortns
}
private predicate canonicalNamespace(string namespace) {
relevantNamespace(namespace) and not namespaceLink(_, namespace)
}
private predicate canonicalNamespaceLink(string namespace, string subns) {
canonicalNamespace(namespace) and
(subns = namespace or namespaceLink(namespace, subns))
}
/**
* Holds if MaD framework coverage of `namespace` is `n` api endpoints of the
* kind `(kind, part)`.
*/
predicate modelCoverage(string namespace, int namespaces, string kind, string part, int n) {
namespaces = strictcount(string subns | canonicalNamespaceLink(namespace, subns)) and
(
part = "source" and
n =
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string output, string provenance |
canonicalNamespaceLink(namespace, subns) and
sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance)
)
or
part = "sink" and
n =
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string input, string provenance |
canonicalNamespaceLink(namespace, subns) and
sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance)
)
or
part = "summary" and
n =
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string input, string output, string provenance |
canonicalNamespaceLink(namespace, subns) and
summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance)
)
)
}
/** Provides a query predicate to check the MaD models for validation errors. */
module ModelValidation {
private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax
private predicate getRelevantAccessPath(string path) {
summaryModel(_, _, _, _, _, _, path, _, _, _) or
summaryModel(_, _, _, _, _, _, _, path, _, _) or
sinkModel(_, _, _, _, _, _, path, _, _) or
sourceModel(_, _, _, _, _, _, path, _, _)
}
private module MkAccessPath = AccessPathSyntax::AccessPath<getRelevantAccessPath/1>;
class AccessPath = MkAccessPath::AccessPath;
class AccessPathToken = MkAccessPath::AccessPathToken;
private string getInvalidModelInput() {
exists(string pred, AccessPath input, AccessPathToken part |
sinkModel(_, _, _, _, _, _, input, _, _) and pred = "sink"
or
summaryModel(_, _, _, _, _, _, input, _, _, _) and pred = "summary"
|
(
invalidSpecComponent(input, part) and
not part = "" and
not (part = "Argument" and pred = "sink") and
not parseArg(part, _) and
not part.getName() = ["Field", "Property"]
or
part = input.getToken(_) and
parseParam(part, _)
or
invalidIndexComponent(input, part)
) and
result = "Unrecognized input specification \"" + part + "\" in " + pred + " model."
)
}
private string getInvalidModelOutput() {
exists(string pred, AccessPath output, AccessPathToken part |
sourceModel(_, _, _, _, _, _, output, _, _) and pred = "source"
or
summaryModel(_, _, _, _, _, _, _, output, _, _) and pred = "summary"
|
(
invalidSpecComponent(output, part) and
not part = "" and
not (part = ["Argument", "Parameter"] and pred = "source") and
not part.getName() = ["Field", "Property"]
or
invalidIndexComponent(output, part)
) and
result = "Unrecognized output specification \"" + part + "\" in " + pred + " model."
)
}
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, _, _, _, _, kind, _) }
predicate sinkKind(string kind) { sinkModel(_, _, _, _, _, _, _, kind, _) }
predicate sourceKind(string kind) { sourceModel(_, _, _, _, _, _, _, kind, _) }
predicate neutralKind(string kind) { neutralModel(_, _, _, _, kind, _) }
}
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
private string getInvalidModelSignature() {
exists(
string pred, string namespace, string type, string name, string signature, string ext,
string provenance
|
sourceModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "source"
or
sinkModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "sink"
or
summaryModel(namespace, type, _, name, signature, ext, _, _, _, provenance) and
pred = "summary"
or
neutralModel(namespace, type, name, signature, _, provenance) and
ext = "" and
pred = "neutral"
|
not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
result = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
or
not type.regexpMatch("[a-zA-Z0-9_<>,\\+]+") and
result = "Dubious type \"" + type + "\" in " + pred + " model."
or
not name.regexpMatch("[a-zA-Z0-9_<>,\\.]*") and
result = "Dubious member name \"" + name + "\" in " + pred + " model."
or
not signature.regexpMatch("|\\([a-zA-Z0-9_<>\\.\\+\\*,\\[\\]]*\\)") and
result = "Dubious signature \"" + signature + "\" in " + pred + " model."
or
not ext.regexpMatch("|Attribute") and
result = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
or
invalidProvenance(provenance) and
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
)
}
/** Holds if some row in a MaD flow model appears to contain typos. */
query predicate invalidModelRow(string msg) {
msg =
[
getInvalidModelSignature(), getInvalidModelInput(), getInvalidModelOutput(),
KindVal::getInvalidModelKind()
]
}
}
private predicate elementSpec(
string namespace, string type, boolean subtypes, string name, string signature, string ext
) {
sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _)
or
sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _)
or
summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _)
or
neutralModel(namespace, type, name, signature, _, _) and ext = "" and subtypes = false
}
private predicate elementSpec(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
UnboundValueOrRefType t
) {
elementSpec(namespace, type, subtypes, name, signature, ext) and
QN::hasQualifiedName(t, namespace, type)
}
private class UnboundValueOrRefType extends ValueOrRefType {
UnboundValueOrRefType() { this.isUnboundDeclaration() }
UnboundValueOrRefType getASubTypeUnbound() {
exists(Type t |
result.getABaseType() = t and
this = t.getUnboundDeclaration()
)
}
}
/** An unbound callable. */
class UnboundCallable extends Callable {
UnboundCallable() { this.isUnboundDeclaration() }
/**
* Holds if this unbound callable overrides or implements (transitively)
* `that` unbound callable.
*/
predicate overridesOrImplementsUnbound(UnboundCallable that) {
exists(Callable c |
this.(Overridable).overridesOrImplements(c) and
that = c.getUnboundDeclaration()
)
}
}
pragma[nomagic]
private predicate subtypeSpecCandidate(string name, UnboundValueOrRefType t) {
exists(UnboundValueOrRefType t0 |
elementSpec(_, _, true, name, _, _, t0) and
t = t0.getASubTypeUnbound+()
)
}
pragma[nomagic]
private predicate callableInfo(Callable c, string name, UnboundValueOrRefType decl) {
decl = c.getDeclaringType() and
(
c.(Operator).getFunctionName() = name
or
not c instanceof Operator and
c.hasName(name)
)
}
private class InterpretedCallable extends Callable {
InterpretedCallable() {
exists(string namespace, string type, string name |
partialModel(this, namespace, type, name, _) and
elementSpec(namespace, type, _, name, _, _)
)
or
exists(string name, UnboundValueOrRefType t |
callableInfo(this, name, t) and
subtypeSpecCandidate(name, t)
)
}
}
pragma[nomagic]
Declaration interpretBaseDeclaration(string namespace, string type, string name, string signature) {
exists(UnboundValueOrRefType t | elementSpec(namespace, type, _, name, signature, _, t) |
result =
any(Declaration d |
QN::hasQualifiedName(d, namespace, type, name) and
(
signature = ""
or
signature = "(" + parameterQualifiedTypeNamesToString(d) + ")"
)
)
or
result = t and
name = "" and
signature = ""
)
}
/** Gets the source/sink/summary/neutral element corresponding to the supplied parameters. */
pragma[nomagic]
Declaration interpretElement(
string namespace, string type, boolean subtypes, string name, string signature, string ext
) {
elementSpec(namespace, type, subtypes, name, signature, ext) and
exists(Declaration base, Declaration d |
base = interpretBaseDeclaration(namespace, type, name, signature) and
(
d = base
or
subtypes = true and
(
d.(UnboundCallable).overridesOrImplementsUnbound(base)
or
d = base.(UnboundValueOrRefType).getASubTypeUnbound+()
)
)
|
ext = "" and result = d
or
ext = "Attribute" and result.(Attributable).getAnAttribute().getType() = d
)
}
/**
* A callable where there exists a MaD sink model that applies to it.
*/
class SinkCallable extends Callable {
SinkCallable() { SourceSinkInterpretationInput::sinkElement(this, _, _) }
}
/**
* A callable where there exists a MaD source model that applies to it.
*/
class SourceCallable extends Callable {
SourceCallable() { SourceSinkInterpretationInput::sourceElement(this, _, _) }
}
cached
private module Cached {
/**
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
cached
predicate sourceNode(Node node, string kind) {
exists(SourceSinkInterpretationInput::InterpretNode n |
isSourceNode(n, kind) and n.asNode() = node
)
}
/**
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
cached
predicate sinkNode(Node node, string kind) {
exists(SourceSinkInterpretationInput::InterpretNode n |
isSinkNode(n, kind) and n.asNode() = node
)
}
}
import Cached
/** Holds if the summary should apply for all overrides of `c`. */
predicate isBaseCallableOrPrototype(UnboundCallable c) {
c.getDeclaringType() instanceof Interface
or
exists(Modifiable m | m = [c.(Modifiable), c.(Accessor).getDeclaration()] |
m.isAbstract()
or
c.getDeclaringType().(Modifiable).isAbstract() and m.(Virtualizable).isVirtual()
)
}
/** Gets a string representing whether the summary should apply for all overrides of `c`. */
private string getCallableOverride(UnboundCallable c) {
if isBaseCallableOrPrototype(c) then result = "true" else result = "false"
}
private module QualifiedNameInput implements QualifiedNameInputSig {
string getUnboundGenericSuffix(UnboundGeneric ug) {
result =
"<" + strictconcat(int i, string s | s = ug.getTypeParameter(i).getName() | s, "," order by i)
+ ">"
}
}
private module QN = QualifiedName<QualifiedNameInput>;
pragma[nomagic]
private string parameterQualifiedType(Parameter p) {
exists(string qualifier, string name |
QN::hasQualifiedName(p.getType(), qualifier, name) and
result = getQualifiedName(qualifier, name)
)
}
/** Gets the string representation of the parameters of `c`. */
string parameterQualifiedTypeNamesToString(Callable c) {
result =
concat(int i, string s | s = parameterQualifiedType(c.getParameter(i)) | s, "," order by i)
}
predicate partialModel(
UnboundCallable c, string namespace, string type, string name, string parameters
) {
QN::hasQualifiedName(c, namespace, type, name) and
parameters = "(" + parameterQualifiedTypeNamesToString(c) + ")"
}
/** Computes the first 6 columns for positive CSV rows of `c`. */
string asPartialModel(UnboundCallable c) {
exists(string namespace, string type, string name, string parameters |
partialModel(c, namespace, type, name, parameters) and
result =
namespace + ";" //
+ type + ";" //
+ getCallableOverride(c) + ";" //
+ name + ";" //
+ parameters + ";" //
+ /* ext + */ ";" //
)
}
/** Computes the first 4 columns for neutral CSV rows of `c`. */
string asPartialNeutralModel(UnboundCallable c) {
exists(string namespace, string type, string name, string parameters |
partialModel(c, namespace, type, name, parameters) and
result =
namespace + ";" //
+ type + ";" //
+ name + ";" //
+ parameters + ";" //
)
}
private predicate interpretSummary(
UnboundCallable c, string input, string output, string kind, string provenance
) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and
c = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
private class SummarizedCallableAdapter extends SummarizedCallable {
SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _) }
private predicate relevantSummaryElementManual(string input, string output, string kind) {
exists(Provenance provenance |
interpretSummary(this, input, output, kind, provenance) and
provenance.isManual()
)
}
private predicate relevantSummaryElementGenerated(string input, string output, string kind) {
exists(Provenance provenance |
interpretSummary(this, input, output, kind, provenance) and
provenance.isGenerated()
)
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string kind |
this.relevantSummaryElementManual(input, output, kind)
or
not this.relevantSummaryElementManual(_, _, _) and
this.relevantSummaryElementGenerated(input, output, kind)
|
if kind = "value" then preservesValue = true else preservesValue = false
)
}
override predicate hasProvenance(Provenance provenance) {
interpretSummary(this, _, _, _, provenance)
}
}
private class NeutralCallableAdapter extends NeutralCallable {
string kind;
string provenance_;
NeutralCallableAdapter() {
exists(string namespace, string type, string name, string signature |
neutralModel(namespace, type, name, signature, kind, provenance_) and
this = interpretElement(namespace, type, false, name, signature, "")
)
}
override string getKind() { result = kind }
override predicate hasProvenance(Provenance provenance) { provenance = provenance_ }
}