Merge pull request #17509 from michaelnebel/modelgen/parammodule

C#/Java: Re-factor the model generator to be a parameterized module.
This commit is contained in:
Michael Nebel
2024-09-26 10:57:16 +02:00
committed by GitHub
27 changed files with 1533 additions and 2226 deletions

View File

@@ -57,10 +57,6 @@
"java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll",
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll"
],
"Model as Data Generation Java/C# - CaptureModels": [
"java/ql/src/utils/modelgenerator/internal/CaptureModels.qll",
"csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll"
],
"Sign Java/C#": [
"java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/Sign.qll",
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/Sign.qll"

View File

@@ -9,5 +9,5 @@
import internal.CaptureModels
from DataFlowSummaryTargetApi api, string flow
where flow = captureContentFlow(api)
where flow = ContentSensitive::captureFlow(api)
select flow order by flow

View File

@@ -6,9 +6,7 @@
* @tags modelgenerator
*/
import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import internal.CaptureModels
import internal.CaptureSummaryFlowQuery
from DataFlowSummaryTargetApi api, string noflow
where noflow = captureNoFlow(api)

View File

@@ -6,9 +6,7 @@
* @tags modelgenerator
*/
import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import internal.CaptureModels
import internal.CaptureSummaryFlowQuery
from DataFlowSummaryTargetApi api, string flow
where flow = captureFlow(api)

View File

@@ -1,635 +1,351 @@
/**
* Provides classes and predicates related to capturing summary, source,
* and sink models of the Standard or a 3rd party library.
*/
private import csharp as CS
private import semmle.code.csharp.commons.Util as Util
private import semmle.code.csharp.commons.Collections as Collections
private import semmle.code.csharp.commons.QualifiedName as QualifiedName
private import semmle.code.csharp.dataflow.internal.DataFlowDispatch
private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
private import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.csharp.dataflow.internal.DataFlowImplSpecific
private import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.code.csharp.dataflow.internal.TaintTrackingImplSpecific
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import semmle.code.csharp.frameworks.System
private import semmle.code.csharp.Location
private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl
private import CaptureModelsSpecific
private import CaptureModelsPrinting
module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDataFlow> {
class Type = CS::Type;
/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
*/
private class ReturnNodeExt extends DataFlow::Node {
private DataFlowImplCommon::ReturnKindExt kind;
class Parameter = CS::Parameter;
ReturnNodeExt() {
kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
class Callable = CS::Callable;
class NodeExtended extends CS::DataFlow::Node {
Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingCallable() }
}
/**
* Gets the kind of the return node.
* Holds if any of the parameters of `api` are `System.Func<>`.
*/
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
}
bindingset[c]
private signature string printCallableParamSig(Callable c, ParameterPosition p);
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
string getOutput(ReturnNodeExt node) {
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
result = "ReturnValue"
or
exists(ParameterPosition pos |
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
)
}
}
string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
class DataFlowSummaryTargetApi extends SummaryTargetApi {
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
}
class DataFlowSourceTargetApi = SourceTargetApi;
class DataFlowSinkTargetApi = SinkTargetApi;
private module ModelPrintingInput implements ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
string getProvenance() { result = "df-generated" }
}
module Printing = ModelPrinting<ModelPrintingInput>;
/**
* Holds if `c` is a relevant content kind, where the underlying type is relevant.
*/
private predicate isRelevantTypeInContent(DataFlow::ContentSet c) {
isRelevantType(getUnderlyingContentType(c))
}
/**
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*/
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::ContentSet f |
DataFlowPrivate::readStep(node1, f, node2) and
// Partially restrict the content types used for intermediate steps.
(not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f))
)
or
exists(DataFlow::ContentSet f | DataFlowPrivate::storeStep(node1, f, node2) | containerContent(f))
}
/**
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
*/
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
isRelevantTypeInContent(c) or
containerContent(c)
}
/**
* Gets the MaD string representation of the parameter node `p`.
*/
string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = parameterAccess(p.asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
result = parameterContentAccess(p.asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) }
/**
* Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`).
*/
string captureQualifierFlow(DataFlowSummaryTargetApi api) {
exists(ReturnNodeExt ret |
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}
private int accessPathLimit0() { result = 2 }
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
abstract private class TaintState extends TTaintState {
abstract string toString();
}
/**
* A FlowState representing a tainted read.
*/
private class TaintRead extends TaintState, TTaintRead {
private int step;
TaintRead() { this = TTaintRead(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintRead(" + step + ")" }
}
/**
* A FlowState representing a tainted write.
*/
private class TaintStore extends TaintState, TTaintStore {
private int step;
TaintStore() { this = TTaintStore(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintStore(" + step + ")" }
}
/**
* A data-flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*/
module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
state.(TaintRead).getStep() = 0
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
exists(DataFlow::ContentSet c |
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
)
or
exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
private predicate isHigherOrder(Callable api) {
exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() |
t instanceof SystemLinqExpressions::DelegateExtType
)
}
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
private predicate irrelevantAccessor(CS::Accessor a) {
a.getDeclaration().(CS::Property).isReadWrite()
}
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
exists(string input, string output |
p.getEnclosingCallable() = api and
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asLiftedTaintModel(api, input, output)
)
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow(DataFlowSummaryTargetApi api) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
PropagateFlow::flow(p, returnNodeExt) and
result = captureThroughFlow0(api, p, returnNodeExt)
)
}
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
int accessPathLimit() { result = 2 }
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
i = 0 and
result = "." + printContent(head)
private predicate isUninterestingForModels(Callable api) {
api.getDeclaringType().getNamespace().getFullName() = ""
or
i > 0 and result = getContent(tail, i - 1)
)
}
/**
* Gets the MaD string representation of a store step access path.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i)
}
/**
* Gets the MaD string representation of a read step access path.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i desc)
}
/**
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
mentionsField(tail) or isField(head)
)
}
private predicate apiFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.getEnclosingCallable() = api
}
/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to a combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of an objects internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(string input, string output |
exists(
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
|
apiFlow(this, p, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
)
) <= 2 * this.getNumberOfParameters() + 3
}
}
pragma[nomagic]
private predicate apiContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.getEnclosingCallable() = api
}
/**
* Holds if any of the content sets in `path` translates into a synthetic field.
*/
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
head = path.getHead() and
tail = path.getTail()
|
exists(getSyntheticName(head)) or
hasSyntheticContent(tail)
)
}
/**
* A module containing predicates for validating access paths containing content sets
* that translates into synthetic fields, when used for generated summary models.
*/
private module AccessPathSyntheticValidation {
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`).
*/
private predicate step(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
p.getType() = t1 and
returnNodeExt.getType() = t2 and
apiContentFlow(_, p, read, returnNodeExt, store, _)
api instanceof CS::ConversionOperator
or
api instanceof Util::MainMethod
or
api instanceof CS::Destructor
or
api instanceof CS::AnonymousFunctionExpr
or
api.(CS::Constructor).isParameterless()
or
exists(Type decl | decl = api.getDeclaringType() |
decl instanceof SystemObjectClass or
decl instanceof SystemValueTypeClass
)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
*
* Step A -> Synth.
*/
private predicate synthPathEntry(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
not hasSyntheticContent(read) and
hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` has synthetic content
* and `store` does not.
*
* Step Synth -> A.
*/
private predicate synthPathExit(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
hasSyntheticContent(read) and
not hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists a path of steps from `read` to an exit.
*
* read ->* Synth -> A
*/
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
synthPathExit(t, read, _, _)
or
hasSyntheticContent(read) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(t, read, midType, mid) and
reachesSynthExit(midType, mid.reverse())
)
// Disregard properties that have both a get and a set accessor,
// which implicitly means auto implemented properties.
irrelevantAccessor(api)
}
private predicate relevant(Callable api) {
[api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and
api.fromSource() and
api.isUnboundDeclaration() and
not isUninterestingForModels(api)
}
private Callable getARelevantOverrideeOrImplementee(Overridable m) {
m.overridesOrImplements(result) and relevant(result)
}
/**
* Holds if there exists a path of steps from an entry to `store`.
*
* A -> Synth ->* store
* Gets the super implementation of `api` if it is relevant.
* If such a super implementation does not exist, returns `api` if it is relevant.
*/
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
synthPathEntry(_, _, t, store)
or
hasSyntheticContent(store) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(midType, mid, t, store) and
synthEntryReaches(midType, mid.reverse())
)
}
/**
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
* contain content that will be translated into a synthetic field, when being used in
* a MaD summary model, and if there is a range of APIs, such that
* when chaining their flow access paths, there exists access paths `A` and `B` where
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
* be translated into a synthetic field.
*
* This is needed because we don't want to include summaries that reads from or
* stores into a "dead" synthetic field.
*
* Example:
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
* `setX`, which gets and sets a private field `X` on `t`.
* This would lead to the following content flows
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
* As the reads and stores are on synthetic fields we should only make summaries
* if both of these methods exist.
*/
pragma[nomagic]
predicate acceptReadStore(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
or
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
private Callable liftedImpl(Callable api) {
(
result = getARelevantOverrideeOrImplementee(api)
or
synthEntryReaches(t1, store0) and
step(t1, read, t2, store) and
reachesSynthExit(t2, store.reverse())
)
result = api and relevant(api)
) and
not exists(getARelevantOverrideeOrImplementee(result))
}
}
/**
* Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
* Flow is considered relevant,
* 1. If `read` or `store` do not contain a content set that translates into a synthetic field.
* 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
* the synthetic content is "live" on the relevant declaring type.
*/
private predicate apiRelevantContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath store, boolean preservesValue
) {
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
(
not hasSyntheticContent(read) and not hasSyntheticContent(store)
or
AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
)
}
private predicate hasManualSummaryModel(Callable api) {
api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel())
}
pragma[nomagic]
private predicate captureContentFlow0(
ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
boolean lift
) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads,
PropagateContentFlow::AccessPath stores
|
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
)
}
private predicate hasManualSourceModel(Callable api) {
api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel())
}
/**
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
* the return value or a parameter).
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
exists(string input, string output, boolean lift, boolean preservesValue |
captureContentFlow0(api, input, output, _, lift) and
preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and
result = Printing::asModel(api, input, output, preservesValue, lift)
)
}
private predicate hasManualSinkModel(Callable api) {
api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel())
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API expose an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
ExternalFlow::sourceNode(source, kind)
predicate isUninterestingForDataFlowModels(Callable api) { isHigherOrder(api) }
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() {
not hasManualSourceModel(this) and
// Do not generate source models for overridable callables
// as virtual dispatch implies that too many methods
// will be considered sources.
not this.(Overridable).overridesOrImplements(_)
}
}
class SummaryTargetApi extends Callable {
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
not hasManualSummaryModel(lift)
}
Callable lift() { result = lift }
predicate isRelevant() { relevant(this) }
}
/**
* Holds if `t` is a type that is generally used for bulk data in collection types.
* Eg. char[] is roughly equivalent to string and thus a highly
* relevant type for model generation.
*/
private predicate isPrimitiveTypeUsedForBulkData(CS::Type t) {
t instanceof CS::ByteType or
t instanceof CS::CharType
}
/**
* Holds if the collection type `ct` is irrelevant for model generation.
* Collection types where the type of the elements are
* (1) unknown - are considered relevant.
* (2) known - at least one the child types should be relevant (a non-simple type
* or a type used for bulk data)
*/
private predicate irrelevantCollectionType(CS::Type ct) {
Collections::isCollectionType(ct) and
forex(CS::Type child | child = ct.getAChild() |
child instanceof CS::SimpleType and
not isPrimitiveTypeUsedForBulkData(child)
)
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi
predicate isRelevantType(CS::Type t) {
not t instanceof CS::SimpleType and
not t instanceof CS::Enum and
not t instanceof SystemDateTimeStruct and
not t instanceof SystemTypeClass and
not irrelevantCollectionType(t)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
/**
* Gets the underlying type of the content `c`.
*/
private CS::Type getUnderlyingContType(DataFlow::Content c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;
/**
* Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`.
*/
string captureSource(DataFlowSourceTargetApi api) {
exists(DataFlow::Node source, ReturnNodeExt sink, string kind |
PropagateFromSource::flow(source, sink) and
ExternalFlow::sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
result = Printing::asSourceModel(api, getOutput(sink), kind)
)
}
/**
* A dataflow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi
}
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind))
}
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.getType() and not isRelevantType(t))
Type getUnderlyingContentType(DataFlow::ContentSet c) {
exists(DataFlow::Content cont |
c.isSingleton(cont) and
result = getUnderlyingContType(cont)
)
or
sinkModelSanitizer(node)
exists(CS::Property p |
c.isProperty(p) and
result = p.getType()
)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
string qualifierString() { result = "Argument[this]" }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
string parameterAccess(CS::Parameter p) {
if Collections::isCollectionType(p.getType())
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
string parameterContentAccess(CS::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;
private signature string parameterAccessSig(Parameter p);
private module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> {
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
result = getParamAccess(c.getParameter(pos.getPosition()))
or
pos.isThisParameter() and
result = qualifierString()
}
}
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
result = ParamReturnNodeAsOutput<parameterAccess/1>::paramReturnNodeAsOutput(c, pos)
}
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = ParamReturnNodeAsOutput<parameterContentAccess/1>::paramReturnNodeAsOutput(c, pos)
}
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable(_)
}
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) {
node.asExpr() instanceof CS::ThisAccess
}
private predicate isRelevantMemberAccess(DataFlow::Node node) {
exists(CS::MemberAccess access | access = node.asExpr() |
access.hasThisQualifier() and
access.getTarget().isEffectivelyPublic() and
(
access instanceof CS::FieldAccess
or
access.getTarget().(CS::Property).getSetter().isPublic()
)
)
}
predicate sinkModelSanitizer(DataFlow::Node node) { none() }
predicate apiSource(DataFlow::Node source) {
isRelevantMemberAccess(source) or source instanceof DataFlow::ParameterNode
}
private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) {
exists(DataFlowCall call |
dc1 = call.getEnclosingCallable() and
dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0)
)
}
bindingset[dc1, dc2]
private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) =
fastTC(uniquelyCalls/2)(dc1, dc2)
bindingset[sourceEnclosing, api]
predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) {
not exists(DataFlowCallable dc1, DataFlowCallable dc2 |
uniquelyCallsPlus(dc1, dc2) or dc1 = dc2
|
dc1.getUnderlyingCallable() = api and
dc2.getUnderlyingCallable() = sourceEnclosing
)
}
string getInputArgument(DataFlow::Node source) {
exists(int pos |
pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and
result = "Argument[" + pos + "]"
)
or
source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and
result = qualifierString()
}
bindingset[kind]
predicate isRelevantSinkKind(string kind) { any() }
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
predicate containerContent(DataFlow::ContentSet c) { c.isElement() }
predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and
not nodeTo.asExpr() instanceof CS::ElementAccess and
not exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(nodeFrom, c, nodeTo) and containerContent(c)
)
}
bindingset[d]
private string getFullyQualifiedName(Declaration d) {
exists(string qualifier, string name |
d.hasFullyQualifiedName(qualifier, name) and
result = QualifiedName::getQualifiedName(qualifier, name)
)
}
predicate isField(DataFlow::ContentSet c) {
c.isField(_) or c.isSyntheticField(_) or c.isProperty(_)
}
string getSyntheticName(DataFlow::ContentSet c) {
exists(CS::Field f |
not f.isEffectivelyPublic() and
c.isField(f) and
result = getFullyQualifiedName(f)
)
or
exists(CS::Property p |
not p.isEffectivelyPublic() and
c.isProperty(p) and
result = getFullyQualifiedName(p)
)
or
c.isSyntheticField(result)
}
string printContent(DataFlow::ContentSet c) {
exists(CS::Field f, string name | name = getFullyQualifiedName(f) |
c.isField(f) and
f.isEffectivelyPublic() and
result = "Field[" + name + "]"
)
or
exists(CS::Property p, string name | name = getFullyQualifiedName(p) |
c.isProperty(p) and
p.isEffectivelyPublic() and
result = "Property[" + name + "]"
)
or
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c.isElement() and
result = "Element"
}
predicate partialModel = ExternalFlow::partialModel/6;
predicate sourceNode = ExternalFlow::sourceNode/2;
predicate sinkNode = ExternalFlow::sinkNode/2;
}
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;
/**
* Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink.
*/
string captureSink(DataFlowSinkTargetApi api) {
exists(DataFlow::Node src, DataFlow::Node sink, string kind |
PropagateToSink::flow(src, sink) and
ExternalFlow::sinkNode(sink, kind) and
api = src.getEnclosingCallable() and
result = Printing::asSinkModel(api, asInputArgument(src), kind)
)
}
import MakeModelGenerator<Location, CsharpDataFlow, CsharpTaintTracking, ModelGeneratorInput>

View File

@@ -1,5 +1,5 @@
private import csharp as CS
private import codeql.mad.modelgenerator.ModelPrinting
private import codeql.mad.modelgenerator.internal.ModelPrinting
private import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
private module ModelPrintingLang implements ModelPrintingLangSig {

View File

@@ -1,436 +0,0 @@
/**
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
private import csharp as CS
private import semmle.code.csharp.commons.Util as Util
private import semmle.code.csharp.commons.Collections as Collections
private import semmle.code.csharp.commons.QualifiedName as QualifiedName
private import semmle.code.csharp.dataflow.internal.DataFlowDispatch
private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import semmle.code.csharp.frameworks.System
private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
import semmle.code.csharp.dataflow.internal.ContentDataFlow as ContentDataFlow
import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch
module DataFlow = CS::DataFlow;
module TaintTracking = CS::TaintTracking;
class Type = CS::Type;
class Callable = CS::Callable;
class ContentSet = DataFlow::ContentSet;
/**
* Holds if any of the parameters of `api` are `System.Func<>`.
*/
private predicate isHigherOrder(Callable api) {
exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() |
t instanceof SystemLinqExpressions::DelegateExtType
)
}
private predicate irrelevantAccessor(CS::Accessor a) {
a.getDeclaration().(CS::Property).isReadWrite()
}
private predicate isUninterestingForModels(Callable api) {
api.getDeclaringType().getNamespace().getFullName() = ""
or
api instanceof CS::ConversionOperator
or
api instanceof Util::MainMethod
or
api instanceof CS::Destructor
or
api instanceof CS::AnonymousFunctionExpr
or
api.(CS::Constructor).isParameterless()
or
exists(Type decl | decl = api.getDeclaringType() |
decl instanceof SystemObjectClass or
decl instanceof SystemValueTypeClass
)
or
// Disregard properties that have both a get and a set accessor,
// which implicitly means auto implemented properties.
irrelevantAccessor(api)
}
private predicate relevant(Callable api) {
[api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and
api.fromSource() and
api.isUnboundDeclaration() and
not isUninterestingForModels(api)
}
private Callable getARelevantOverrideeOrImplementee(Overridable m) {
m.overridesOrImplements(result) and relevant(result)
}
/**
* Gets the super implementation of `api` if it is relevant.
* If such a super implementation does not exist, returns `api` if it is relevant.
*/
private Callable liftedImpl(Callable api) {
(
result = getARelevantOverrideeOrImplementee(api)
or
result = api and relevant(api)
) and
not exists(getARelevantOverrideeOrImplementee(result))
}
private predicate hasManualSummaryModel(Callable api) {
api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel())
}
private predicate hasManualSourceModel(Callable api) {
api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel())
}
private predicate hasManualSinkModel(Callable api) {
api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel())
}
/**
* Holds if it is irrelevant to generate models for `api` based on data flow analysis.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForDataFlowModels(CS::Callable api) { isHigherOrder(api) }
/**
* Holds if it is irrelevant to generate models for `api` based on type-based analysis.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForTypeBasedFlowModels(CS::Callable api) { none() }
/**
* A class of callables that are potentially relevant for generating source or
* sink models.
*/
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
/**
* A class of callables that are potentially relevant for generating sink models.
*/
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
/**
* A class of callables that are potentially relevant for generating source models.
*/
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() {
not hasManualSourceModel(this) and
// Do not generate source models for overridable callables
// as virtual dispatch implies that too many methods
// will be considered sources.
not this.(Overridable).overridesOrImplements(_)
}
}
/**
* A class of callables that are potentially relevant for generating summary or
* neutral models.
*
* In the Standard library and 3rd party libraries it is the callables (or callables that have a
* super implementation) that can be called from outside the library itself.
*/
class SummaryTargetApi extends Callable {
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
not hasManualSummaryModel(lift)
}
/**
* Gets the callable that a model will be lifted to.
*
* The lifted callable is relevant in terms of model
* generation (this is ensured by `liftedImpl`).
*/
Callable lift() { result = lift }
/**
* Holds if `this` is relevant in terms of model generation.
*/
predicate isRelevant() { relevant(this) }
}
/**
* Holds if `t` is a type that is generally used for bulk data in collection types.
* Eg. char[] is roughly equivalent to string and thus a highly
* relevant type for model generation.
*/
private predicate isPrimitiveTypeUsedForBulkData(CS::Type t) {
t instanceof CS::ByteType or
t instanceof CS::CharType
}
/**
* Holds if the collection type `ct` is irrelevant for model generation.
* Collection types where the type of the elements are
* (1) unknown - are considered relevant.
* (2) known - at least one the child types should be relevant (a non-simple type
* or a type used for bulk data)
*/
private predicate irrelevantCollectionType(CS::Type ct) {
Collections::isCollectionType(ct) and
forex(CS::Type child | child = ct.getAChild() |
child instanceof CS::SimpleType and
not isPrimitiveTypeUsedForBulkData(child)
)
}
/**
* Holds for type `t` for fields that are relevant as an intermediate
* read or write step in the data flow analysis.
* That is, flow through any data-flow node that does not have a relevant type
* will be excluded.
*/
predicate isRelevantType(CS::Type t) {
not t instanceof CS::SimpleType and
not t instanceof CS::Enum and
not t instanceof SystemDateTimeStruct and
not t instanceof SystemTypeClass and
not irrelevantCollectionType(t)
}
/**
* Gets the underlying type of the content `c`.
*/
private CS::Type getUnderlyingContType(DataFlow::Content c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
}
/**
* Gets the underlying type of the content `c`.
*/
CS::Type getUnderlyingContentType(DataFlow::ContentSet c) {
exists(DataFlow::Content cont |
c.isSingleton(cont) and
result = getUnderlyingContType(cont)
)
or
exists(CS::Property p |
c.isProperty(p) and
result = p.getType()
)
}
/**
* Gets the MaD string representation of the qualifier.
*/
string qualifierString() { result = "Argument[this]" }
string parameterAccess(CS::Parameter p) {
if Collections::isCollectionType(p.getType())
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterContentAccess(CS::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;
class ParameterPosition = DataFlowDispatch::ParameterPosition;
private signature string parameterAccessSig(Parameter p);
module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> {
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
result = getParamAccess(c.getParameter(pos.getPosition()))
or
pos.isThisParameter() and
result = qualifierString()
}
}
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c`.
*/
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
result = ParamReturnNodeAsOutput<parameterAccess/1>::paramReturnNodeAsOutput(c, pos)
}
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = ParamReturnNodeAsOutput<parameterContentAccess/1>::paramReturnNodeAsOutput(c, pos)
}
/**
* Gets the enclosing callable of `ret`.
*/
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable(_)
}
/**
* Holds if `node` is an own instance access.
*/
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) {
node.asExpr() instanceof CS::ThisAccess
}
private predicate isRelevantMemberAccess(DataFlow::Node node) {
exists(CS::MemberAccess access | access = node.asExpr() |
access.hasThisQualifier() and
access.getTarget().isEffectivelyPublic() and
(
access instanceof CS::FieldAccess
or
access.getTarget().(CS::Property).getSetter().isPublic()
)
)
}
predicate sinkModelSanitizer(DataFlow::Node node) { none() }
/**
* Holds if `source` is an api entrypoint relevant for creating sink models.
*/
predicate apiSource(DataFlow::Node source) {
isRelevantMemberAccess(source) or source instanceof DataFlow::ParameterNode
}
private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) {
exists(DataFlowCall call |
dc1 = call.getEnclosingCallable() and
dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0)
)
}
bindingset[dc1, dc2]
private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) =
fastTC(uniquelyCalls/2)(dc1, dc2)
/**
* Holds if it is not relevant to generate a source model for `api`, even
* if flow is detected from a node within `source` to a sink within `api`.
*/
bindingset[sourceEnclosing, api]
predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) {
not exists(DataFlowCallable dc1, DataFlowCallable dc2 | uniquelyCallsPlus(dc1, dc2) or dc1 = dc2 |
dc1.getUnderlyingCallable() = api and
dc2.getUnderlyingCallable() = sourceEnclosing
)
}
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgumentSpecific(DataFlow::Node source) {
exists(int pos |
pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and
result = "Argument[" + pos + "]"
)
or
source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and
result = qualifierString()
}
/**
* Holds if `kind` is a relevant sink kind for creating sink models.
*/
bindingset[kind]
predicate isRelevantSinkKind(string kind) { any() }
/**
* Holds if `kind` is a relevant source kind for creating source models.
*/
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
/**
* Holds if the the content `c` is a container.
*/
predicate containerContent(DataFlow::ContentSet c) { c.isElement() }
/**
* Holds if there is a taint step from `node1` to `node2` in content flow.
*/
predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and
not nodeTo.asExpr() instanceof CS::ElementAccess and
not exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(nodeFrom, c, nodeTo) and containerContent(c)
)
}
bindingset[d]
private string getFullyQualifiedName(Declaration d) {
exists(string qualifier, string name |
d.hasFullyQualifiedName(qualifier, name) and
result = QualifiedName::getQualifiedName(qualifier, name)
)
}
/**
* Holds if the content set `c` is a field, property or synthetic field.
*/
predicate isField(ContentSet c) { c.isField(_) or c.isSyntheticField(_) or c.isProperty(_) }
/**
* Gets the MaD synthetic name string representation for the content set `c`, if any.
*/
string getSyntheticName(DataFlow::ContentSet c) {
exists(CS::Field f |
not f.isEffectivelyPublic() and
c.isField(f) and
result = getFullyQualifiedName(f)
)
or
exists(CS::Property p |
not p.isEffectivelyPublic() and
c.isProperty(p) and
result = getFullyQualifiedName(p)
)
or
c.isSyntheticField(result)
}
/**
* Gets the MaD string representation of the content set `c`.
*/
string printContent(DataFlow::ContentSet c) {
exists(CS::Field f, string name | name = getFullyQualifiedName(f) |
c.isField(f) and
f.isEffectivelyPublic() and
result = "Field[" + name + "]"
)
or
exists(CS::Property p, string name | name = getFullyQualifiedName(p) |
c.isProperty(p) and
p.isEffectivelyPublic() and
result = "Property[" + name + "]"
)
or
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c.isElement() and
result = "Element"
}

View File

@@ -1,93 +0,0 @@
private import CaptureModels
/**
* Capture fluent APIs that return `this`.
* Example of a fluent API:
* ```csharp
* public class BasicFlow {
* public BasicFlow ReturnThis(object input)
* {
* // some side effect
* return this;
* }
* ```
* Captured Model:
* ```Summaries;BasicFlow;false;ReturnThis;(System.Object);Argument[this];ReturnValue;value;df-generated```
* Capture APIs that transfer taint from an input parameter to an output return
* value or parameter.
* Allows a sequence of read steps followed by a sequence of store steps.
*
* Examples:
*
* ```csharp
* public class BasicFlow {
* private string tainted;
*
* public String ReturnField()
* {
* return tainted;
* }
*
* public void AssignFieldToArray(object[] target)
* {
* target[0] = tainted;
* }
* }
* ```
* Captured Models:
* ```
* Summaries;BasicFlow;false;ReturnField;();Argument[this];ReturnValue;taint;df-generated |
* Summaries;BasicFlow;false;AssignFieldToArray;(System.Object[]);Argument[this];Argument[0].Element;taint;df-generated
* ```
*
* ```csharp
* public class BasicFlow {
* private string tainted;
*
* public void SetField(string s)
* {
* tainted = s;
* }
* }
* ```
* Captured Model:
* ```Summaries;BasicFlow;false;SetField;(System.String);Argument[0];Argument[this];taint;df-generated```
*
* ```csharp
* public class BasicFlow {
* public void ReturnSubstring(string s)
* {
* return s.Substring(0, 1);
* }
* }
* ```
* Captured Model:
* ```Summaries;BasicFlow;false;ReturnSubstring;(System.String);Argument[0];ReturnValue;taint;df-generated```
*
* ```csharp
* public class BasicFlow {
* public void AssignToArray(int data, int[] target)
* {
* target[0] = data;
* }
* }
* ```
* Captured Model:
* ```Summaries;BasicFlow;false;AssignToArray;(System.Int32,System.Int32[]);Argument[0];Argument[1].Element;taint;df-generated```
*/
string captureFlow(DataFlowSummaryTargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
/**
* Gets the neutral summary model for `api`, if any.
* A neutral summary model is generated, if we are not generating
* a summary model that applies to `api` and if it relevant to generate
* a model for `api`.
*/
string captureNoFlow(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and
api.isRelevant() and
result = Printing::asNeutralSummaryModel(api)
}

View File

@@ -2,7 +2,7 @@ private import csharp
private import semmle.code.csharp.frameworks.system.collections.Generic as GenericCollections
private import semmle.code.csharp.dataflow.internal.DataFlowPrivate
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import CaptureModelsSpecific as Specific
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private import CaptureModelsPrinting
/**
@@ -38,7 +38,7 @@ private predicate localTypeParameter(Callable callable, TypeParameter tp) {
*/
private predicate parameter(Callable callable, string input, TypeParameter tp) {
exists(Parameter p |
input = Specific::parameterAccess(p) and
input = ModelGeneratorInput::parameterAccess(p) and
p = callable.getAParameter() and
(
// Parameter of type tp
@@ -69,7 +69,7 @@ private string implicit(Callable callable, TypeParameter tp) {
then access = ".Element"
else access = getSyntheticField(tp)
|
result = Specific::qualifierString() + access
result = ModelGeneratorInput::qualifierString() + access
)
}
@@ -191,9 +191,7 @@ private module Printing = ModelPrinting<ModelPrintingInput>;
* A class of callables that are relevant generating summaries for based
* on the Theorems for Free approach.
*/
class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) }
class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi {
/**
* Gets the string representation of all type based summaries for `this`
* inspired by the Theorems for Free approach.

View File

@@ -3,7 +3,7 @@ import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {
string getCapturedModel(Callable c) { result = captureContentFlow(c) }
string getCapturedModel(Callable c) { result = ContentSensitive::captureFlow(c) }
string getKind() { result = "contentbased-summary" }
}

View File

@@ -1,5 +1,5 @@
import csharp
import utils.modelgenerator.internal.CaptureSummaryFlowQuery
import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {

View File

@@ -1,5 +1,5 @@
import csharp
import utils.modelgenerator.internal.CaptureSummaryFlowQuery
import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {

View File

@@ -760,7 +760,7 @@ ContentApprox getContentApprox(Content c) {
/**
* Holds if the the content `c` is a container.
*/
predicate containerContent(Content c) {
predicate containerContent(ContentSet c) {
c instanceof ArrayContent or
c instanceof CollectionContent or
c instanceof MapKeyContent or

View File

@@ -9,5 +9,5 @@
import internal.CaptureModels
from DataFlowSummaryTargetApi api, string flow
where flow = captureContentFlow(api)
where flow = ContentSensitive::captureFlow(api)
select flow order by flow

View File

@@ -7,7 +7,6 @@
*/
import internal.CaptureModels
import internal.CaptureSummaryFlowQuery
from DataFlowSummaryTargetApi api, string noflow
where noflow = captureNoFlow(api)

View File

@@ -7,7 +7,6 @@
*/
import internal.CaptureModels
import internal.CaptureSummaryFlowQuery
from DataFlowSummaryTargetApi api, string flow
where flow = captureFlow(api)

View File

@@ -1,635 +1,292 @@
/**
* Provides classes and predicates related to capturing summary, source,
* and sink models of the Standard or a 3rd party library.
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
private import CaptureModelsSpecific
private import CaptureModelsPrinting
private import java as J
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
private import semmle.code.java.dataflow.internal.DataFlowDispatch
private import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.java.dataflow.internal.DataFlowImplSpecific
private import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.dataflow.internal.ModelExclusions
private import semmle.code.java.dataflow.internal.TaintTrackingImplSpecific
private import semmle.code.java.dataflow.SSA as Ssa
private import semmle.code.java.dataflow.TaintTracking
private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl
/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
* Holds if the type `t` is a primitive type used for bulk data.
*/
private class ReturnNodeExt extends DataFlow::Node {
private DataFlowImplCommon::ReturnKindExt kind;
predicate isPrimitiveTypeUsedForBulkData(J::Type t) {
t.hasName(["byte", "char", "Byte", "Character"])
}
ReturnNodeExt() {
kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
module ModelGeneratorInput implements ModelGeneratorInputSig<Location, JavaDataFlow> {
class Type = J::Type;
class Parameter = J::Parameter;
class Callable = J::Callable;
class NodeExtended extends DataFlow::Node {
Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingCallable() }
}
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
cu.getPackage().getName().matches("javax.swing%") or
cu.getPackage().getName().matches("java.awt%")
}
private predicate relevant(Callable api) {
api.isPublic() and
api.getDeclaringType().isPublic() and
api.fromSource() and
not isUninterestingForModels(api) and
not isInfrequentlyUsed(api.getCompilationUnit())
}
private J::Method getARelevantOverride(J::Method m) {
result = m.getAnOverride() and
relevant(result) and
// Other exclusions for overrides.
not m instanceof J::ToStringMethod
}
/**
* Gets the kind of the return node.
* Gets the super implementation of `m` if it is relevant.
* If such a super implementations does not exist, returns `m` if it is relevant.
*/
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
}
bindingset[c]
private signature string printCallableParamSig(Callable c, ParameterPosition p);
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
string getOutput(ReturnNodeExt node) {
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
result = "ReturnValue"
or
exists(ParameterPosition pos |
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
)
}
}
string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
class DataFlowSummaryTargetApi extends SummaryTargetApi {
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
}
class DataFlowSourceTargetApi = SourceTargetApi;
class DataFlowSinkTargetApi = SinkTargetApi;
private module ModelPrintingInput implements ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
string getProvenance() { result = "df-generated" }
}
module Printing = ModelPrinting<ModelPrintingInput>;
/**
* Holds if `c` is a relevant content kind, where the underlying type is relevant.
*/
private predicate isRelevantTypeInContent(DataFlow::ContentSet c) {
isRelevantType(getUnderlyingContentType(c))
}
/**
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*/
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::ContentSet f |
DataFlowPrivate::readStep(node1, f, node2) and
// Partially restrict the content types used for intermediate steps.
(not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f))
)
or
exists(DataFlow::ContentSet f | DataFlowPrivate::storeStep(node1, f, node2) | containerContent(f))
}
/**
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
*/
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
isRelevantTypeInContent(c) or
containerContent(c)
}
/**
* Gets the MaD string representation of the parameter node `p`.
*/
string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = parameterAccess(p.asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
result = parameterContentAccess(p.asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) }
/**
* Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`).
*/
string captureQualifierFlow(DataFlowSummaryTargetApi api) {
exists(ReturnNodeExt ret |
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}
private int accessPathLimit0() { result = 2 }
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
abstract private class TaintState extends TTaintState {
abstract string toString();
}
/**
* A FlowState representing a tainted read.
*/
private class TaintRead extends TaintState, TTaintRead {
private int step;
TaintRead() { this = TTaintRead(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintRead(" + step + ")" }
}
/**
* A FlowState representing a tainted write.
*/
private class TaintStore extends TaintState, TTaintStore {
private int step;
TaintStore() { this = TTaintStore(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintStore(" + step + ")" }
}
/**
* A data-flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*/
module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
state.(TaintRead).getStep() = 0
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
exists(DataFlow::ContentSet c |
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
)
or
exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
exists(string input, string output |
p.getEnclosingCallable() = api and
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asLiftedTaintModel(api, input, output)
)
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow(DataFlowSummaryTargetApi api) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
PropagateFlow::flow(p, returnNodeExt) and
result = captureThroughFlow0(api, p, returnNodeExt)
)
}
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
int accessPathLimit() { result = 2 }
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
i = 0 and
result = "." + printContent(head)
or
i > 0 and result = getContent(tail, i - 1)
)
}
/**
* Gets the MaD string representation of a store step access path.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i)
}
/**
* Gets the MaD string representation of a read step access path.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i desc)
}
/**
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
mentionsField(tail) or isField(head)
)
}
private predicate apiFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.getEnclosingCallable() = api
}
/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to a combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of an objects internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(string input, string output |
exists(
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
|
apiFlow(this, p, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
)
) <= 2 * this.getNumberOfParameters() + 3
}
}
pragma[nomagic]
private predicate apiContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.getEnclosingCallable() = api
}
/**
* Holds if any of the content sets in `path` translates into a synthetic field.
*/
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
head = path.getHead() and
tail = path.getTail()
|
exists(getSyntheticName(head)) or
hasSyntheticContent(tail)
)
}
/**
* A module containing predicates for validating access paths containing content sets
* that translates into synthetic fields, when used for generated summary models.
*/
private module AccessPathSyntheticValidation {
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`).
*/
private predicate step(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
p.getType() = t1 and
returnNodeExt.getType() = t2 and
apiContentFlow(_, p, read, returnNodeExt, store, _)
)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
*
* Step A -> Synth.
*/
private predicate synthPathEntry(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
not hasSyntheticContent(read) and
hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` has synthetic content
* and `store` does not.
*
* Step Synth -> A.
*/
private predicate synthPathExit(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
hasSyntheticContent(read) and
not hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists a path of steps from `read` to an exit.
*
* read ->* Synth -> A
*/
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
synthPathExit(t, read, _, _)
or
hasSyntheticContent(read) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(t, read, midType, mid) and
reachesSynthExit(midType, mid.reverse())
)
}
/**
* Holds if there exists a path of steps from an entry to `store`.
*
* A -> Synth ->* store
*/
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
synthPathEntry(_, _, t, store)
or
hasSyntheticContent(store) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(midType, mid, t, store) and
synthEntryReaches(midType, mid.reverse())
)
}
/**
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
* contain content that will be translated into a synthetic field, when being used in
* a MaD summary model, and if there is a range of APIs, such that
* when chaining their flow access paths, there exists access paths `A` and `B` where
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
* be translated into a synthetic field.
*
* This is needed because we don't want to include summaries that reads from or
* stores into a "dead" synthetic field.
*
* Example:
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
* `setX`, which gets and sets a private field `X` on `t`.
* This would lead to the following content flows
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
* As the reads and stores are on synthetic fields we should only make summaries
* if both of these methods exist.
*/
pragma[nomagic]
predicate acceptReadStore(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
or
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
private J::Callable liftedImpl(J::Callable m) {
(
result = getARelevantOverride(m)
or
synthEntryReaches(t1, store0) and
step(t1, read, t2, store) and
reachesSynthExit(t2, store.reverse())
)
result = m and relevant(m)
) and
not exists(getARelevantOverride(result))
}
}
/**
* Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
* Flow is considered relevant,
* 1. If `read` or `store` do not contain a content set that translates into a synthetic field.
* 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
* the synthetic content is "live" on the relevant declaring type.
*/
private predicate apiRelevantContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath store, boolean preservesValue
) {
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
(
not hasSyntheticContent(read) and not hasSyntheticContent(store)
or
AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
)
}
private predicate hasManualSummaryModel(Callable api) {
api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or
api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable()
}
pragma[nomagic]
private predicate captureContentFlow0(
ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
boolean lift
) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads,
PropagateContentFlow::AccessPath stores
|
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
)
}
private predicate hasManualSourceModel(Callable api) {
api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable()
}
/**
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
* the return value or a parameter).
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
exists(string input, string output, boolean lift, boolean preservesValue |
captureContentFlow0(api, input, output, _, lift) and
preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and
result = Printing::asModel(api, input, output, preservesValue, lift)
)
}
private predicate hasManualSinkModel(Callable api) {
api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable()
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API expose an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
ExternalFlow::sourceNode(source, kind)
predicate isUninterestingForDataFlowModels(Callable api) {
api.getDeclaringType() instanceof J::Interface and not exists(api.getBody())
}
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() { not hasManualSourceModel(this) }
}
class SummaryTargetApi extends Callable {
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
not hasManualSummaryModel(lift)
}
Callable lift() { result = lift }
predicate isRelevant() { relevant(this) }
}
private string isExtensible(Callable c) {
if c.getDeclaringType().isFinal() then result = "false" else result = "true"
}
/**
* Holds if the callable `c` is in package `package`
* and is a member of `type`.
*/
private predicate qualifiedName(Callable c, string package, string type) {
exists(RefType t | t = c.getDeclaringType() |
package = t.getCompilationUnit().getPackage().getName() and
type = t.getErasure().(J::RefType).getNestedName()
)
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi
predicate isRelevantType(Type t) {
not t instanceof J::TypeClass and
not t instanceof J::EnumType and
not t instanceof J::PrimitiveType and
not t instanceof J::BoxedType and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
(
not t.(J::Array).getElementType() instanceof J::PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
(
not t.(J::Array).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
(
not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType())
)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
Type getUnderlyingContentType(DataFlow::ContentSet c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
string qualifierString() { result = "Argument[this]" }
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;
/**
* Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`.
*/
string captureSource(DataFlowSourceTargetApi api) {
exists(DataFlow::Node source, ReturnNodeExt sink, string kind |
PropagateFromSource::flow(source, sink) and
ExternalFlow::sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
result = Printing::asSourceModel(api, getOutput(sink), kind)
)
}
/**
* A dataflow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi
string parameterAccess(J::Parameter p) {
if
p.getType() instanceof J::Array and
not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType())
then result = "Argument[" + p.getPosition() + "].ArrayElement"
else
if p.getType() instanceof ContainerFlow::ContainerType
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind))
}
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.getType() and not isRelevantType(t))
class InstanceParameterNode = DataFlow::InstanceParameterNode;
bindingset[c]
string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
result = parameterAccess(c.getParameter(pos))
or
sinkModelSanitizer(node)
result = qualifierString() and pos = -1
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = parameterContentAccess(c.getParameter(pos))
or
result = qualifierString() and pos = -1
}
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable()
}
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) {
node.asExpr().(J::ThisAccess).isOwnInstanceAccess()
}
predicate sinkModelSanitizer(DataFlow::Node node) {
// exclude variable capture jump steps
exists(Ssa::SsaImplicitInit closure |
closure.captures(_) and
node.asExpr() = closure.getAFirstUse()
)
}
predicate apiSource(DataFlow::Node source) {
(
source.asExpr().(J::FieldAccess).isOwnFieldAccess() or
source instanceof DataFlow::ParameterNode
) and
exists(J::RefType t |
t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and
not t instanceof J::TypeObject and
t.isPublic()
)
}
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
string getInputArgument(DataFlow::Node source) {
exists(int pos |
source.(DataFlow::ParameterNode).isParameterOf(_, pos) and
if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString()
)
or
source.asExpr() instanceof J::FieldAccess and
result = qualifierString()
}
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
not kind = "log-injection" and
not kind.matches("regex-use%") and
not kind = "file-content-store"
}
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
predicate containerContent = DataFlowPrivate::containerContent/1;
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
not exists(DataFlow::Content f |
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
)
}
predicate isField(DataFlow::ContentSet c) {
c instanceof DataFlowUtil::FieldContent or
c instanceof DataFlowUtil::SyntheticFieldContent
}
string getSyntheticName(DataFlow::ContentSet c) {
exists(Field f |
not f.isPublic() and
f = c.(DataFlowUtil::FieldContent).getField() and
result = f.getQualifiedName()
)
or
result = c.(DataFlowUtil::SyntheticFieldContent).getField()
}
string printContent(DataFlow::ContentSet c) {
exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
result = "Field[" + f.getQualifiedName() + "]"
)
or
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c instanceof DataFlowUtil::CollectionContent and result = "Element"
or
c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
or
c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
or
c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
}
predicate partialModel(
Callable api, string package, string type, string extensible, string name, string parameters
) {
qualifiedName(api, package, type) and
extensible = isExtensible(api) and
name = api.getName() and
parameters = ExternalFlow::paramsString(api)
}
predicate sourceNode = ExternalFlow::sourceNode/2;
predicate sinkNode = ExternalFlow::sinkNode/2;
}
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;
/**
* Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink.
*/
string captureSink(DataFlowSinkTargetApi api) {
exists(DataFlow::Node src, DataFlow::Node sink, string kind |
PropagateToSink::flow(src, sink) and
ExternalFlow::sinkNode(sink, kind) and
api = src.getEnclosingCallable() and
result = Printing::asSinkModel(api, asInputArgument(src), kind)
)
}
import MakeModelGenerator<Location, JavaDataFlow, JavaTaintTracking, ModelGeneratorInput>

View File

@@ -1,11 +1,11 @@
private import java as J
private import codeql.mad.modelgenerator.ModelPrinting
private import CaptureModelsSpecific as Specific
private import codeql.mad.modelgenerator.internal.ModelPrinting
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private module ModelPrintingLang implements ModelPrintingLangSig {
class Callable = J::Callable;
predicate partialModel = Specific::partialModel/6;
predicate partialModel = ModelGeneratorInput::partialModel/6;
}
import ModelPrintingImpl<ModelPrintingLang>

View File

@@ -1,380 +0,0 @@
/**
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
private import java as J
private import semmle.code.java.dataflow.internal.DataFlowPrivate
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.dataflow.internal.ModelExclusions
private import semmle.code.java.dataflow.DataFlow as Df
private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf
private import semmle.code.java.dataflow.SSA as Ssa
private import semmle.code.java.dataflow.TaintTracking as Tt
import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch
module DataFlow = Df::DataFlow;
module ContentDataFlow = Cdf::ContentDataFlow;
module TaintTracking = Tt::TaintTracking;
class Type = J::Type;
class Unit = J::Unit;
class Callable = J::Callable;
class ContentSet = DataFlowUtil::ContentSet;
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
cu.getPackage().getName().matches("javax.swing%") or
cu.getPackage().getName().matches("java.awt%")
}
private predicate relevant(Callable api) {
api.isPublic() and
api.getDeclaringType().isPublic() and
api.fromSource() and
not isUninterestingForModels(api) and
not isInfrequentlyUsed(api.getCompilationUnit())
}
private J::Method getARelevantOverride(J::Method m) {
result = m.getAnOverride() and
relevant(result) and
// Other exclusions for overrides.
not m instanceof J::ToStringMethod
}
/**
* Gets the super implementation of `m` if it is relevant.
* If such a super implementations does not exist, returns `m` if it is relevant.
*/
private J::Callable liftedImpl(J::Callable m) {
(
result = getARelevantOverride(m)
or
result = m and relevant(m)
) and
not exists(getARelevantOverride(result))
}
private predicate hasManualSummaryModel(Callable api) {
api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or
api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable()
}
private predicate hasManualSourceModel(Callable api) {
api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable()
}
private predicate hasManualSinkModel(Callable api) {
api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable()
}
/**
* Holds if it is irrelevant to generate models for `api` based on data flow analysis.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForDataFlowModels(Callable api) {
api.getDeclaringType() instanceof J::Interface and not exists(api.getBody())
}
/**
* A class of callables that are potentially relevant for generating source or
* sink models.
*/
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
/**
* A class of callables that are potentially relevant for generating sink models.
*/
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
/**
* A class of callables that are potentially relevant for generating source models.
*/
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() { not hasManualSourceModel(this) }
}
/**
* Holds if it is irrelevant to generate models for `api` based on type-based analysis.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForTypeBasedFlowModels(Callable api) { none() }
/**
* A class of callables that are potentially relevant for generating summary or
* neutral models.
*
* In the Standard library and 3rd party libraries it is the callables (or callables that have a
* super implementation) that can be called from outside the library itself.
*/
class SummaryTargetApi extends Callable {
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
not hasManualSummaryModel(lift)
}
/**
* Gets the callable that a model will be lifted to.
*/
Callable lift() { result = lift }
/**
* Holds if this callable is relevant in terms of generating models.
*/
predicate isRelevant() { relevant(this) }
}
private string isExtensible(Callable c) {
if c.getDeclaringType().isFinal() then result = "false" else result = "true"
}
/**
* Holds if the callable `c` is in package `package`
* and is a member of `type`.
*/
private predicate qualifiedName(Callable c, string package, string type) {
exists(RefType t | t = c.getDeclaringType() |
package = t.getCompilationUnit().getPackage().getName() and
type = t.getErasure().(J::RefType).getNestedName()
)
}
predicate partialModel(
Callable api, string package, string type, string extensible, string name, string parameters
) {
qualifiedName(api, package, type) and
extensible = isExtensible(api) and
name = api.getName() and
parameters = ExternalFlow::paramsString(api)
}
predicate isPrimitiveTypeUsedForBulkData(J::Type t) {
t.hasName(["byte", "char", "Byte", "Character"])
}
/**
* Holds for type `t` for fields that are relevant as an intermediate
* read or write step in the data flow analysis.
*/
predicate isRelevantType(J::Type t) {
not t instanceof J::TypeClass and
not t instanceof J::EnumType and
not t instanceof J::PrimitiveType and
not t instanceof J::BoxedType and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
(
not t.(J::Array).getElementType() instanceof J::PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
(
not t.(J::Array).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
(
not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType())
)
}
/**
* Gets the underlying type of the content `c`.
*/
J::Type getUnderlyingContentType(DataFlow::Content c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
}
/**
* Gets the MaD string representation of the qualifier.
*/
string qualifierString() { result = "Argument[this]" }
/**
* Gets the MaD string representation of the parameter `p`.
*/
string parameterAccess(J::Parameter p) {
if
p.getType() instanceof J::Array and
not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType())
then result = "Argument[" + p.getPosition() + "].ArrayElement"
else
if p.getType() instanceof ContainerFlow::ContainerType
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
class InstanceParameterNode = DataFlow::InstanceParameterNode;
class ParameterPosition = DataFlowDispatch::ParameterPosition;
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c`.
*/
bindingset[c]
string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
result = parameterAccess(c.getParameter(pos))
or
result = qualifierString() and pos = -1
}
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c` for content flow.
*/
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = parameterContentAccess(c.getParameter(pos))
or
result = qualifierString() and pos = -1
}
/**
* Gets the enclosing callable of `ret`.
*/
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable()
}
/**
* Holds if `node` is an own instance access.
*/
predicate isOwnInstanceAccessNode(ReturnNode node) {
node.asExpr().(J::ThisAccess).isOwnInstanceAccess()
}
predicate sinkModelSanitizer(DataFlow::Node node) {
// exclude variable capture jump steps
exists(Ssa::SsaImplicitInit closure |
closure.captures(_) and
node.asExpr() = closure.getAFirstUse()
)
}
/**
* Holds if `source` is an api entrypoint relevant for creating sink models.
*/
predicate apiSource(DataFlow::Node source) {
(
source.asExpr().(J::FieldAccess).isOwnFieldAccess() or
source instanceof DataFlow::ParameterNode
) and
exists(J::RefType t |
t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and
not t instanceof J::TypeObject and
t.isPublic()
)
}
/**
* Holds if it is not relevant to generate a source model for `api`, even
* if flow is detected from a node within `source` to a sink within `api`.
*/
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgumentSpecific(DataFlow::Node source) {
exists(int pos |
source.(DataFlow::ParameterNode).isParameterOf(_, pos) and
if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString()
)
or
source.asExpr() instanceof J::FieldAccess and
result = qualifierString()
}
/**
* Holds if `kind` is a relevant sink kind for creating sink models.
*/
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
not kind = "log-injection" and
not kind.matches("regex-use%") and
not kind = "file-content-store"
}
/**
* Holds if `kind` is a relevant source kind for creating source models.
*/
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
predicate containerContent = DataFlowPrivate::containerContent/1;
/**
* Holds if there is a taint step from `node1` to `node2` in content flow.
*/
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
not exists(DataFlow::Content f |
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
)
}
/**
* Holds if the content set `c` is a field or a synthetic field.
*/
predicate isField(ContentSet c) {
c instanceof DataFlowUtil::FieldContent or
c instanceof DataFlowUtil::SyntheticFieldContent
}
/**
* Gets the MaD synthetic name string representation for the content set `c`, if any.
*/
string getSyntheticName(DataFlow::ContentSet c) {
exists(Field f |
not f.isPublic() and
f = c.(DataFlowUtil::FieldContent).getField() and
result = f.getQualifiedName()
)
or
result = c.(DataFlowUtil::SyntheticFieldContent).getField()
}
/**
* Gets the MaD string representation of the content set `c`.
*/
string printContent(ContentSet c) {
exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
result = "Field[" + f.getQualifiedName() + "]"
)
or
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c instanceof DataFlowUtil::CollectionContent and result = "Element"
or
c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
or
c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
or
c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
}

View File

@@ -1,84 +0,0 @@
private import CaptureModels
/**
* Capture fluent APIs that return `this`.
* Example of a fluent API:
* ```java
* public class Foo {
* public Foo someAPI() {
* // some side-effect
* return this;
* }
* }
* ```
*
* Capture APIs that transfer taint from an input parameter to an output return
* value or parameter.
* Allows a sequence of read steps followed by a sequence of store steps.
*
* Examples:
*
* ```java
* public class Foo {
* private String tainted;
*
* public String returnsTainted() {
* return tainted;
* }
*
* public void putsTaintIntoParameter(List<String> foo) {
* foo.add(tainted);
* }
* }
* ```
* Captured Models:
* ```
* p;Foo;true;returnsTainted;;Argument[this];ReturnValue;taint;df-generated
* p;Foo;true;putsTaintIntoParameter;(List);Argument[this];Argument[0];taint;df-generated
* ```
*
* ```java
* public class Foo {
* private String tainted;
* public void doSomething(String input) {
* tainted = input;
* }
* ```
* Captured Model:
* ```p;Foo;true;doSomething;(String);Argument[0];Argument[this];taint;df-generated```
*
* ```java
* public class Foo {
* public String returnData(String tainted) {
* return tainted.substring(0,10)
* }
* }
* ```
* Captured Model:
* ```p;Foo;true;returnData;;Argument[0];ReturnValue;taint;df-generated```
*
* ```java
* public class Foo {
* public void addToList(String tainted, List<String> foo) {
* foo.add(tainted);
* }
* }
* ```
* Captured Model:
* ```p;Foo;true;addToList;;Argument[0];Argument[1];taint;df-generated```
*/
string captureFlow(DataFlowSummaryTargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
/**
* Gets the neutral summary model for `api`, if any.
* A neutral summary model is generated, if we are not generating
* a summary model that applies to `api`.
*/
string captureNoFlow(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and
api.isRelevant() and
result = Printing::asNeutralSummaryModel(api)
}

View File

@@ -1,7 +1,8 @@
private import java
private import semmle.code.java.Collections
private import semmle.code.java.dataflow.internal.ContainerFlow
private import CaptureModelsSpecific as Specific
private import CaptureModels as CaptureModels
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private import CaptureModelsPrinting
/**
@@ -81,7 +82,7 @@ private predicate localTypeParameter(Callable callable, TypeVariable tv) {
private string getAccessPath(Type t) {
if
t instanceof Array and
not Specific::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
not CaptureModels::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
then result = ".ArrayElement"
else
if t instanceof ContainerType or t instanceof IterableClass
@@ -134,7 +135,7 @@ private string implicit(Callable callable, TypeVariable tv) {
then access = getAccessPath(decl)
else access = getSyntheticField(tv)
|
result = Specific::qualifierString() + access
result = ModelGeneratorInput::qualifierString() + access
)
}
@@ -286,7 +287,7 @@ private predicate output(Callable callable, TypeVariable tv, string output) {
module ModelPrintingInput implements ModelPrintingSig {
class SummaryApi = TypeBasedFlowTargetApi;
class SourceOrSinkApi = Specific::SourceOrSinkTargetApi;
class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi;
string getProvenance() { result = "tb-generated" }
}
@@ -297,9 +298,7 @@ private module Printing = ModelPrinting<ModelPrintingInput>;
* A class of callables that are relevant generating summaries for based
* on the Theorems for Free approach.
*/
class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) }
class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi {
/**
* Gets the string representation of all type based summaries for `this`
* inspired by the Theorems for Free approach.

View File

@@ -3,7 +3,7 @@ import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {
string getCapturedModel(Callable c) { result = captureContentFlow(c) }
string getCapturedModel(Callable c) { result = ContentSensitive::captureFlow(c) }
string getKind() { result = "contentbased-summary" }
}

View File

@@ -1,5 +1,5 @@
import java
import utils.modelgenerator.internal.CaptureSummaryFlowQuery
import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {

View File

@@ -1,5 +1,5 @@
import java
import utils.modelgenerator.internal.CaptureSummaryFlowQuery
import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest
module InlineMadTestConfig implements InlineMadTestConfigSig {

View File

@@ -0,0 +1,939 @@
/**
* INTERNAL: Do not use.
*
* Provides classes and predicates related to capturing summary, source,
* and sink models of the Standard or a 3rd party library.
*/
private import codeql.dataflow.DataFlow
private import codeql.dataflow.TaintTracking as Tt
private import codeql.dataflow.internal.ContentDataFlowImpl
private import codeql.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import codeql.util.Location
private import ModelPrinting
/**
* Provides language-specific model generator parameters.
*/
signature module ModelGeneratorInputSig<LocationSig Location, InputSig<Location> Lang> {
/**
* A Type.
*/
class Type;
/**
* A Parameter.
*/
class Parameter;
/**
* A Callable.
*/
class Callable {
/**
* Gets the number of parameters of this callable.
*/
int getNumberOfParameters();
/**
* Gets a string representation of this callable.
*/
string toString();
}
/**
* A node.
*/
class NodeExtended extends Lang::Node {
/**
* Gets the type of this node.
*/
Type getType();
/**
* Gets the enclosing callable of this node.
*/
Callable getEnclosingCallable();
/**
* Gets the enclosing callable of this node, when considered as an expression.
*/
Callable getAsExprEnclosingCallable();
/**
* Gets the parameter corresponding to this node, if any.
*/
Parameter asParameter();
}
/**
* A class of callables that are potentially relevant for generating summary or
* neutral models.
*
* In the Standard library and 3rd party libraries it is the callables (or callables that have a
* super implementation) that can be called from outside the library itself.
*/
class SummaryTargetApi extends Callable {
/**
* Gets the callable that a model will be lifted to.
*
* The lifted callable is relevant in terms of model
* generation (this is ensured by `liftedImpl`).
*/
Callable lift();
/**
* Holds if `this` is relevant in terms of model generation.
*/
predicate isRelevant();
}
/**
* A class of callables that are potentially relevant for generating source or
* sink models.
*/
class SourceOrSinkTargetApi extends Callable;
/**
* A class of callables that are potentially relevant for generating source models.
*/
class SourceTargetApi extends SourceOrSinkTargetApi;
/**
* A class of callables that are potentially relevant for generating sink models.
*/
class SinkTargetApi extends SourceOrSinkTargetApi;
/**
* An instance parameter node.
*/
class InstanceParameterNode extends Lang::Node;
/**
* Holds for type `t` for fields that are relevant as an intermediate
* read or write step in the data flow analysis.
* That is, flow through any data-flow node that does not have a relevant type
* will be excluded.
*/
predicate isRelevantType(Type t);
/**
* Gets the underlying type of the content `c`.
*/
Type getUnderlyingContentType(Lang::ContentSet c);
/**
* Gets the MaD string representation of the qualifier.
*/
string qualifierString();
/**
* Gets the MaD string representation of the parameter `p`.
*/
string parameterAccess(Parameter p);
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterContentAccess(Parameter p);
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c`.
*/
bindingset[c]
string paramReturnNodeAsOutput(Callable c, Lang::ParameterPosition p);
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c` when used in content flow.
*/
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, Lang::ParameterPosition pos);
/**
* Gets the enclosing callable of `ret`.
*/
Callable returnNodeEnclosingCallable(Lang::Node node);
/**
* Holds if `node` is an own instance access.
*/
predicate isOwnInstanceAccessNode(Lang::ReturnNode node);
/**
* Holds if `node` is a sanitizer for sink model construction.
*/
predicate sinkModelSanitizer(Lang::Node node);
/**
* Holds if `source` is an api entrypoint relevant for creating sink models.
*/
predicate apiSource(Lang::Node source);
/**
* Gets the MaD input string representation of `source`.
*/
string getInputArgument(Lang::Node source);
/**
* Holds if it is not relevant to generate a source model for `api`, even
* if flow is detected from a node within `source` to a sink within `api`.
*/
bindingset[sourceEnclosing, api]
predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api);
/**
* Holds if `kind` is a relevant sink kind for creating sink models.
*/
bindingset[kind]
predicate isRelevantSinkKind(string kind);
/**
* Holds if `kind` is a relevant source kind for creating source models.
*/
bindingset[kind]
predicate isRelevantSourceKind(string kind);
/**
* Holds if the the content `c` is a container.
*/
predicate containerContent(Lang::ContentSet c);
/**
* Holds if there is a taint step from `node1` to `node2` in content flow.
*/
predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo);
/**
* Holds if the content set `c` is field like.
*/
predicate isField(Lang::ContentSet c);
/**
* Gets the MaD synthetic name string representation for the content set `c`, if any.
*/
string getSyntheticName(Lang::ContentSet c);
/**
* Gets the MaD string representation of the content set `c`.
*/
string printContent(Lang::ContentSet c);
/**
* Holds if it is irrelevant to generate models for `api` based on data flow analysis.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForDataFlowModels(Callable api);
/**
* Holds if `namespace`, `type`, `extensible`, `name` and `parameters` are string representations
* for the corresponding MaD columns for `api`.
*/
predicate partialModel(
Callable api, string namespace, string type, string extensible, string name, string parameters
);
/**
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
predicate sourceNode(Lang::Node node, string kind);
/**
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
predicate sinkNode(Lang::Node node, string kind);
}
module MakeModelGenerator<
LocationSig Location, InputSig<Location> Lang, Tt::InputSig<Location, Lang> TaintLang,
ModelGeneratorInputSig<Location, Lang> ModelGeneratorInput>
{
private module DataFlow {
import Lang
import DataFlowMake<Location, Lang>
import DataFlowImplCommon::MakeImplCommon<Location, Lang>
}
private import ModelGeneratorInput
private import Tt::TaintFlowMake<Location, Lang, TaintLang> as TaintTracking
private module ModelPrintingLang implements ModelPrintingLangSig {
class Callable = ModelGeneratorInput::Callable;
predicate partialModel = ModelGeneratorInput::partialModel/6;
}
private import ModelPrintingImpl<ModelPrintingLang> as Printing
final private class NodeExtendedFinal = NodeExtended;
/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
*/
private class ReturnNodeExt extends NodeExtendedFinal {
private DataFlow::ReturnKindExt kind;
ReturnNodeExt() {
kind = DataFlow::getValueReturnPosition(this).getKind() or
kind = DataFlow::getParamReturnPosition(this, _).getKind()
}
/**
* Gets the kind of the return node.
*/
DataFlow::ReturnKindExt getKind() { result = kind }
}
bindingset[c]
private signature string printCallableParamSig(Callable c, DataFlow::ParameterPosition p);
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
string getOutput(ReturnNodeExt node) {
node.getKind() instanceof DataFlow::ValueReturnKind and
result = "ReturnValue"
or
exists(DataFlow::ParameterPosition pos |
pos = node.getKind().(DataFlow::ParamUpdateReturnKind).getPosition() and
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
)
}
}
string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
final private class SummaryTargetApiFinal = SummaryTargetApi;
class DataFlowSummaryTargetApi extends SummaryTargetApiFinal {
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
}
class DataFlowSourceTargetApi = SourceTargetApi;
class DataFlowSinkTargetApi = SinkTargetApi;
private module ModelPrintingInput implements Printing::ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
string getProvenance() { result = "df-generated" }
}
module ModelPrinting = Printing::ModelPrinting<ModelPrintingInput>;
/**
* Holds if `c` is a relevant content kind, where the underlying type is relevant.
*/
private predicate isRelevantTypeInContent(DataFlow::ContentSet c) {
isRelevantType(getUnderlyingContentType(c))
}
/**
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*/
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::ContentSet f |
DataFlow::readStep(node1, f, node2) and
// Partially restrict the content types used for intermediate steps.
(not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f))
)
or
exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f))
}
/**
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
*/
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
isRelevantTypeInContent(c) or
containerContent(c)
}
/**
* Gets the MaD string representation of the parameter node `p`.
*/
string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = parameterAccess(p.(NodeExtended).asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgument(NodeExtended source) { result = getInputArgument(source) }
/**
* Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`).
*/
private string captureQualifierFlow(DataFlowSummaryTargetApi api) {
exists(ReturnNodeExt ret |
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}
private int accessPathLimit0() { result = 2 }
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
abstract private class TaintState extends TTaintState {
abstract string toString();
}
/**
* A FlowState representing a tainted read.
*/
private class TaintRead extends TaintState, TTaintRead {
private int step;
TaintRead() { this = TTaintRead(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintRead(" + step + ")" }
}
/**
* A FlowState representing a tainted write.
*/
private class TaintStore extends TaintState, TTaintStore {
private int step;
TaintStore() { this = TTaintStore(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintStore(" + step + ")" }
}
/**
* A data-flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*/
module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
state.(TaintRead).getStep() = 0
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.(NodeExtended).getAsExprEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
exists(DataFlow::ContentSet c |
DataFlow::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
)
or
exists(DataFlow::ContentSet c |
DataFlow::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
exists(string input, string output |
p.(NodeExtended).getEnclosingCallable() = api and
returnNodeExt.getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
input != output and
result = ModelPrinting::asLiftedTaintModel(api, input, output)
)
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
private string captureThroughFlow(DataFlowSummaryTargetApi api) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
PropagateFlow::flow(p, returnNodeExt) and
result = captureThroughFlow0(api, p, returnNodeExt)
)
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to the
* return value or parameter or if `api` is a fluent API.
*/
string captureFlow(DataFlowSummaryTargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
/**
* Gets the neutral summary model for `api`, if any.
* A neutral summary model is generated, if we are not generating
* a summary model that applies to `api`.
*/
string captureNoFlow(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0 |
exists(captureFlow(api0)) and api0.lift() = api.lift()
) and
api.isRelevant() and
result = ModelPrinting::asNeutralSummaryModel(api)
}
/**
* Provides classes and predicates related to capturing summary models
* based on content data flow.
*/
module ContentSensitive {
private import MakeImplContentDataFlow<Location, Lang> as ContentDataFlow
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source instanceof DataFlow::ParameterNode and
source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isSink(DataFlow::Node sink) {
sink.(ReturnNodeExt).getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
int accessPathLimit() { result = 2 }
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
private string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
private string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
result = parameterContentAccess(p.(NodeExtended).asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
i = 0 and
result = "." + printContent(head)
or
i > 0 and result = getContent(tail, i - 1)
)
}
/**
* Gets the MaD string representation of a store step access path.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i)
}
/**
* Gets the MaD string representation of a read step access path.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i desc)
}
/**
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
mentionsField(tail) or isField(head)
)
}
private predicate apiFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.(NodeExtended).getEnclosingCallable() = api
}
/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to a combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of an objects internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(string input, string output |
exists(
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
|
apiFlow(this, p, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
)
) <= 2 * this.getNumberOfParameters() + 3
}
}
pragma[nomagic]
private predicate apiContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.(NodeExtended).getEnclosingCallable() = api
}
/**
* Holds if any of the content sets in `path` translates into a synthetic field.
*/
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, DataFlow::ContentSet head |
head = path.getHead() and
tail = path.getTail()
|
exists(getSyntheticName(head)) or
hasSyntheticContent(tail)
)
}
/**
* A module containing predicates for validating access paths containing content sets
* that translates into synthetic fields, when used for generated summary models.
*/
private module AccessPathSyntheticValidation {
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`).
*/
private predicate step(
Type t1, PropagateContentFlow::AccessPath read, Type t2,
PropagateContentFlow::AccessPath store
) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
p.(NodeExtended).getType() = t1 and
returnNodeExt.getType() = t2 and
apiContentFlow(_, p, read, returnNodeExt, store, _)
)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
*
* Step A -> Synth.
*/
private predicate synthPathEntry(
Type t1, PropagateContentFlow::AccessPath read, Type t2,
PropagateContentFlow::AccessPath store
) {
not hasSyntheticContent(read) and
hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists an API that has content flow from `read` (on type `t1`)
* to `store` (on type `t2`), where `read` has synthetic content
* and `store` does not.
*
* Step Synth -> A.
*/
private predicate synthPathExit(
Type t1, PropagateContentFlow::AccessPath read, Type t2,
PropagateContentFlow::AccessPath store
) {
hasSyntheticContent(read) and
not hasSyntheticContent(store) and
step(t1, read, t2, store)
}
/**
* Holds if there exists a path of steps from `read` to an exit.
*
* read ->* Synth -> A
*/
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
synthPathExit(t, read, _, _)
or
hasSyntheticContent(read) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(t, read, midType, mid) and
reachesSynthExit(midType, mid.reverse())
)
}
/**
* Holds if there exists a path of steps from an entry to `store`.
*
* A -> Synth ->* store
*/
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
synthPathEntry(_, _, t, store)
or
hasSyntheticContent(store) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(midType, mid, t, store) and
synthEntryReaches(midType, mid.reverse())
)
}
/**
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
* contain content that will be translated into a synthetic field, when being used in
* a MaD summary model, and if there is a range of APIs, such that
* when chaining their flow access paths, there exists access paths `A` and `B` where
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
* be translated into a synthetic field.
*
* This is needed because we don't want to include summaries that reads from or
* stores into an "internal" synthetic field.
*
* Example:
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
* `setX`, which gets and sets a private field `X` on `t`.
* This would lead to the following content flows
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
* As the reads and stores are on synthetic fields we should only make summaries
* if both of these methods exist.
*/
pragma[nomagic]
predicate acceptReadStore(
Type t1, PropagateContentFlow::AccessPath read, Type t2,
PropagateContentFlow::AccessPath store
) {
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
or
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
or
synthEntryReaches(t1, store0) and
step(t1, read, t2, store) and
reachesSynthExit(t2, store.reverse())
)
}
}
/**
* Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
* Flow is considered relevant,
* 1. If `read` or `store` do not contain a content set that translates into a synthetic field.
* 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
* the synthetic content is "live" on the relevant declaring type.
*/
private predicate apiRelevantContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath store, boolean preservesValue
) {
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
(
not hasSyntheticContent(read) and not hasSyntheticContent(store)
or
AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read,
returnNodeExt.getType(), store)
)
}
pragma[nomagic]
private predicate captureFlow0(
ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
boolean lift
) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores
|
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
)
}
/**
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
* the return value or a parameter).
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureFlow(ContentDataFlowSummaryTargetApi api) {
exists(string input, string output, boolean lift, boolean preservesValue |
captureFlow0(api, input, output, _, lift) and
preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and
result = ModelPrinting::asModel(api, input, output, preservesValue, lift)
)
}
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API expose an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
sourceNode(source, kind)
)
}
predicate isSink(DataFlow::Node sink) {
sink.(ReturnNodeExt).getEnclosingCallable() instanceof DataFlowSourceTargetApi
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;
/**
* Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`.
*/
string captureSource(DataFlowSourceTargetApi api) {
exists(NodeExtended source, ReturnNodeExt sink, string kind |
PropagateFromSource::flow(source, sink) and
sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
result = ModelPrinting::asSourceModel(api, getOutput(sink), kind)
)
}
/**
* A dataflow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
apiSource(source) and
source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSinkTargetApi
}
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind))
}
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t))
or
sinkModelSanitizer(node)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;
/**
* Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink.
*/
string captureSink(DataFlowSinkTargetApi api) {
exists(NodeExtended src, NodeExtended sink, string kind |
PropagateToSink::flow(src, sink) and
sinkNode(sink, kind) and
api = src.getEnclosingCallable() and
result = ModelPrinting::asSinkModel(api, asInputArgument(src), kind)
)
}
}

View File

@@ -4,4 +4,5 @@ groups: shared
library: true
dependencies:
codeql/util: ${workspace}
codeql/dataflow: ${workspace}
warnOnImplicitThis: true