Java: Initial implementation of content based model generation.

This commit is contained in:
Michael Nebel
2024-08-26 14:58:56 +02:00
parent 4bdf21b022
commit 6365e5edff
4 changed files with 183 additions and 9 deletions

View File

@@ -0,0 +1,13 @@
/**
* @name Capture field based summary models.
* @description Finds applicable field based summary models to be used by other queries.
* @kind diagnostic
* @id java/utils/modelgenerator/fieldbased-summary-models
* @tags modelgenerator
*/
import internal.CaptureModels
from DataFlowSummaryTargetApi api, string flow
where flow = captureContentFlow(api)
select flow order by flow

View File

@@ -18,17 +18,35 @@ private class ReturnNodeExt extends DataFlow::Node {
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
}
string getOutput() {
kind instanceof DataFlowImplCommon::ValueReturnKind and
/**
* Gets the kind of the return node.
*/
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
}
bindingset[c]
private signature string printCallableParamSig(Callable c, ParameterPosition p);
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
string getOutput(ReturnNodeExt node) {
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
result = "ReturnValue"
or
exists(ParameterPosition pos |
pos = kind.(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = paramReturnNodeAsOutput(returnNodeEnclosingCallable(this), pos)
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
)
}
}
string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
class DataFlowSummaryTargetApi extends SummaryTargetApi {
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
}
@@ -71,7 +89,8 @@ private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
*/
private predicate isRelevantContent(DataFlow::ContentSet c) {
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
isRelevantTypeInContent(c) or
containerContent(c)
}
@@ -85,6 +104,16 @@ string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
result = parameterContentAccess(p.asParameter())
or
result = qualifierString() and p instanceof InstanceParameterNode
}
/**
* Gets the MaD input string representation of `source`.
*/
@@ -170,7 +199,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
) {
exists(DataFlow::ContentSet c |
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent(c) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
@@ -180,7 +209,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
or
exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(node1, c, node2) and
isRelevantContent(c) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
@@ -196,6 +225,9 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
@@ -203,7 +235,7 @@ string captureThroughFlow0(
p.getEnclosingCallable() = api and
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = returnNodeExt.getOutput() and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asTaintModel(api, input, output)
)
@@ -219,6 +251,69 @@ string captureThroughFlow(DataFlowSummaryTargetApi api) {
)
}
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
int accessPathLimit() { result = 2 }
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail() and
result = "." + printContent(head) + printStoreAccessPath(tail)
)
}
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail() and
result = printReadAccessPath(tail) + "." + printContent(head)
)
}
string captureContentFlow(DataFlowSummaryTargetApi api) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
boolean preservesValue
|
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
result = Printing::asModel(api, input, output, preservesValue)
)
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
@@ -261,7 +356,7 @@ string captureSource(DataFlowSourceTargetApi api) {
ExternalFlow::sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
result = Printing::asSourceModel(api, sink.getOutput(), kind)
result = Printing::asSourceModel(api, getOutput(sink), kind)
)
}

View File

@@ -4,10 +4,12 @@
private import java as J
private import semmle.code.java.dataflow.internal.DataFlowPrivate
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.dataflow.internal.ModelExclusions
private import semmle.code.java.dataflow.DataFlow as Df
private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf
private import semmle.code.java.dataflow.SSA as Ssa
private import semmle.code.java.dataflow.TaintTracking as Tt
import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
@@ -17,6 +19,8 @@ import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch
module DataFlow = Df::DataFlow;
module ContentDataFlow = Cdf::ContentDataFlow;
module TaintTracking = Tt::TaintTracking;
class Type = J::Type;
@@ -25,6 +29,8 @@ class Unit = J::Unit;
class Callable = J::Callable;
class ContentSet = DataFlowUtil::ContentSet;
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
cu.getPackage().getName().matches("javax.swing%") or
cu.getPackage().getName().matches("java.awt%")
@@ -217,6 +223,12 @@ string parameterAccess(J::Parameter p) {
else result = "Argument[" + p.getPosition() + "]"
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow.
*/
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
class InstanceParameterNode = DataFlow::InstanceParameterNode;
class ParameterPosition = DataFlowDispatch::ParameterPosition;
@@ -232,6 +244,17 @@ string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
result = qualifierString() and pos = -1
}
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c` for content flow.
*/
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = parameterContentAccess(c.getParameter(pos))
or
result = qualifierString() and pos = -1
}
/**
* Gets the enclosing callable of `ret`.
*/
@@ -305,3 +328,34 @@ bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
predicate containerContent = DataFlowPrivate::containerContent/1;
/**
* Holds if there is a taint step from `node1` to `node2` in content flow.
*/
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
not exists(DataFlow::Content f |
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
)
}
/**
* Gets the MaD string representation of the contentset `c`.
*/
string printContent(ContentSet c) {
exists(Field f, string name |
f = c.(DataFlowUtil::FieldContent).getField() and name = f.getQualifiedName()
|
if f.isPublic() then result = "Field[" + name + "]" else result = "SyntheticField[" + name + "]"
)
or
result = "SyntheticField[" + c.(DataFlowUtil::SyntheticFieldContent).getField() + "]"
or
c instanceof DataFlowUtil::CollectionContent and result = "Element"
or
c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
or
c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
or
c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
}