Merge pull request #8329 from michaelnebel/csharp/model-generator

C#: Capture Summary models.
This commit is contained in:
Michael Nebel
2022-03-14 16:10:05 +01:00
committed by GitHub
18 changed files with 807 additions and 277 deletions

View File

@@ -416,3 +416,13 @@ private class MyConsistencyConfiguration extends Consistency::ConsistencyConfigu
n.getType() instanceof ImmutableType or n instanceof ImplicitVarargsArray
}
}
/**
 * Holds if the content `c` is a container content, that is, one of
 * array, collection, map-key or map-value content.
 */
predicate containerContent(Content c) {
  c instanceof MapValueContent
  or
  c instanceof MapKeyContent
  or
  c instanceof CollectionContent
  or
  c instanceof ArrayContent
}

View File

@@ -62,13 +62,6 @@ private module Cached {
)
}
private predicate containerContent(DataFlow::Content c) {
c instanceof DataFlow::ArrayContent or
c instanceof DataFlow::CollectionContent or
c instanceof DataFlow::MapKeyContent or
c instanceof DataFlow::MapValueContent
}
/**
* Holds if taint can flow in one local step from `src` to `sink` excluding
* local data flow steps. That is, `src` and `sink` are likely to represent

View File

@@ -4,23 +4,12 @@
* @id java/utils/model-generator/summary-models
*/
import java
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.internal.DataFlowImplCommon
import semmle.code.java.dataflow.internal.DataFlowNodes
import semmle.code.java.dataflow.internal.DataFlowPrivate
import semmle.code.java.dataflow.InstanceAccess
import ModelGeneratorUtils
string captureFlow(TargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
private import CaptureSummaryModels
/**
* Capture fluent APIs that return `this`.
* Example of a fluent API:
* ```
* ```java
* public class Foo {
* public Foo someAPI() {
* // some side-effect
@@ -28,75 +17,14 @@ string captureFlow(TargetApi api) {
* }
* }
* ```
*/
string captureQualifierFlow(TargetApi api) {
exists(ReturnStmt rtn |
rtn.getEnclosingCallable() = api and
rtn.getResult().(ThisAccess).isOwnInstanceAccess()
) and
result = asValueModel(api, "Argument[-1]", "ReturnValue")
}
class TaintRead extends DataFlow::FlowState {
TaintRead() { this = "TaintRead" }
}
class TaintStore extends DataFlow::FlowState {
TaintStore() { this = "TaintStore" }
}
class ThroughFlowConfig extends TaintTracking::Configuration {
ThroughFlowConfig() { this = "ThroughFlowConfig" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof TargetApi and
state instanceof TaintRead
}
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink instanceof ReturnNodeExt and
not sink.(ReturnNode).asExpr().(ThisAccess).isOwnInstanceAccess() and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
override predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
exists(TypedContent tc |
store(node1, tc, node2, _) and
isRelevantContent(tc.getContent()) and
(state1 instanceof TaintRead or state1 instanceof TaintStore) and
state2 instanceof TaintStore
)
or
exists(DataFlow::Content c |
readStep(node1, c, node2) and
isRelevantContent(c) and
state1 instanceof TaintRead and
state2 instanceof TaintRead
)
}
override predicate isSanitizer(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
override DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
/**
*
* Capture APIs that transfer taint from an input parameter to an output return
* value or parameter.
* Allows a sequence of read steps followed by a sequence of store steps.
*
* Examples:
*
* ```
* ```java
* public class Foo {
* private String tainted;
*
@@ -109,13 +37,13 @@ class ThroughFlowConfig extends TaintTracking::Configuration {
* }
* }
* ```
* Captured Model:
* Captured Models:
* ```
* p;Foo;true;returnsTainted;;Argument[-1];ReturnValue;taint
* p;Foo;true;putsTaintIntoParameter;(List);Argument[-1];Argument[0];taint
* ```
*
* ```
* ```java
* public class Foo {
* private String tainted;
* public void doSomething(String input) {
@@ -123,9 +51,9 @@ class ThroughFlowConfig extends TaintTracking::Configuration {
* }
* ```
* Captured Model:
* `p;Foo;true;doSomething;(String);Argument[0];Argument[-1];taint`
* ```p;Foo;true;doSomething;(String);Argument[0];Argument[-1];taint```
*
* ```
* ```java
* public class Foo {
* public String returnData(String tainted) {
* return tainted.substring(0,10)
@@ -133,9 +61,9 @@ class ThroughFlowConfig extends TaintTracking::Configuration {
* }
* ```
* Captured Model:
* `p;Foo;true;returnData;;Argument[0];ReturnValue;taint`
* ```p;Foo;true;returnData;;Argument[0];ReturnValue;taint```
*
* ```
* ```java
* public class Foo {
* public void addToList(String tainted, List<String> foo) {
* foo.add(tainted);
@@ -143,20 +71,11 @@ class ThroughFlowConfig extends TaintTracking::Configuration {
* }
* ```
* Captured Model:
* `p;Foo;true;addToList;;Argument[0];Argument[1];taint`
* ```p;Foo;true;addToList;;Argument[0];Argument[1];taint```
*/
string captureThroughFlow(TargetApi api) {
exists(
ThroughFlowConfig config, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input,
string output
|
config.hasFlow(p, returnNodeExt) and
returnNodeExt.getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = returnNodeAsOutput(returnNodeExt) and
input != output and
result = asTaintModel(api, input, output)
)
string captureFlow(TargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
from TargetApi api, string flow

View File

@@ -0,0 +1,98 @@
/**
* Provides classes and predicates related to capturing summary models
* of the Standard or a 3rd party library.
*/
import CaptureSummaryModelsSpecific
/**
 * Gets the value summary model of `api` from the qualifier to `ReturnValue`,
 * if `api` follows the `fluent` programming pattern, that is, some return
 * node enclosed by `api` is an access to the callable's own instance (`this`).
 */
string captureQualifierFlow(TargetApi api) {
  result = asValueModel(api, qualifierString(), "ReturnValue") and
  exists(ReturnNodeExt thisReturn |
    isOwnInstanceAccessNode(thisReturn) and
    returnNodeEnclosingCallable(thisReturn) = api
  )
}
/**
 * A `FlowState` identified by the string `"TaintRead"`.
 *
 * `ThroughFlowConfig` uses it as the state of its sources, and its additional
 * read steps both start and end in this state.
 */
private class TaintRead extends DataFlow::FlowState {
TaintRead() { this = "TaintRead" }
}
/**
 * A `FlowState` identified by the string `"TaintStore"`.
 *
 * `ThroughFlowConfig` switches to this state on its additional store steps;
 * its additional read steps are not available from this state.
 */
private class TaintStore extends DataFlow::FlowState {
TaintStore() { this = "TaintStore" }
}
/**
 * A TaintTracking Configuration used for tracking flow through APIs.
 * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
 *
 * This can be used to generate Flow summaries for APIs from parameter to return.
 *
 * The configuration is state-based: flow starts in `TaintRead`, a store step
 * moves it to `TaintStore`, and read steps are only added while still in
 * `TaintRead`. The additional steps therefore form a sequence of reads
 * followed by a sequence of stores.
 */
class ThroughFlowConfig extends TaintTracking::Configuration {
ThroughFlowConfig() { this = "ThroughFlowConfig" }
// Sources: parameter nodes whose enclosing callable is a `TargetApi`, in state `TaintRead`.
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof TargetApi and
state instanceof TaintRead
}
// Sinks: extended return nodes (`ReturnNodeExt`) in either state, excluding
// nodes that access the callable's own instance and nodes whose expression's
// enclosing callable already yields a qualifier (fluent) model.
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
// Additional steps through relevant content:
//  - a store step is allowed from either state and always ends in `TaintStore`;
//  - a read step is only allowed from `TaintRead` and stays in `TaintRead`.
override predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
exists(TypedContent tc |
store(node1, tc, node2, _) and
isRelevantContent(tc.getContent()) and
(state1 instanceof TaintRead or state1 instanceof TaintStore) and
state2 instanceof TaintStore
)
or
exists(DataFlow::Content c |
readStep(node1, c, node2) and
isRelevantContent(c) and
state1 instanceof TaintRead and
state2 instanceof TaintRead
)
}
// Flow is blocked at any node that has a type which is not relevant (see `isRelevantType`).
override predicate isSanitizer(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
// NOTE(review): presumably restricts results to flow whose source and sink
// share the same call context - confirm against the DataFlow library docs.
override DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
/**
 * Gets the taint summary model(s) of `api`, one for each pair of a parameter
 * node and an extended return node of `api` that are connected by
 * `ThroughFlowConfig` and whose input and output specifications differ.
 */
string captureThroughFlow(TargetApi api) {
  exists(
    ThroughFlowConfig cfg, DataFlow::ParameterNode source, ReturnNodeExt sink, string inSpec,
    string outSpec
  |
    inSpec = parameterNodeAsInput(source) and
    outSpec = returnNodeAsOutput(sink) and
    // Skip models where input and output are the same access path.
    inSpec != outSpec
  |
    sink.getEnclosingCallable() = api and
    cfg.hasFlow(source, sink) and
    result = asTaintModel(api, inSpec, outSpec)
  )
}

View File

@@ -0,0 +1,21 @@
/**
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
import java
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.internal.DataFlowImplCommon
import semmle.code.java.dataflow.internal.DataFlowNodes
import semmle.code.java.dataflow.internal.DataFlowPrivate
import semmle.code.java.dataflow.InstanceAccess
import ModelGeneratorUtils
/**
 * Gets the callable that encloses the extended return node `ret`, obtained
 * from `getNodeEnclosingCallable(ret)` and converted with `asCallable()`.
 */
Callable returnNodeEnclosingCallable(ReturnNodeExt ret) {
result = getNodeEnclosingCallable(ret).asCallable()
}
/**
 * Holds if the expression of the return node `node` is a `ThisAccess`
 * that is an own-instance access.
 */
predicate isOwnInstanceAccessNode(ReturnNode node) {
  exists(ThisAccess access | access = node.asExpr() | access.isOwnInstanceAccess())
}
/**
 * Gets the model string used to refer to the qualifier of a callable,
 * which is `Argument[-1]`.
 */
string qualifierString() { result = "Argument[-1]" }

View File

@@ -1,143 +1,8 @@
import java
private import semmle.code.java.dataflow.ExternalFlow
private import semmle.code.java.dataflow.internal.ContainerFlow
private import semmle.code.java.dataflow.internal.DataFlowImplCommon
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.internal.DataFlowPrivate
Method superImpl(Method m) {
result = m.getAnOverride() and
not exists(result.getAnOverride()) and
not m instanceof ToStringMethod
}
class TargetApi extends Callable {
TargetApi() {
this.isPublic() and
this.fromSource() and
(
this.getDeclaringType().isPublic() or
superImpl(this).getDeclaringType().isPublic()
) and
isRelevantForModels(this)
}
}
/** DEPRECATED: Alias for TargetApi */
deprecated class TargetAPI = TargetApi;
private string isExtensible(RefType ref) {
if ref.isFinal() then result = "false" else result = "true"
}
predicate isRelevantForModels(Callable api) {
not isInTestFile(api.getCompilationUnit().getFile()) and
not isJdkInternal(api.getCompilationUnit()) and
not api instanceof MainMethod
}
private predicate isInTestFile(File file) {
file.getAbsolutePath().matches("%src/test/%") or
file.getAbsolutePath().matches("%/guava-tests/%") or
file.getAbsolutePath().matches("%/guava-testlib/%")
}
private predicate isJdkInternal(CompilationUnit cu) {
cu.getPackage().getName().matches("org.graalvm%") or
cu.getPackage().getName().matches("com.sun%") or
cu.getPackage().getName().matches("javax.swing%") or
cu.getPackage().getName().matches("java.awt%") or
cu.getPackage().getName().matches("sun%") or
cu.getPackage().getName().matches("jdk.%") or
cu.getPackage().getName().matches("java2d.%") or
cu.getPackage().getName().matches("build.tools.%") or
cu.getPackage().getName().matches("propertiesparser.%") or
cu.getPackage().getName().matches("org.jcp.%") or
cu.getPackage().getName().matches("org.w3c.%") or
cu.getPackage().getName().matches("org.ietf.jgss.%") or
cu.getPackage().getName().matches("org.xml.sax%") or
cu.getPackage().getName() = "compileproperties" or
cu.getPackage().getName() = "netscape.javascript" or
cu.getPackage().getName() = ""
}
bindingset[input, output]
string asTaintModel(TargetApi api, string input, string output) {
result = asSummaryModel(api, input, output, "taint")
}
bindingset[input, output]
string asValueModel(TargetApi api, string input, string output) {
result = asSummaryModel(api, input, output, "value")
}
bindingset[input, output, kind]
string asSummaryModel(TargetApi api, string input, string output, string kind) {
result =
asPartialModel(api) + input + ";" //
+ output + ";" //
+ kind
}
bindingset[input, kind]
string asSinkModel(TargetApi api, string input, string kind) {
result = asPartialModel(api) + input + ";" + kind
}
bindingset[output, kind]
string asSourceModel(TargetApi api, string output, string kind) {
result = asPartialModel(api) + output + ";" + kind
}
import ModelGeneratorUtilsSpecific
/**
* Computes the first 6 columns for CSV rows.
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*/
private string asPartialModel(TargetApi api) {
result =
typeAsSummaryModel(api) + ";" //
+ isExtensible(bestTypeForModel(api)) + ";" //
+ api.getName() + ";" //
+ paramsString(api) + ";" //
+ /* ext + */ ";" //
}
/**
* Returns the appropriate type name for the model. Either the type
* declaring the method or the supertype introducing the method.
*/
private string typeAsSummaryModel(TargetApi api) { result = typeAsModel(bestTypeForModel(api)) }
private RefType bestTypeForModel(TargetApi api) {
if exists(superImpl(api))
then superImpl(api).fromSource() and result = superImpl(api).getDeclaringType()
else result = api.getDeclaringType()
}
private string typeAsModel(RefType type) {
result = type.getCompilationUnit().getPackage().getName() + ";" + type.nestedName()
}
predicate isRelevantType(Type t) {
not t instanceof TypeClass and
not t instanceof EnumType and
not t instanceof PrimitiveType and
not t instanceof BoxedType and
not t.(RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
(
not t.(Array).getElementType() instanceof PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
) and
(
not t.(Array).getElementType() instanceof BoxedType or
isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
) and
(
not t.(CollectionType).getElementType() instanceof BoxedType or
isPrimitiveTypeUsedForBulkData(t.(CollectionType).getElementType())
)
}
predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::Content f |
readStep(node1, f, node2) and
@@ -149,50 +14,58 @@ predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
else any()
)
or
exists(DataFlow::Content f | storeStep(node1, f, node2) |
f instanceof DataFlow::ArrayContent or
f instanceof DataFlow::CollectionContent or
f instanceof DataFlow::MapKeyContent or
f instanceof DataFlow::MapValueContent
)
exists(DataFlow::Content f | storeStep(node1, f, node2) | containerContent(f))
}
predicate isRelevantContent(DataFlow::Content f) {
isRelevantType(f.(DataFlow::FieldContent).getField().getType()) or
f instanceof DataFlow::ArrayContent or
f instanceof DataFlow::CollectionContent or
f instanceof DataFlow::MapKeyContent or
f instanceof DataFlow::MapValueContent
/**
* Holds if content `c` is either a field or synthetic field of a relevant type
* or container-like content.
*/
predicate isRelevantContent(DataFlow::Content c) {
isRelevantType(c.(DataFlow::FieldContent).getField().getType()) or
isRelevantType(c.(DataFlow::SyntheticFieldContent).getField().getType()) or
containerContent(c)
}
string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = parameterAccess(p.asParameter())
or
result = "Argument[-1]" and p instanceof DataFlow::InstanceParameterNode
/**
* Gets the summary model for `api` with `input`, `output` and `kind`.
*/
bindingset[input, output, kind]
string asSummaryModel(TargetApi api, string input, string output, string kind) {
result =
asPartialModel(api) + input + ";" //
+ output + ";" //
+ kind
}
string returnNodeAsOutput(ReturnNodeExt node) {
if node.getKind() instanceof ValueReturnKind
then result = "ReturnValue"
else
exists(int pos | pos = node.getKind().(ParamUpdateReturnKind).getPosition() |
result = parameterAccess(node.getEnclosingCallable().getParameter(pos))
or
result = "Argument[-1]" and pos = -1
)
/**
* Gets the value summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asValueModel(TargetApi api, string input, string output) {
result = asSummaryModel(api, input, output, "value")
}
string parameterAccess(Parameter p) {
if
p.getType() instanceof Array and
not isPrimitiveTypeUsedForBulkData(p.getType().(Array).getElementType())
then result = "Argument[" + p.getPosition() + "].ArrayElement"
else
if p.getType() instanceof ContainerType
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
/**
* Gets the taint summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asTaintModel(TargetApi api, string input, string output) {
result = asSummaryModel(api, input, output, "taint")
}
predicate isPrimitiveTypeUsedForBulkData(Type t) {
t.getName().regexpMatch("byte|char|Byte|Character")
/**
* Gets the sink model for `api` with `input` and `kind`.
*/
bindingset[input, kind]
string asSinkModel(TargetApi api, string input, string kind) {
result = asPartialModel(api) + input + ";" + kind
}
/**
* Gets the source model for `api` with `output` and `kind`.
*/
bindingset[output, kind]
string asSourceModel(TargetApi api, string output, string kind) {
result = asPartialModel(api) + output + ";" + kind
}

View File

@@ -0,0 +1,156 @@
import java
import semmle.code.java.dataflow.internal.DataFlowPrivate
import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.ExternalFlow
private import semmle.code.java.dataflow.internal.ContainerFlow
private import semmle.code.java.dataflow.internal.DataFlowImplCommon
/**
 * Gets a method reported by `m.getAnOverride()` that itself has no
 * `getAnOverride()` result. Has no result when `m` is a `ToStringMethod`.
 */
Method superImpl(Method m) {
  not m instanceof ToStringMethod and
  result = m.getAnOverride() and
  not exists(Method further | further = result.getAnOverride())
}
/**
 * Holds if the absolute path of `file` contains `src/test/`,
 * `/guava-tests/` or `/guava-testlib/`.
 */
private predicate isInTestFile(File file) {
  exists(string path | path = file.getAbsolutePath() |
    path.matches("%src/test/%")
    or
    path.matches("%/guava-tests/%")
    or
    path.matches("%/guava-testlib/%")
  )
}
/**
 * Holds if the package name of the compilation unit `cu` matches one of a
 * fixed list of package-name patterns or exact names, including the empty
 * package name. Such compilation units are treated as JDK internal.
 */
private predicate isJdkInternal(CompilationUnit cu) {
  exists(string pkg | pkg = cu.getPackage().getName() |
    // Prefix patterns (`%` matches any sequence of characters).
    pkg.matches("org.graalvm%") or
    pkg.matches("com.sun%") or
    pkg.matches("javax.swing%") or
    pkg.matches("java.awt%") or
    pkg.matches("sun%") or
    pkg.matches("jdk.%") or
    pkg.matches("java2d.%") or
    pkg.matches("build.tools.%") or
    pkg.matches("propertiesparser.%") or
    pkg.matches("org.jcp.%") or
    pkg.matches("org.w3c.%") or
    pkg.matches("org.ietf.jgss.%") or
    pkg.matches("org.xml.sax%") or
    // Exact package names.
    pkg = "compileproperties" or
    pkg = "netscape.javascript" or
    pkg = ""
  )
}
/**
 * Holds if `api` is relevant for model generation: it is not located in a
 * test file, not located in a JDK internal compilation unit, and is not a
 * `MainMethod`.
 */
predicate isRelevantForModels(Callable api) {
  not (
    api instanceof MainMethod
    or
    isJdkInternal(api.getCompilationUnit())
    or
    isInTestFile(api.getCompilationUnit().getFile())
  )
}
/**
 * A class of Callables that are relevant for generating summary, source and sink models for.
 *
 * In the Standard library and 3rd party libraries these are the Callables that can be called
 * from outside the library itself.
 *
 * A callable qualifies when it is public, comes from source, is relevant for
 * models, and either its declaring type or the declaring type of one of its
 * `superImpl` methods is public.
 */
class TargetApi extends Callable {
  TargetApi() {
    isRelevantForModels(this) and
    this.fromSource() and
    this.isPublic() and
    (
      superImpl(this).getDeclaringType().isPublic()
      or
      this.getDeclaringType().isPublic()
    )
  }
}
/**
 * Gets the string `"false"` if `ref` is final and `"true"` otherwise.
 */
private string isExtensible(RefType ref) {
  ref.isFinal() and result = "false"
  or
  not ref.isFinal() and result = "true"
}
/**
 * Gets the model representation of `type`: the name of the package of its
 * compilation unit, a `;`, and the nested name of the type.
 */
private string typeAsModel(RefType type) {
  exists(string packageName, string typeName |
    packageName = type.getCompilationUnit().getPackage().getName() and
    typeName = type.nestedName()
  |
    result = packageName + ";" + typeName
  )
}
/**
 * Gets the type to use in the model for `api`.
 *
 * If `api` has a `superImpl`, the result is the declaring type of a
 * `superImpl` method, provided some `superImpl` method is from source;
 * otherwise the result is the declaring type of `api` itself.
 *
 * NOTE(review): the two `superImpl(api)` expressions in the `then` branch are
 * evaluated independently, so the `fromSource()` check and the returned
 * declaring type need not refer to the same method when `superImpl(api)` has
 * several results - confirm this is intended.
 */
private RefType bestTypeForModel(TargetApi api) {
if exists(superImpl(api))
then superImpl(api).fromSource() and result = superImpl(api).getDeclaringType()
else result = api.getDeclaringType()
}
/**
 * Gets the type name to use in the model for `api`: the model
 * representation of the best type for `api`, which is either the type
 * declaring the method or the supertype introducing the method.
 */
private string typeAsSummaryModel(TargetApi api) {
  exists(RefType modelType | modelType = bestTypeForModel(api) |
    result = typeAsModel(modelType)
  )
}
/**
 * Computes the first 6 columns for CSV rows of `api`: package and type
 * (from `typeAsSummaryModel`), extensibility, name, parameter signature and
 * an empty `ext` column, each followed by `;`.
 */
string asPartialModel(TargetApi api) {
  exists(string typeColumns, string extensible, string name, string signature |
    typeColumns = typeAsSummaryModel(api) and
    extensible = isExtensible(bestTypeForModel(api)) and
    name = api.getName() and
    signature = paramsString(api)
  |
    // The trailing ";" terminates the (empty) `ext` column.
    result = typeColumns + ";" + extensible + ";" + name + ";" + signature + ";" + ";"
  )
}
/**
 * Holds if the name of type `t` is exactly one of `byte`, `char`, `Byte`
 * or `Character`.
 */
private predicate isPrimitiveTypeUsedForBulkData(Type t) {
  exists(string typeName | typeName = t.getName() |
    typeName.regexpMatch("byte|char|Byte|Character")
  )
}
/**
 * Holds for type `t` for fields that are relevant as an intermediate
 * read or write step in the data flow analysis.
 *
 * A type is relevant unless it is a `TypeClass`, an enum, a primitive, a
 * boxed type, a type with `java.lang.Number` or `java.nio.charset.Charset`
 * as an ancestor, or an array/collection of primitive or boxed elements whose
 * element type is not one used for bulk data (see
 * `isPrimitiveTypeUsedForBulkData`).
 */
predicate isRelevantType(Type t) {
not t instanceof TypeClass and
not t instanceof EnumType and
not t instanceof PrimitiveType and
not t instanceof BoxedType and
not t.(RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
// Arrays with a primitive element type are only relevant for bulk-data element types.
(
not t.(Array).getElementType() instanceof PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
) and
// Likewise for arrays with a boxed element type.
(
not t.(Array).getElementType() instanceof BoxedType or
isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
) and
// Likewise for collections with a boxed element type.
(
not t.(CollectionType).getElementType() instanceof BoxedType or
isPrimitiveTypeUsedForBulkData(t.(CollectionType).getElementType())
)
}
/**
 * Gets the model string used to access parameter `p`:
 * - `Argument[i].ArrayElement` if the type of `p` is an array whose element
 *   type is not one used for bulk data,
 * - otherwise `Argument[i].Element` if the type of `p` is a `ContainerType`,
 * - otherwise `Argument[i]`,
 * where `i` is the position of `p`.
 */
private string parameterAccess(Parameter p) {
if
p.getType() instanceof Array and
not isPrimitiveTypeUsedForBulkData(p.getType().(Array).getElementType())
then result = "Argument[" + p.getPosition() + "].ArrayElement"
else
if p.getType() instanceof ContainerType
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
/**
 * Gets the model string representation of the parameter node `p`: the
 * parameter access string when `p` corresponds to a parameter, or
 * `Argument[-1]` when `p` is an instance parameter node.
 */
string parameterNodeAsInput(DataFlow::ParameterNode p) {
  p instanceof DataFlow::InstanceParameterNode and
  result = "Argument[-1]"
  or
  exists(Parameter param | param = p.asParameter() | result = parameterAccess(param))
}
/**
 * Gets the model string representation of the return node `node`.
 *
 * This is `ReturnValue` when the kind of `node` is a `ValueReturnKind`.
 * Otherwise, for a `ParamUpdateReturnKind` with position `pos`, it is the
 * access string of the parameter at `pos` of the enclosing callable, or
 * `Argument[-1]` when `pos` is `-1`.
 */
string returnNodeAsOutput(ReturnNodeExt node) {
if node.getKind() instanceof ValueReturnKind
then result = "ReturnValue"
else
// Not a value return: only parameter-update kinds produce a result here.
exists(int pos | pos = node.getKind().(ParamUpdateReturnKind).getPosition() |
result = parameterAccess(node.getEnclosingCallable().getParameter(pos))
or
result = "Argument[-1]" and pos = -1
)
}