Merge pull request #17742 from michaelnebel/csharp/higherordermodels

C#: Models for higher order methods.
This commit is contained in:
Michael Nebel
2024-11-07 11:12:46 +01:00
committed by GitHub
6 changed files with 203 additions and 10 deletions

View File

@@ -13,6 +13,7 @@ private import semmle.code.csharp.Unification
private import semmle.code.csharp.controlflow.Guards
private import semmle.code.csharp.dispatch.Dispatch
private import semmle.code.csharp.frameworks.EntityFramework
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import semmle.code.csharp.frameworks.NHibernate
private import semmle.code.csharp.frameworks.Razor
private import semmle.code.csharp.frameworks.system.Collections
@@ -1146,7 +1147,11 @@ private module Cached {
TPrimaryConstructorParameterContent(Parameter p) {
p.getCallable() instanceof PrimaryConstructor
} or
TCapturedVariableContent(VariableCapture::CapturedVariable v)
TCapturedVariableContent(VariableCapture::CapturedVariable v) or
TDelegateCallArgumentContent(int i) {
i = [0 .. max(any(DelegateLikeCall dc).getNumberOfArguments()) - 1]
} or
TDelegateCallReturnContent()
cached
newtype TContentSet =
@@ -1162,7 +1167,9 @@ private module Cached {
TPrimaryConstructorParameterApproxContent(string firstChar) {
firstChar = approximatePrimaryConstructorParameterContent(_)
} or
TCapturedVariableContentApprox(VariableCapture::CapturedVariable v)
TCapturedVariableContentApprox(VariableCapture::CapturedVariable v) or
TDelegateCallArgumentApproxContent() or
TDelegateCallReturnApproxContent()
pragma[nomagic]
private predicate commonSubTypeGeneral(DataFlowTypeOrUnifiable t1, RelevantGvnType t2) {
@@ -2273,6 +2280,21 @@ private predicate recordProperty(RecordType t, ContentSet c, string name) {
)
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to
* the content set `c` of a delegate call.
*
* If there is a delegate call f(x), then we store "x" on "f"
* using a delegate argument content set.
*/
private predicate storeStepDelegateCall(ExplicitArgumentNode node1, ContentSet c, Node node2) {
exists(ExplicitDelegateLikeDataFlowCall call, int i |
node1.argumentOf(call, TPositionalArgumentPosition(i)) and
lambdaCall(call, _, node2.(PostUpdateNode).getPreUpdateNode()) and
c.isDelegateCallArgument(i)
)
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to
* content `c`.
@@ -2305,6 +2327,8 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
or
storeStepDelegateCall(node1, c, node2)
}
private class ReadStepConfiguration extends ControlFlowReachabilityConfiguration {
@@ -2425,6 +2449,21 @@ private predicate readContentStep(Node node1, Content c, Node node2) {
VariableCapture::readStep(node1, c, node2)
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to
* the content set `c` of a delegate call.
*
* If there is a delegate call f(x), then we read the return of the delegate
* call.
*/
private predicate readStepDelegateCall(Node node1, ContentSet c, OutNode node2) {
exists(ExplicitDelegateLikeDataFlowCall call |
lambdaCall(call, _, node1) and
node2.getCall(TNormalReturnKind()) = call and
c.isDelegateCallReturn()
)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of content `c`.
*/
@@ -2443,6 +2482,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
or
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
or
readStepDelegateCall(node1, c, node2)
}
private predicate clearsCont(Node n, Content c) {
@@ -3037,6 +3078,12 @@ class ContentApprox extends TContentApprox {
exists(VariableCapture::CapturedVariable v |
this = TCapturedVariableContentApprox(v) and result = "captured " + v
)
or
this = TDelegateCallArgumentApproxContent() and
result = "approximated delegate call argument"
or
this = TDelegateCallReturnApproxContent() and
result = "approximated delegate call return"
}
}
@@ -3073,6 +3120,12 @@ ContentApprox getContentApprox(Content c) {
TPrimaryConstructorParameterApproxContent(approximatePrimaryConstructorParameterContent(c))
or
result = TCapturedVariableContentApprox(VariableCapture::getCapturedVariableContent(c))
or
c instanceof DelegateCallArgumentContent and
result = TDelegateCallArgumentApproxContent()
or
c instanceof DelegateCallReturnContent and
result = TDelegateCallReturnApproxContent()
}
/**

View File

@@ -3,6 +3,7 @@ private import DataFlowDispatch
private import DataFlowPrivate
private import semmle.code.csharp.controlflow.Guards
private import semmle.code.csharp.Unification
private import semmle.code.csharp.frameworks.system.linq.Expressions
/**
* An element, viewed as a node in a data flow graph. Either an expression
@@ -238,6 +239,30 @@ class PropertyContent extends Content, TPropertyContent {
override Location getLocation() { result = p.getLocation() }
}
/**
* A reference to the index of an argument of a delegate call.
*/
class DelegateCallArgumentContent extends Content, TDelegateCallArgumentContent {
private int i;
DelegateCallArgumentContent() { this = TDelegateCallArgumentContent(i) }
override string toString() { result = "delegate argument at position " + i }
override Location getLocation() { result instanceof EmptyLocation }
}
/**
* A reference to the return of a delegate call.
*/
class DelegateCallReturnContent extends Content, TDelegateCallReturnContent {
DelegateCallReturnContent() { this = TDelegateCallReturnContent() }
override string toString() { result = "delegate return" }
override Location getLocation() { result instanceof EmptyLocation }
}
/**
* A reference to a synthetic field corresponding to a
* primary constructor parameter.
@@ -299,6 +324,16 @@ class ContentSet extends TContentSet {
*/
predicate isProperty(Property p) { this = TPropertyContentSet(p) }
/**
* Holds if this content set represents the `i`th argument of a delegate call.
*/
predicate isDelegateCallArgument(int i) { this.isSingleton(TDelegateCallArgumentContent(i)) }
/**
* Holds if this content set represents the return of a delegate call.
*/
predicate isDelegateCallReturn() { this.isSingleton(TDelegateCallReturnContent()) }
/** Holds if this content set represents the field `f`. */
predicate isField(Field f) { this.isSingleton(TFieldContent(f)) }

View File

@@ -101,7 +101,9 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel())
}
predicate isUninterestingForDataFlowModels(Callable api) { isHigherOrder(api) }
predicate isUninterestingForDataFlowModels(Callable api) { none() }
predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) }
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
@@ -174,8 +176,15 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
* Gets the underlying type of the content `c`.
*/
private CS::Type getUnderlyingContType(DataFlow::Content c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::FieldContent).getField().getType()
or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
or
// Use System.Object as the type of delegate arguments and returns as the content doesn't
// contain any type information.
c instanceof DataFlow::DelegateCallArgumentContent and result instanceof ObjectType
or
c instanceof DataFlow::DelegateCallReturnContent and result instanceof ObjectType
}
Type getUnderlyingContentType(DataFlow::ContentSet c) {
@@ -309,6 +318,10 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
c.isField(_) or c.isSyntheticField(_) or c.isProperty(_)
}
predicate isCallback(DataFlow::ContentSet c) {
c.isDelegateCallArgument(_) or c.isDelegateCallReturn()
}
string getSyntheticName(DataFlow::ContentSet c) {
exists(CS::Field f |
not f.isEffectivelyPublic() and
@@ -342,6 +355,10 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
or
c.isElement() and
result = "Element"
or
exists(int i | c.isDelegateCallArgument(i) and result = "Parameter[" + i + "]")
or
c.isDelegateCallReturn() and result = "ReturnValue"
}
predicate partialModel = ExternalFlow::partialModel/6;

View File

@@ -62,6 +62,15 @@ public class BasicFlow
{
return tainted;
}
public Func<object, object> MyFunction;
// summary=Models;BasicFlow;false;ApplyMyFunction;(System.Object);;Argument[0];Argument[this];taint;df-generated
// summary=Models;BasicFlow;false;ApplyMyFunction;(System.Object);;Argument[this];ReturnValue;taint;df-generated
// No content based flow as MaD doesn't support callback logic in fields and properties.
public object ApplyMyFunction(object o)
{
return MyFunction(o);
}
}
public class CollectionFlow
@@ -497,18 +506,55 @@ public class SimpleTypes
}
}
// No models as higher order methods are excluded
// from model generation.
// Methods in this class are "neutral" with respect to the heuristic model generation, but
// the content based model generation is able to produce flow summaries for them.
public class HigherOrderParameters
{
// neutral=Models;HigherOrderParameters;M1;(System.String,System.Func<System.String,System.String>);summary;df-generated
// contentbased-summary=Models;HigherOrderParameters;false;M1;(System.String,System.Func<System.String,System.String>);;Argument[0];ReturnValue;value;dfc-generated
public string M1(string s, Func<string, string> map)
{
return s;
}
public object M2(Func<object, object> map, object o)
// neutral=Models;HigherOrderParameters;Apply;(System.Func<System.Object,System.Object>,System.Object);summary;df-generated
// contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Func<System.Object,System.Object>,System.Object);;Argument[1];Argument[0].Parameter[0];value;dfc-generated
// contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Func<System.Object,System.Object>,System.Object);;Argument[0].ReturnValue;ReturnValue;value;dfc-generated
public object Apply(Func<object, object> f, object o)
{
return map(o);
return f(o);
}
// neutral=Models;HigherOrderParameters;Apply2;(System.Object,System.Func<System.Object,System.Object,System.Object>);summary;df-generated
// contentbased-summary=Models;HigherOrderParameters;false;Apply2;(System.Object,System.Func<System.Object,System.Object,System.Object>);;Argument[0];Argument[1].Parameter[1];value;dfc-generated
// contentbased-summary=Models;HigherOrderParameters;false;Apply2;(System.Object,System.Func<System.Object,System.Object,System.Object>);;Argument[1].ReturnValue;ReturnValue;value;dfc-generated
public object Apply2(object o, Func<object, object, object> f)
{
var x = f(null, o);
return x;
}
// neutral=Models;HigherOrderParameters;Apply;(System.Action<System.Object>,System.Object);summary;df-generated
// contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Action<System.Object>,System.Object);;Argument[1];Argument[0].Parameter[0];value;dfc-generated
public void Apply(Action<object> a, object o)
{
a(o);
}
}
public static class HigherOrderExtensionMethods
{
// neutral=Models;HigherOrderExtensionMethods;Select<TSource,TResult>;(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>);summary;df-generated
// contentbased-summary=Models;HigherOrderExtensionMethods;false;Select<TSource,TResult>;(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>);;Argument[0].Element;Argument[1].Parameter[0];value;dfc-generated
// contentbased-summary=Models;HigherOrderExtensionMethods;false;Select<TSource,TResult>;(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>);;Argument[1].ReturnValue;ReturnValue.Element;value;dfc-generated
public static IEnumerable<TResult> Select<TSource, TResult>(
this IEnumerable<TSource> source,
Func<TSource, TResult> selector)
{
foreach (var item in source)
{
yield return selector(item);
}
}
}

View File

@@ -88,6 +88,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, JavaDataF
api.getDeclaringType() instanceof J::Interface and not exists(api.getBody())
}
predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() }
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
@@ -252,6 +254,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, JavaDataF
c instanceof DataFlowUtil::SyntheticFieldContent
}
predicate isCallback(DataFlow::ContentSet c) { none() }
string getSyntheticName(DataFlow::ContentSet c) {
exists(Field f |
not f.isPublic() and

View File

@@ -206,6 +206,11 @@ signature module ModelGeneratorInputSig<LocationSig Location, InputSig<Location>
*/
predicate isField(Lang::ContentSet c);
/**
* Holds if the content set `c` is callback like.
*/
predicate isCallback(Lang::ContentSet c);
/**
* Gets the MaD synthetic name string representation for the content set `c`, if any.
*/
@@ -223,6 +228,15 @@ signature module ModelGeneratorInputSig<LocationSig Location, InputSig<Location>
*/
predicate isUninterestingForDataFlowModels(Callable api);
/**
* Holds if it is irrelevant to generate models for `api` based on the heuristic
* (non-content) flow analysis.
*
* This serves as an extra filter for the `relevant`
* and `isUninterestingForDataFlowModels` predicates.
*/
predicate isUninterestingForHeuristicDataFlowModels(Callable api);
/**
* Holds if `namespace`, `type`, `extensible`, `name` and `parameters` are string representations
* for the corresponding MaD columns for `api`.
@@ -300,7 +314,7 @@ module MakeModelGenerator<
}
}
string getOutput(ReturnNodeExt node) {
private string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
@@ -432,7 +446,11 @@ module MakeModelGenerator<
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
exists(Callable c |
c = source.(NodeExtended).getEnclosingCallable() and
c instanceof DataFlowSummaryTargetApi and
not isUninterestingForHeuristicDataFlowModels(c)
) and
state.(TaintRead).getStep() = 0
}
@@ -605,6 +623,24 @@ module MakeModelGenerator<
isField(ap.getAtIndex(_))
}
/**
* Holds if this access path `ap` mentions a callback.
*/
private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) {
isCallback(ap.getAtIndex(_))
}
/**
* Holds if the access path `ap` is not a parameter or returnvalue of a callback
* stored in a field.
*
* That is, we currently don't include summaries that rely on parameters or return values
* of callbacks stored in fields.
*/
private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) {
not (mentionsField(ap) and mentionsCallback(ap))
}
private predicate apiFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
@@ -846,6 +882,8 @@ module MakeModelGenerator<
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
validateAccessPath(reads) and
validateAccessPath(stores) and
(
if mentionsField(reads) or mentionsField(stores)
then lift = false and api.isRelevant()