Merge branch 'main' into generate-more-value-preserving-summaries-2

This commit is contained in:
Mathias Vorreiter Pedersen
2025-05-02 10:51:11 +01:00
240 changed files with 3011 additions and 1935 deletions

View File

@@ -7,6 +7,7 @@
*/
import internal.CaptureModels
import SummaryModels
from DataFlowSummaryTargetApi api, string flow
where flow = ContentSensitive::captureFlow(api, _, _)

View File

@@ -7,6 +7,7 @@
*/
import internal.CaptureModels
import SummaryModels
from DataFlowSummaryTargetApi api, string noflow
where noflow = captureNeutral(api)

View File

@@ -7,6 +7,7 @@
*/
import internal.CaptureModels
import SinkModels
from DataFlowSinkTargetApi api, string sink
where sink = Heuristic::captureSink(api)

View File

@@ -7,6 +7,7 @@
*/
import internal.CaptureModels
import SourceModels
from DataFlowSourceTargetApi api, string source
where source = Heuristic::captureSource(api)

View File

@@ -7,6 +7,7 @@
*/
import internal.CaptureModels
import SummaryModels
from DataFlowSummaryTargetApi api, string flow
where flow = captureFlow(api, _)

View File

@@ -10,6 +10,7 @@
import csharp
import utils.modelgenerator.internal.CaptureModels
import SummaryModels
import PartialFlow::PartialPathGraph
/**
 * Gets the exploration limit used by this query, which is fixed at `3`.
 * NOTE(review): presumably consumed by the partial-flow module imported above — confirm.
 */
int explorationLimit() {
  result = 3
}

View File

@@ -10,6 +10,7 @@
import csharp
import utils.modelgenerator.internal.CaptureModels
import SummaryModels
import Heuristic
import PropagateTaintFlow::PathGraph

View File

@@ -15,7 +15,41 @@ private import semmle.code.csharp.frameworks.System
private import semmle.code.csharp.Location
private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl
module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDataFlow> {
/**
 * Holds if `a` is an accessor declared by a property for which
 * `isReadWrite()` holds.
 */
private predicate irrelevantAccessor(CS::Accessor a) {
  exists(CS::Property prop |
    prop = a.getDeclaration() and
    prop.isReadWrite()
  )
}
/**
 * Holds if `api` is a callable that no models should be generated for.
 *
 * This covers callables whose declaring type lives in a namespace with an
 * empty full name, conversion operators, main methods, destructors,
 * anonymous functions, parameterless constructors, callables declared on
 * `SystemObjectClass` or `SystemValueTypeClass`, and irrelevant accessors.
 */
private predicate isUninterestingForModels(Callable api) {
  // Disregard properties that have both a get and a set accessor,
  // which implicitly means auto implemented properties.
  irrelevantAccessor(api)
  or
  api.getDeclaringType() instanceof SystemObjectClass
  or
  api.getDeclaringType() instanceof SystemValueTypeClass
  or
  api.(CS::Constructor).isParameterless()
  or
  // Kinds of callables that are never modeled.
  (
    api instanceof CS::AnonymousFunctionExpr or
    api instanceof CS::Destructor or
    api instanceof Util::MainMethod or
    api instanceof CS::ConversionOperator
  )
  or
  // The namespace of the declaring type has an empty full name.
  api.getDeclaringType().getNamespace().getFullName() = ""
}
/**
 * Holds if `api` is a candidate for model generation: it is an unbound
 * declaration from source, it (or, for an accessor, its declaration) is
 * effectively public, and it is not uninteresting for models.
 */
private predicate relevant(Callable api) {
  not isUninterestingForModels(api) and
  api.isUnboundDeclaration() and
  api.fromSource() and
  (
    api.(CS::Modifiable).isEffectivelyPublic()
    or
    api.(CS::Accessor).getDeclaration().isEffectivelyPublic()
  )
}
module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig<Location, CsharpDataFlow> {
/** Alias exposing `CS::Type` under the name `Type`. */
class Type = CS::Type;
/** Alias exposing `CS::Parameter` under the name `Parameter`. */
class Parameter = CS::Parameter;
@@ -24,127 +58,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
/** Alias exposing `CS::DataFlow::Node` under the name `NodeExtended`. */
class NodeExtended = CS::DataFlow::Node;
/**
 * Gets the enclosing callable of the expression that `node` represents.
 * Has no result when `node.asExpr()` has no result.
 */
Callable getAsExprEnclosingCallable(NodeExtended node) {
  exists(CS::Expr e |
    e = node.asExpr() and
    result = e.getEnclosingCallable()
  )
}
/** Gets the callable that encloses `node`. */
Callable getEnclosingCallable(NodeExtended node) {
  node.getEnclosingCallable() = result
}
/** Gets the parameter that `node` represents, if any. */
Parameter asParameter(NodeExtended node) {
  node.asParameter() = result
}
/**
 * Holds if the unbound declaration of the type of some parameter of `api`
 * is a `SystemLinqExpressions::DelegateExtType`.
 */
private predicate isHigherOrder(Callable api) {
  api.getAParameter().getType().getUnboundDeclaration() instanceof
    SystemLinqExpressions::DelegateExtType
}
/**
 * Holds if `a` is an accessor declared by a property for which
 * `isReadWrite()` holds.
 */
private predicate irrelevantAccessor(CS::Accessor a) {
  exists(CS::Property prop |
    prop = a.getDeclaration() and
    prop.isReadWrite()
  )
}
/**
 * Holds if `api` is a callable that no models should be generated for.
 *
 * This covers callables whose declaring type lives in a namespace with an
 * empty full name, conversion operators, main methods, destructors,
 * anonymous functions, parameterless constructors, callables declared on
 * `SystemObjectClass` or `SystemValueTypeClass`, and irrelevant accessors.
 */
private predicate isUninterestingForModels(Callable api) {
  // Disregard properties that have both a get and a set accessor,
  // which implicitly means auto implemented properties.
  irrelevantAccessor(api)
  or
  api.getDeclaringType() instanceof SystemObjectClass
  or
  api.getDeclaringType() instanceof SystemValueTypeClass
  or
  api.(CS::Constructor).isParameterless()
  or
  // Kinds of callables that are never modeled.
  (
    api instanceof CS::AnonymousFunctionExpr or
    api instanceof CS::Destructor or
    api instanceof Util::MainMethod or
    api instanceof CS::ConversionOperator
  )
  or
  // The namespace of the declaring type has an empty full name.
  api.getDeclaringType().getNamespace().getFullName() = ""
}
/**
 * Holds if `api` is a candidate for model generation: it is an unbound
 * declaration from source, it (or, for an accessor, its declaration) is
 * effectively public, and it is not uninteresting for models.
 */
private predicate relevant(Callable api) {
  not isUninterestingForModels(api) and
  api.isUnboundDeclaration() and
  api.fromSource() and
  (
    api.(CS::Modifiable).isEffectivelyPublic()
    or
    api.(CS::Accessor).getDeclaration().isEffectivelyPublic()
  )
}
/** Gets a relevant callable that `m` overrides or implements. */
private Callable getARelevantOverrideeOrImplementee(Overridable m) {
  relevant(result) and
  m.overridesOrImplements(result)
}
/**
 * Gets the callable that `api` is lifted to: either a relevant callable that
 * `api` overrides or implements, or `api` itself when it is relevant. In both
 * cases the result must not itself override or implement a relevant callable.
 */
private Callable liftedImpl(Callable api) {
  // The result must be a top-most relevant declaration.
  not exists(getARelevantOverrideeOrImplementee(result)) and
  (
    api = result and relevant(api)
    or
    result = getARelevantOverrideeOrImplementee(api)
  )
}
/**
 * Holds if `api` is a summarized callable for which a manual model applies,
 * or a neutral summary callable that has a manual model.
 */
private predicate hasManualSummaryModel(Callable api) {
  api.(FlowSummaryImpl::Public::SummarizedCallable).applyManualModel()
  or
  api.(FlowSummaryImpl::Public::NeutralSummaryCallable).hasManualModel()
}
/**
 * Holds if `api` is a source callable or a neutral source callable
 * that has a manual model.
 */
private predicate hasManualSourceModel(Callable api) {
  api.(ExternalFlow::SourceCallable).hasManualModel()
  or
  api.(FlowSummaryImpl::Public::NeutralSourceCallable).hasManualModel()
}
/**
 * Holds if `api` is a sink callable or a neutral sink callable
 * that has a manual model.
 */
private predicate hasManualSinkModel(Callable api) {
  api.(ExternalFlow::SinkCallable).hasManualModel()
  or
  api.(FlowSummaryImpl::Public::NeutralSinkCallable).hasManualModel()
}
/** Holds for no callable: nothing is excluded by this predicate. */
predicate isUninterestingForDataFlowModels(Callable api) {
  none()
}
/** Holds if `api` is higher order, as decided by `isHigherOrder`. */
predicate isUninterestingForHeuristicDataFlowModels(Callable api) {
  isHigherOrder(api)
}
/** A callable that is relevant, and hence a candidate for source or sink models. */
class SourceOrSinkTargetApi extends Callable {
  SourceOrSinkTargetApi() {
    relevant(this)
  }
}
/** A source-or-sink target callable that has no manual sink model. */
class SinkTargetApi extends SourceOrSinkTargetApi {
  SinkTargetApi() {
    not hasManualSinkModel(this)
  }
}
/**
 * A source-or-sink target callable that has no manual source model and
 * that does not override or implement any other callable.
 */
class SourceTargetApi extends SourceOrSinkTargetApi {
  SourceTargetApi() {
    // Do not generate source models for overridable callables
    // as virtual dispatch implies that too many methods
    // will be considered sources.
    not this.(Overridable).overridesOrImplements(_) and
    not hasManualSourceModel(this)
  }
}
/**
 * A callable whose lifted implementation (see `liftedImpl`) has no
 * manual summary model.
 */
class SummaryTargetApi extends Callable {
  private Callable liftedCallable;

  SummaryTargetApi() {
    not hasManualSummaryModel(liftedCallable) and
    liftedCallable = liftedImpl(this)
  }

  /** Gets the callable that this callable is lifted to. */
  Callable lift() { liftedCallable = result }

  /** Holds if this callable is itself relevant and has no manual summary model. */
  predicate isRelevant() {
    not hasManualSummaryModel(this) and
    relevant(this)
  }
}
/**
* Holds if `t` is a type that is generally used for bulk data in collection types.
* Eg. char[] is roughly equivalent to string and thus a highly
@@ -205,6 +120,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
)
}
/** Alias exposing `DataFlowPrivate::InstanceParameterNode` under the name `InstanceParameterNode`. */
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;
/** Gets the access path string for the qualifier, which is `Argument[this]`. */
string qualifierString() {
  result = "Argument[this]"
}
string parameterAccess(CS::Parameter p) {
@@ -215,8 +132,6 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
/** Gets the string `Argument[i]`, where `i` is the position of `p`. */
string parameterContentAccess(CS::Parameter p) {
  exists(int pos |
    pos = p.getPosition() and
    result = "Argument[" + pos + "]"
  )
}
/** Alias exposing `DataFlowPrivate::InstanceParameterNode` under the name `InstanceParameterNode`. */
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;
/**
 * Signature of a predicate that maps a parameter `p` to a string.
 * Used as the parameter of the `ParamReturnNodeAsOutput` module below.
 */
private signature string parameterAccessSig(Parameter p);
private module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> {
@@ -251,63 +166,92 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
node.asExpr() instanceof CS::ThisAccess
}
/**
 * Holds if `node` is a member access with a `this` qualifier whose target is
 * effectively public, and the access is either a field access or targets a
 * property with a public setter.
 */
private predicate isRelevantMemberAccess(DataFlow::Node node) {
  exists(CS::MemberAccess ma |
    ma = node.asExpr() and
    ma.hasThisQualifier() and
    ma.getTarget().isEffectivelyPublic()
  |
    ma.getTarget().(CS::Property).getSetter().isPublic()
    or
    ma instanceof CS::FieldAccess
  )
}
/** Holds for no node: no sink model sanitizers are defined here. */
predicate sinkModelSanitizer(DataFlow::Node node) {
  none()
}
/** Holds if `source` is a parameter node or a relevant member access. */
predicate apiSource(DataFlow::Node source) {
  source instanceof DataFlow::ParameterNode
  or
  isRelevantMemberAccess(source)
}
/**
 * Holds if `dc1` encloses a call for which `dc2` is the one and only
 * viable callable.
 */
private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) {
  exists(DataFlowCall c |
    dc2 = unique(DataFlowCallable target | target = viableCallable(c) | target) and
    c.getEnclosingCallable() = dc1
  )
}
/**
 * Holds if `dc2` is reachable from `dc1` via `uniquelyCalls`, as computed by
 * the built-in `fastTC` higher-order predicate.
 * Both arguments must be bound by the caller (see the `bindingset`).
 */
bindingset[dc1, dc2]
private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) =
fastTC(uniquelyCalls/2)(dc1, dc2)
/**
 * Holds if there is no pair of data flow callables with underlying callables
 * `api` and `sourceEnclosing`, respectively, that are either identical or
 * related by `uniquelyCallsPlus`.
 */
bindingset[sourceEnclosing, api]
predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) {
  not exists(DataFlowCallable caller, DataFlowCallable callee |
    caller.getUnderlyingCallable() = api and
    callee.getUnderlyingCallable() = sourceEnclosing
  |
    caller = callee or uniquelyCallsPlus(caller, callee)
  )
}
/**
 * Gets the input specification for `source`: `Argument[i]` when `source` is
 * a parameter node for the parameter at position `i`, or the qualifier string
 * when `source` is a field or property access.
 */
string getInputArgument(DataFlow::Node source) {
  source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and
  result = qualifierString()
  or
  result = "Argument[" + source.(DataFlow::ParameterNode).getParameter().getPosition() + "]"
}
/** Holds for every sink kind: no kinds are filtered out. */
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
  any()
}
/** Holds for every source kind: no kinds are filtered out. */
bindingset[kind]
predicate isRelevantSourceKind(string kind) {
  any()
}
/** Holds if `c` is element content, that is, `c.isElement()` holds. */
predicate containerContent(DataFlow::ContentSet c) {
  c.isElement()
}
/**
 * Gets the `i`th column of the partial model row for `api`.
 *
 * Columns 0 to 4 are the package, type, extensible, name and parameters
 * values from `ExternalFlow::partialModel`. Column 5 (ext) is always the
 * empty string.
 */
string partialModelRow(Callable api, int i) {
  exists(string package, string type, string extensible, string name, string parameters |
    ExternalFlow::partialModel(api, package, type, extensible, name, parameters)
  |
    i = 0 and result = package
    or
    i = 1 and result = type
    or
    i = 2 and result = extensible
    or
    i = 3 and result = name
    or
    i = 4 and result = parameters
  )
  or
  i = 5 and result = "" and exists(api) // ext
}
/**
 * Gets the `i`th column of the partial neutral model row for `api`.
 *
 * Columns 0 to 3 are the package, type, name and parameters columns of
 * `partialModelRow`, that is, its columns 0, 1, 3 and 4.
 */
string partialNeutralModelRow(Callable api, int i) {
  exists(int column | result = partialModelRow(api, column) |
    i = 0 and column = 0 // package
    or
    i = 1 and column = 1 // type
    or
    i = 2 and column = 3 // name
    or
    i = 3 and column = 4 // parameters
  )
}
}
private import ModelGeneratorCommonInput
private import MakeModelGeneratorFactory<Location, CsharpDataFlow, CsharpTaintTracking, ModelGeneratorCommonInput>
module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig {
/**
 * Gets the enclosing callable of the expression that `node` represents.
 * Has no result when `node.asExpr()` has no result.
 */
Callable getAsExprEnclosingCallable(NodeExtended node) {
  exists(CS::Expr e |
    e = node.asExpr() and
    result = e.getEnclosingCallable()
  )
}
/** Gets the parameter that `node` represents, if any. */
Parameter asParameter(NodeExtended node) {
  node.asParameter() = result
}
/**
 * Holds if the unbound declaration of the type of some parameter of `api`
 * is a `SystemLinqExpressions::DelegateExtType`.
 */
private predicate isHigherOrder(Callable api) {
  api.getAParameter().getType().getUnboundDeclaration() instanceof
    SystemLinqExpressions::DelegateExtType
}
/** Gets a relevant callable that `m` overrides or implements. */
private Callable getARelevantOverrideeOrImplementee(Overridable m) {
  relevant(result) and
  m.overridesOrImplements(result)
}
/**
 * Gets the callable that `api` is lifted to: either a relevant callable that
 * `api` overrides or implements, or `api` itself when it is relevant. In both
 * cases the result must not itself override or implement a relevant callable.
 */
private Callable liftedImpl(Callable api) {
  // The result must be a top-most relevant declaration.
  not exists(getARelevantOverrideeOrImplementee(result)) and
  (
    api = result and relevant(api)
    or
    result = getARelevantOverrideeOrImplementee(api)
  )
}
/**
 * Holds if `api` is a summarized callable for which a manual model applies,
 * or a neutral summary callable that has a manual model.
 */
private predicate hasManualSummaryModel(Callable api) {
  api.(FlowSummaryImpl::Public::SummarizedCallable).applyManualModel()
  or
  api.(FlowSummaryImpl::Public::NeutralSummaryCallable).hasManualModel()
}
/** Holds if `api` is higher order, as decided by `isHigherOrder`. */
predicate isUninterestingForHeuristicDataFlowModels(Callable api) {
  isHigherOrder(api)
}
/**
 * A callable whose lifted implementation (see `liftedImpl`) has no
 * manual summary model.
 */
class SummaryTargetApi extends Callable {
  private Callable liftedCallable;

  SummaryTargetApi() {
    not hasManualSummaryModel(liftedCallable) and
    liftedCallable = liftedImpl(this)
  }

  /** Gets the callable that this callable is lifted to. */
  Callable lift() { liftedCallable = result }

  /** Holds if this callable is itself relevant and has no manual summary model. */
  predicate isRelevant() {
    not hasManualSummaryModel(this) and
    relevant(this)
  }
}
predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and
not nodeTo.asExpr() instanceof CS::ElementAccess and
@@ -370,34 +314,88 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDat
or
c.isDelegateCallReturn() and result = "ReturnValue"
}
}
string partialModelRow(Callable api, int i) {
i = 0 and ExternalFlow::partialModel(api, result, _, _, _, _) // package
or
i = 1 and ExternalFlow::partialModel(api, _, result, _, _, _) // type
or
i = 2 and ExternalFlow::partialModel(api, _, _, result, _, _) // extensible
or
i = 3 and ExternalFlow::partialModel(api, _, _, _, result, _) // name
or
i = 4 and ExternalFlow::partialModel(api, _, _, _, _, result) // parameters
or
i = 5 and result = "" and exists(api) // ext
private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig {
/**
 * Holds if `api` is a source callable or a neutral source callable
 * that has a manual model.
 */
private predicate hasManualSourceModel(Callable api) {
  api.(ExternalFlow::SourceCallable).hasManualModel()
  or
  api.(FlowSummaryImpl::Public::NeutralSourceCallable).hasManualModel()
}
string partialNeutralModelRow(Callable api, int i) {
i = 0 and result = partialModelRow(api, 0) // package
or
i = 1 and result = partialModelRow(api, 1) // type
or
i = 2 and result = partialModelRow(api, 3) // name
or
i = 3 and result = partialModelRow(api, 4) // parameters
/**
 * A relevant callable that has no manual source model and that does not
 * override or implement any other callable.
 */
class SourceTargetApi extends Callable {
  SourceTargetApi() {
    // Do not generate source models for overridable callables
    // as virtual dispatch implies that too many methods
    // will be considered sources.
    not this.(Overridable).overridesOrImplements(_) and
    not hasManualSourceModel(this) and
    relevant(this)
  }
}
/**
 * Holds if `dc1` encloses a call for which `dc2` is the one and only
 * viable callable.
 */
private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) {
  exists(DataFlowCall c |
    dc2 = unique(DataFlowCallable target | target = viableCallable(c) | target) and
    c.getEnclosingCallable() = dc1
  )
}
/**
 * Holds if `dc2` is reachable from `dc1` via `uniquelyCalls`, as computed by
 * the built-in `fastTC` higher-order predicate.
 * Both arguments must be bound by the caller (see the `bindingset`).
 */
bindingset[dc1, dc2]
private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) =
fastTC(uniquelyCalls/2)(dc1, dc2)
/**
 * Holds if there is no pair of data flow callables with underlying callables
 * `api` and `sourceEnclosing`, respectively, that are either identical or
 * related by `uniquelyCallsPlus`.
 */
bindingset[sourceEnclosing, api]
predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) {
  not exists(DataFlowCallable caller, DataFlowCallable callee |
    caller.getUnderlyingCallable() = api and
    callee.getUnderlyingCallable() = sourceEnclosing
  |
    caller = callee or uniquelyCallsPlus(caller, callee)
  )
}
/** Alias for the binary predicate `ExternalFlow::sourceNode`. */
predicate sourceNode = ExternalFlow::sourceNode/2;
}
/**
 * The C# input to the sink model generator, implementing
 * `SinkModelGeneratorInputSig`.
 */
private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig {
  /**
   * Holds if `api` is a sink callable or a neutral sink callable
   * that has a manual model.
   */
  private predicate hasManualSinkModel(Callable api) {
    api.(ExternalFlow::SinkCallable).hasManualModel()
    or
    api.(FlowSummaryImpl::Public::NeutralSinkCallable).hasManualModel()
  }

  /** A relevant callable that has no manual sink model. */
  class SinkTargetApi extends Callable {
    SinkTargetApi() {
      not hasManualSinkModel(this) and
      relevant(this)
    }
  }

  /**
   * Holds if `node` is a member access with a `this` qualifier whose target is
   * effectively public, and the access is either a field access or targets a
   * property with a public setter.
   */
  private predicate isRelevantMemberAccess(DataFlow::Node node) {
    exists(CS::MemberAccess ma |
      ma = node.asExpr() and
      ma.hasThisQualifier() and
      ma.getTarget().isEffectivelyPublic()
    |
      ma.getTarget().(CS::Property).getSetter().isPublic()
      or
      ma instanceof CS::FieldAccess
    )
  }

  /** Holds if `source` is a parameter node or a relevant member access. */
  predicate apiSource(DataFlow::Node source) {
    source instanceof DataFlow::ParameterNode
    or
    isRelevantMemberAccess(source)
  }

  /**
   * Gets the input specification for `source`: `Argument[i]` when `source` is
   * a parameter node for the parameter at position `i`, or the qualifier string
   * when `source` is a field or property access.
   */
  string getInputArgument(DataFlow::Node source) {
    source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and
    result = qualifierString()
    or
    result = "Argument[" + source.(DataFlow::ParameterNode).getParameter().getPosition() + "]"
  }

  /** Alias for the binary predicate `ExternalFlow::sinkNode`. */
  predicate sinkNode = ExternalFlow::sinkNode/2;
}
import MakeModelGenerator<Location, CsharpDataFlow, CsharpTaintTracking, ModelGeneratorInput>
import MakeSummaryModelGenerator<SummaryModelGeneratorInput> as SummaryModels
import MakeSourceModelGenerator<SourceModelGeneratorInput> as SourceModels
import MakeSinkModelGenerator<SinkModelGeneratorInput> as SinkModels

View File

@@ -1,6 +1,6 @@
private import csharp as CS
private import codeql.mad.modelgenerator.internal.ModelPrinting
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput
private module ModelPrintingLang implements ModelPrintingLangSig {
class Callable = CS::Callable;

View File

@@ -2,7 +2,8 @@ private import csharp
private import semmle.code.csharp.frameworks.system.collections.Generic as GenericCollections
private import semmle.code.csharp.dataflow.internal.DataFlowPrivate
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput
private import CaptureModels::SummaryModelGeneratorInput as SummaryModelGeneratorInput
private import CaptureModelsPrinting
/**
@@ -177,21 +178,19 @@ private predicate output(Callable callable, TypeParameter tp, string output) {
delegateSink(callable, tp, output)
}
private module ModelPrintingInput implements ModelPrintingSig {
private module ModelPrintingInput implements ModelPrintingSummarySig {
class SummaryApi = TypeBasedFlowTargetApi;
class SourceOrSinkApi = TypeBasedFlowTargetApi;
string getProvenance() { result = "tb-generated" }
}
private module Printing = ModelPrinting<ModelPrintingInput>;
private module Printing = ModelPrintingSummary<ModelPrintingInput>;
/**
* A class of callables that are relevant for generating summaries
* based on the Theorems for Free approach.
*/
class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi {
class TypeBasedFlowTargetApi extends SummaryModelGeneratorInput::SummaryTargetApi {
/**
* Gets the string representation of all type based summaries for `this`
* inspired by the Theorems for Free approach.