From 666855dbd79a49396ba4fc760b5838c659b65fbd Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 26 Nov 2025 10:33:52 +0100 Subject: [PATCH] Shared: Improvements to content-sensitive model generation --- .../test/utils-tests/modelgenerator/option.rs | 8 +++---- .../dataflow/internal/ContentDataFlowImpl.qll | 15 +++++++++++- .../internal/ModelGeneratorImpl.qll | 23 ++++++++++--------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/rust/ql/test/utils-tests/modelgenerator/option.rs b/rust/ql/test/utils-tests/modelgenerator/option.rs index 145b6e34592..fd5cd649c2c 100644 --- a/rust/ql/test/utils-tests/modelgenerator/option.rs +++ b/rust/ql/test/utils-tests/modelgenerator/option.rs @@ -299,8 +299,7 @@ impl MyOption { } // summary=::insert;Argument[0];Argument[self].Reference.Field[test::option::MyOption::MySome(0)];value;dfc-generated - // This summary is currently missing because of access path limit - // summary-MISSING=::insert;Argument[0];ReturnValue.Reference;value;dfc-generated + // summary=::insert;Argument[0];ReturnValue.Reference;value;dfc-generated // The content of `self` is overwritten so it does not flow to the return value. // SPURIOUS-summary=::insert;Argument[self].Reference.Field[test::option::MyOption::MySome(0)];ReturnValue.Reference;value;dfc-generated pub fn insert(&mut self, value: T) -> &mut T { @@ -311,8 +310,7 @@ impl MyOption { } // summary=::get_or_insert;Argument[0];Argument[self].Reference.Field[test::option::MyOption::MySome(0)];value;dfc-generated - // This summary is currently missing because of access path limit - // summary-MISSING=::get_or_insert;Argument[0];ReturnValue.Reference;value;dfc-generated + // summary=::get_or_insert;Argument[0];ReturnValue.Reference;value;dfc-generated // summary=::get_or_insert;Argument[self].Reference.Field[test::option::MyOption::MySome(0)];ReturnValue.Reference;value;dfc-generated pub fn get_or_insert(&mut self, value: T) -> &mut T { self.get_or_insert_with(|| value) @@ -328,7 +326,7 @@ impl MyOption { // summary=::get_or_insert_with;Argument[self].Reference.Field[test::option::MyOption::MySome(0)];ReturnValue.Reference;value;dfc-generated // summary=::get_or_insert_with;Argument[0].ReturnValue;Argument[self].Reference.Field[test::option::MyOption::MySome(0)];value;dfc-generated - // SPURIOUS-summary=::get_or_insert_with;Argument[0];Argument[self].Reference.Field[test::option::MyOption::MySome(0)];value;dfc-generated + // summary=::get_or_insert_with;Argument[0].ReturnValue;ReturnValue.Reference;value;dfc-generated pub fn get_or_insert_with(&mut self, f: F) -> &mut T where F: FnOnce() -> T, diff --git a/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll index baf473efff1..0c84d33244c 100644 --- a/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll @@ -75,6 +75,9 @@ module MakeImplContentDataFlow Lang> { /** Gets a limit on the number of reads out of sources and number of stores into sinks. */ default int accessPathLimit() { result = Lang::accessPathLimit() } + /** Gets the access path limit used in the internal invocation of the standard data flow library. */ + default int accessPathLimitInternal() { result = Lang::accessPathLimit() } + /** Holds if `c` is relevant for reads out of sources or stores into sinks. */ default predicate isRelevantContent(ContentSet c) { any() } } @@ -110,7 +113,7 @@ module MakeImplContentDataFlow Lang> { FlowFeature getAFeature() { result = ContentConfig::getAFeature() } - predicate accessPathLimit = ContentConfig::accessPathLimit/0; + predicate accessPathLimit = ContentConfig::accessPathLimitInternal/0; // needed to record reads/stores inside summarized callables predicate includeHiddenNodes() { any() } @@ -274,6 +277,16 @@ module MakeImplContentDataFlow Lang> { ) } + /** + * Gets the length of this access path. + */ + int length() { + this = TAccessPathNil() and + result = 0 + or + result = this.getTail().length() + 1 + } + /** * Gets the content set at index `i` in this access path, if any. */ diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll index 51dafc2cc96..7f61315c125 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -703,14 +703,17 @@ module MakeModelGeneratorFactory< } /** - * Holds if the access path `ap` is not a parameter or returnvalue of a callback - * stored in a field. + * Holds if `ap` is valid for generating summary models. * - * That is, we currently don't include summaries that rely on parameters or return values - * of callbacks stored in fields. + * We currently don't include summaries that rely on parameters or return values + * of callbacks stored in fields, as those are not supported by the data flow + * library. + * + * We also exclude access paths with contents not supported by `printContent`. */ private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { - not (mentionsField(ap) and mentionsCallback(ap)) + not (mentionsField(ap) and mentionsCallback(ap)) and + forall(int i | i in [0 .. ap.length() - 1] | exists(getContent(ap, i))) } private predicate apiFlow( @@ -720,7 +723,9 @@ module MakeModelGeneratorFactory< ) { PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api + getEnclosingCallable(p) = api and + validateAccessPath(reads) and + validateAccessPath(stores) } /** @@ -763,9 +768,7 @@ module MakeModelGeneratorFactory< PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue ) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api and + apiFlow(api, p, reads, returnNodeExt, stores, preservesValue) and p = api.getARelevantParameterNode() } @@ -956,8 +959,6 @@ module MakeModelGeneratorFactory< input = parameterNodeAsExactInput(p) + printReadAccessPath(reads) and output = getExactOutput(returnNodeExt) + printStoreAccessPath(stores) and input != output and - validateAccessPath(reads) and - validateAccessPath(stores) and ( if mentionsField(reads) or mentionsField(stores) then lift = false and api.isRelevant()