diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll index ba50b34d7dd..d6ba48d77cb 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll @@ -33,4 +33,10 @@ newtype TNode = TExceptionalInvocationReturnNode(InvokeExpr e) or TGlobalAccessPathRoot() or TTemplatePlaceholderTag(Templating::TemplatePlaceholderTag tag) or - TReflectiveParametersNode(Function f) + TReflectiveParametersNode(Function f) or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) + } diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index d9b9d7e18e2..39630269a33 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -79,6 +79,7 @@ taintFlow | test.js:269:10:269:31 | this.ba ... ource() | test.js:269:10:269:31 | this.ba ... ource() | | test.js:272:6:272:40 | new MyS ... ource() | test.js:272:6:272:40 | new MyS ... ource() | | test.js:274:6:274:39 | testlib ... eName() | test.js:274:6:274:39 | testlib ... eName() | +| test.js:277:8:277:31 | "danger ... .danger | test.js:277:8:277:31 | "danger ... .danger | isSink | test.js:54:18:54:25 | source() | test-sink | | test.js:55:22:55:29 | source() | test-sink | diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ext.yml b/javascript/ql/test/library-tests/frameworks/data/test.ext.yml index 8aec6766913..eed5c054fbf 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ext.yml +++ b/javascript/ql/test/library-tests/frameworks/data/test.ext.yml @@ -11,6 +11,7 @@ extensions: - ['testlib', 'Member[ParamDecoratorSource].DecoratedParameter', 'test-source'] - ['testlib', 'Member[getSource].ReturnValue', 'test-source'] - ['(testlib)', 'Member[parenthesizedPackageName].ReturnValue', 'test-source'] + - ['danger-constant', 'Member[danger]', 'test-source'] - addsTo: pack: codeql/javascript-all diff --git a/javascript/ql/test/library-tests/frameworks/data/test.js b/javascript/ql/test/library-tests/frameworks/data/test.js index 97bb49f8cf7..71fd64e10f6 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.js +++ b/javascript/ql/test/library-tests/frameworks/data/test.js @@ -272,3 +272,9 @@ class MySubclass2 extends MySubclass { sink(new MySubclass2().baseclassSource()); // NOT OK sink(testlib.parenthesizedPackageName()); // NOT OK + +function dangerConstant() { + sink("danger-constant".danger); // NOT OK + sink("danger-constant".safe); // OK + sink("danger-constant"); // OK +} diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index 24af57cb14d..cca38c28642 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -2,6 +2,15 @@ import javascript import testUtilities.ConsistencyChecking import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels +class TypeModelFromCodeQL extends ModelInput::TypeModel { + override predicate isTypeUsed(string type) { type = "danger-constant" } + + override DataFlow::Node getASource(string type) { + type = "danger-constant" and + result.getStringValue() = "danger-constant" + } +} + class BasicTaintTracking extends TaintTracking::Configuration { BasicTaintTracking() { this = "BasicTaintTracking" } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 4192241d619..a3075dc4be3 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -9,6 +9,7 @@ import Attributes import LocalSources private import semmle.python.essa.SsaCompute private import semmle.python.dataflow.new.internal.ImportStar +private import semmle.python.frameworks.data.ModelsAsData private import FlowSummaryImpl as FlowSummaryImpl private import semmle.python.frameworks.data.ModelsAsData @@ -125,6 +126,13 @@ newtype TNode = f = any(VariableCapture::CapturedVariable v).getACapturingScope() and // TODO: Remove this restriction when adding proper support for captured variables in the body of the function we generate for comprehensions exists(TFunction(f)) + } or + /** An empty, unused node type that exists to prevent unwanted dependencies on data flow nodes. */ + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) } private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index b5e412e9d07..edab51da858 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -588,7 +588,13 @@ private module Cached { n in [-1 .. 10] and splatPos = unique(int i | splatArgumentAt(c, i) and i > 0) } or - TCaptureNode(VariableCapture::Flow::SynthesizedCaptureNode cn) + TCaptureNode(VariableCapture::Flow::SynthesizedCaptureNode cn) or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) + } class TSelfParameterNode = TSelfMethodParameterNode or TSelfToplevelParameterNode; diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or