From 43abc727807c049f50505cf44fc84a3e017c0a64 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 1 Sep 2023 13:44:09 +0200 Subject: [PATCH 1/7] JS: Add TypeModel.isTypeUsed f --- .../dataflow/internal/DataFlowNode.qll | 9 ++++++++- .../frameworks/data/internal/ApiGraphModels.qll | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll index ba50b34d7dd..59d6049c8fc 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll @@ -5,6 +5,7 @@ */ private import javascript +private import semmle.javascript.frameworks.data.ModelsAsData /** * The raw data type underlying `DataFlow::Node`. @@ -33,4 +34,10 @@ newtype TNode = TExceptionalInvocationReturnNode(InvokeExpr e) or TGlobalAccessPathRoot() or TTemplatePlaceholderTag(Templating::TemplatePlaceholderTag tag) or - TReflectiveParametersNode(Function f) + TReflectiveParametersNode(Function f) or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) + } diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or From 632cce2c16dda71f3cde7e816957947b64e60420 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 1 Sep 2023 13:53:51 +0200 Subject: [PATCH 2/7] JS: Add failing test due to overpruning --- .../test/library-tests/frameworks/data/test.expected | 1 + .../ql/test/library-tests/frameworks/data/test.js | 6 ++++++ .../ql/test/library-tests/frameworks/data/test.ql | 11 +++++++++++ 3 files changed, 18 insertions(+) diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index d9b9d7e18e2..b1ee96edcc5 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -1,4 +1,5 @@ consistencyIssue +| library-tests/frameworks/data/test.js:261 | expected an alert, but found none | NOT OK | | taintFlow | paramDecorator.ts:6:54:6:54 | x | paramDecorator.ts:7:10:7:10 | x | | test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | diff --git a/javascript/ql/test/library-tests/frameworks/data/test.js b/javascript/ql/test/library-tests/frameworks/data/test.js index 97bb49f8cf7..71fd64e10f6 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.js +++ b/javascript/ql/test/library-tests/frameworks/data/test.js @@ -272,3 +272,9 @@ class MySubclass2 extends MySubclass { sink(new MySubclass2().baseclassSource()); // NOT OK sink(testlib.parenthesizedPackageName()); // NOT OK + +function dangerConstant() { + sink("danger-constant".danger); // NOT OK + sink("danger-constant".safe); // OK + sink("danger-constant"); // OK +} diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index 24af57cb14d..6fcd64f1ef8 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -2,6 +2,17 @@ import javascript import testUtilities.ConsistencyChecking import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels +class TypeModelFromCodeQL extends ModelInput::TypeModel { + override DataFlow::Node getASource(string type) { + type = "danger-constant" and + result.getStringValue() = "danger-constant" + } +} + +class SourceFromDangerConstant extends ModelInput::SourceModelCsv { + override predicate row(string row) { row = "danger-constant;Member[danger];test-source" } +} + class BasicTaintTracking extends TaintTracking::Configuration { BasicTaintTracking() { this = "BasicTaintTracking" } From 6f19fc2fcdddbba42e844fb162d628085465797e Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 1 Sep 2023 13:54:47 +0200 Subject: [PATCH 3/7] JS: Add isTypeUsed to avoid overpruning --- .../ql/test/library-tests/frameworks/data/test.expected | 2 +- .../ql/test/library-tests/frameworks/data/test.ext.yml | 1 + javascript/ql/test/library-tests/frameworks/data/test.ql | 6 ++---- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index b1ee96edcc5..39630269a33 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -1,5 +1,4 @@ consistencyIssue -| library-tests/frameworks/data/test.js:261 | expected an alert, but found none | NOT OK | | taintFlow | paramDecorator.ts:6:54:6:54 | x | paramDecorator.ts:7:10:7:10 | x | | test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | @@ -80,6 +79,7 @@ taintFlow | test.js:269:10:269:31 | this.ba ... ource() | test.js:269:10:269:31 | this.ba ... ource() | | test.js:272:6:272:40 | new MyS ... ource() | test.js:272:6:272:40 | new MyS ... ource() | | test.js:274:6:274:39 | testlib ... eName() | test.js:274:6:274:39 | testlib ... eName() | +| test.js:277:8:277:31 | "danger ... .danger | test.js:277:8:277:31 | "danger ... .danger | isSink | test.js:54:18:54:25 | source() | test-sink | | test.js:55:22:55:29 | source() | test-sink | diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ext.yml b/javascript/ql/test/library-tests/frameworks/data/test.ext.yml index 8aec6766913..eed5c054fbf 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ext.yml +++ b/javascript/ql/test/library-tests/frameworks/data/test.ext.yml @@ -11,6 +11,7 @@ extensions: - ['testlib', 'Member[ParamDecoratorSource].DecoratedParameter', 'test-source'] - ['testlib', 'Member[getSource].ReturnValue', 'test-source'] - ['(testlib)', 'Member[parenthesizedPackageName].ReturnValue', 'test-source'] + - ['danger-constant', 'Member[danger]', 'test-source'] - addsTo: pack: codeql/javascript-all diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index 6fcd64f1ef8..cca38c28642 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -3,16 +3,14 @@ import testUtilities.ConsistencyChecking import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels class TypeModelFromCodeQL extends ModelInput::TypeModel { + override predicate isTypeUsed(string type) { type = "danger-constant" } + override DataFlow::Node getASource(string type) { type = "danger-constant" and result.getStringValue() = "danger-constant" } } -class SourceFromDangerConstant extends ModelInput::SourceModelCsv { - override predicate row(string row) { row = "danger-constant;Member[danger];test-source" } -} - class BasicTaintTracking extends TaintTracking::Configuration { BasicTaintTracking() { this = "BasicTaintTracking" } From 14c71a351e61b78be9f323626ceea26e5145b0c0 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 1 Sep 2023 14:00:28 +0200 Subject: [PATCH 4/7] Sync shared files --- .../frameworks/data/internal/ApiGraphModels.qll | 17 +++++++++++++++++ .../frameworks/data/internal/ApiGraphModels.qll | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index b850a4acea0..7215bd71550 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -168,9 +168,20 @@ module ModelInput { * A unit class for adding additional type model rows from CodeQL models. */ class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + /** * Gets a data-flow node that is a source of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the source. */ @@ -180,6 +191,8 @@ module ModelInput { * Gets a data-flow node that is a sink of the given `type`, * usually because it is an argument passed to a parameter of that type. * + * Note that `type` should also be included in `isTypeUsed`. + * * This must not depend on API graphs, but ensures that an API node is generated for * the sink. */ @@ -188,6 +201,8 @@ module ModelInput { /** * Gets an API node that is a source or sink of the given `type`. * + * Note that `type` should also be included in `isTypeUsed`. + * * Unlike `getASource` and `getASink`, this may depend on API graphs. */ API::Node getAnApiNode(string type) { none() } @@ -367,6 +382,8 @@ predicate isRelevantType(string type) { ( Specific::isTypeUsed(type) or + any(TypeModel model).isTypeUsed(type) + or exists(TestAllModels t) ) or From 13d01f1ec4c8168c7a3220be4455683bdcc4b3e3 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 1 Sep 2023 14:00:38 +0200 Subject: [PATCH 5/7] Ruby/Python: add recursion guard --- .../python/dataflow/new/internal/DataFlowPublic.qll | 7 +++++++ .../lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 4192241d619..8af34b4a4c0 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -9,6 +9,7 @@ import Attributes import LocalSources private import semmle.python.essa.SsaCompute private import semmle.python.dataflow.new.internal.ImportStar +private import semmle.python.frameworks.data.ModelsAsData private import FlowSummaryImpl as FlowSummaryImpl private import semmle.python.frameworks.data.ModelsAsData @@ -125,6 +126,12 @@ newtype TNode = f = any(VariableCapture::CapturedVariable v).getACapturingScope() and // TODO: Remove this restriction when adding proper support for captured variables in the body of the function we generate for comprehensions exists(TFunction(f)) + } or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) } private import semmle.python.internal.CachedStages diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index b50d04ed9f4..a27f30e00e7 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -588,7 +588,13 @@ private module Cached { n in [-1 .. 10] and splatPos = unique(int i | splatArgumentAt(c, i) and i > 0) } or - TCaptureNode(VariableCapture::Flow::SynthesizedCaptureNode cn) + TCaptureNode(VariableCapture::Flow::SynthesizedCaptureNode cn) or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. + (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) + } class TSelfParameterNode = TSelfMethodParameterNode or TSelfToplevelParameterNode; From 3b211089d6f96737bb9d84c53418e9b2811def5c Mon Sep 17 00:00:00 2001 From: Asger F Date: Wed, 6 Sep 2023 12:57:49 +0200 Subject: [PATCH 6/7] JS: Remove redundant import --- .../ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll | 1 - 1 file changed, 1 deletion(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll index 59d6049c8fc..d6ba48d77cb 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowNode.qll @@ -5,7 +5,6 @@ */ private import javascript -private import semmle.javascript.frameworks.data.ModelsAsData /** * The raw data type underlying `DataFlow::Node`. From 0b78d1d953d5147ff3eddd91d8b49857bf257b41 Mon Sep 17 00:00:00 2001 From: Asger F Date: Wed, 6 Sep 2023 12:59:01 +0200 Subject: [PATCH 7/7] Python: add qldoc --- .../lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 8af34b4a4c0..a3075dc4be3 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -127,6 +127,7 @@ newtype TNode = // TODO: Remove this restriction when adding proper support for captured variables in the body of the function we generate for comprehensions exists(TFunction(f)) } or + /** An empty, unused node type that exists to prevent unwanted dependencies on data flow nodes. */ TForbiddenRecursionGuard() { none() and // We want to prune irrelevant models before materialising data flow nodes, so types contributed